diff --git a/paddlespeech/audio/CMakeLists.txt b/paddlespeech/audio/CMakeLists.txt
deleted file mode 100644
index c6b43c780deb2af9d26f7c9344d43519c0db9619..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-
-add_subdirectory(third_party)
-add_subdirectory(src)
diff --git a/paddlespeech/audio/README.md b/paddlespeech/audio/README.md
deleted file mode 100644
index fc1e5942c83a9ae9042740cb10f7590851099eaf..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/README.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# PaddleAudio
-
-## Reference
-`csrc` code is reference of `torchaudio`.
-
-```text
-BSD 2-Clause License
-
-Copyright (c) [year], [fullname]
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-```
diff --git a/paddlespeech/audio/__init__.py b/paddlespeech/audio/__init__.py
index ad06603a7f4fd31359a0bb4625edddc85e0ebaf1..a7cf6caafb3eeceee3460df1773992387ddfc0b1 100644
--- a/paddlespeech/audio/__init__.py
+++ b/paddlespeech/audio/__init__.py
@@ -11,17 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from . import _extension
-from . import compliance
-from . import datasets
-from . import features
-from . import functional
-from . import io
-from . import metric
-from . import sox_effects
 from . import streamdata
 from . import text
 from . import transform
-from .backends import load
-from .backends import save
diff --git a/paddlespeech/audio/_extension.py b/paddlespeech/audio/_extension.py
deleted file mode 100644
index ac82c06e53d6898bac5fb0e3b42e47ddd49c8964..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/_extension.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import os
-import warnings
-from pathlib import Path
-
-from ._internal import module_utils as _mod_utils  # noqa: F401
-
-
-import contextlib
-import ctypes
-import os
-import sys
-import types
-
-# Query `hasattr` only once.
-_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
-                                                               'setdlopenflags')
-
-
-@contextlib.contextmanager
-def dl_open_guard():
-    """
-    # https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html
-    Context manager to set the RTLD_GLOBAL dynamic linker flag while we open a
-    shared library to load custom operators.
-    """
-    if _SET_GLOBAL_FLAGS:
-        old_flags = sys.getdlopenflags()
-        sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
-    yield
-    if _SET_GLOBAL_FLAGS:
-        sys.setdlopenflags(old_flags)
-
-
-def resolve_library_path(path: str) -> str:
-    return os.path.realpath(path)
-
-
-class _Ops(types.ModuleType):
-    #__file__ = '_ops.py'
-
-    def __init__(self):
-        super(_Ops, self).__init__('paddlespeech.ops')
-        self.loaded_libraries = set()
-
-    def load_library(self, path):
-        """
-        Loads a shared library from the given path into the current process.
-        This allows dynamically loading custom operators. For this, 
-        you should compile your operator and 
-        the static registration code into a shared library object, and then
-        call ``paddlespeech.ops.load_library('path/to/libcustom.so')`` to load the
-        shared object.
-        After the library is loaded, it is added to the
-        ``paddlespeech.ops.loaded_libraries`` attribute, a set that may be inspected
-        for the paths of all libraries loaded using this function.
-        Args:
-            path (str): A path to a shared library to load.
-        """
-        path = resolve_library_path(path)
-        with dl_open_guard():
-            # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
-            # Import the shared library into the process, thus running its
-            # static (global) initialization code in order to register custom
-            # operators with the JIT.
-            ctypes.CDLL(path)
-        self.loaded_libraries.add(path)
-
-
-_LIB_DIR = Path(__file__).parent / "lib"
-
-def _get_lib_path(lib: str):
-    suffix = "pyd" if os.name == "nt" else "so"
-    path = _LIB_DIR / f"{lib}.{suffix}"
-    return path
-
-
-def _load_lib(lib: str) -> bool:
-    """Load extension module
-    Note:
-        In case `paddleaudio` is deployed with `pex` format, the library file
-        is not in a standard location.
-        In this case, we expect that `libpaddlleaudio` is available somewhere
-        in the search path of dynamic loading mechanism, so that importing
-        `_paddlleaudio` will have library loader find and load `libpaddlleaudio`.
-        This is the reason why the function should not raising an error when the library
-        file is not found.
-    Returns:
-        bool:
-            True if the library file is found AND the library loaded without failure.
-            False if the library file is not found (like in the case where paddlleaudio
-            is deployed with pex format, thus the shared library file is
-            in a non-standard location.).
-            If the library file is found but there is an issue loading the library,
-            (such as missing dependency) then this function raises the exception as-is.
-    Raises:
-        Exception:
-            If the library file is found, but there is an issue loading the library file,
-            (when underlying `ctype.DLL` throws an exception), this function will pass
-            the exception as-is, instead of catching it and returning bool.
-            The expected case is `OSError` thrown by `ctype.DLL` when a dynamic dependency
-            is not found.
-            This behavior was chosen because the expected failure case is not recoverable.
-            If a dependency is missing, then users have to install it.
-    """
-    path = _get_lib_path(lib)
-    if not path.exists():
-        warnings.warn("lib path is not exists:" + str(path))
-        return False
-    #paddlespeech.audio.ops.load_library(path)
-    ops.load_library(path)
-    return True
-
-
-_FFMPEG_INITIALIZED = False
-
-
-def _init_ffmpeg():
-    global _FFMPEG_INITIALIZED
-    if _FFMPEG_INITIALIZED:
-        return
-
-    if not paddlespeech.audio._paddlleaudio.is_ffmpeg_available():
-        raise RuntimeError(
-            "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
-        )
-
-    try:
-        _load_lib("libpaddlleaudio_ffmpeg")
-    except OSError as err:
-        raise ImportError(
-            "FFmpeg libraries are not found. Please install FFmpeg.") from err
-
-    import paddllespeech.audio._paddlleaudio_ffmpeg  # noqa
-
-    paddlespeech.audio._paddlleaudio.ffmpeg_init()
-    if paddlespeech.audio._paddlleaudio.ffmpeg_get_log_level() > 8:
-        paddlespeech.audio._paddlleaudio.ffmpeg_set_log_level(8)
-
-    _FFMPEG_INITIALIZED = True
-
-
-def _init_extension():
-    if not _mod_utils.is_module_available("paddlespeech.audio._paddleaudio"):
-        warnings.warn("paddlespeech C++ extension is not available.")
-        return
-
-    _load_lib("libpaddleaudio")
-    # This import is for initializing the methods registered via PyBind11
-    # This has to happen after the base library is loaded
-    from paddlespeech.audio import _paddleaudio  # noqa
-
-    # Because this part is executed as part of `import torchaudio`, we ignore the
-    # initialization failure.
-    # If the FFmpeg integration is not properly initialized, then detailed error
-    # will be raised when client code attempts to import the dedicated feature.
-    try:
-        _init_ffmpeg()
-    except Exception:
-        pass
-
-
-ops = _Ops()
-
-_init_extension()
diff --git a/paddlespeech/audio/_internal/__init__.py b/paddlespeech/audio/_internal/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/paddlespeech/audio/_internal/module_utils.py b/paddlespeech/audio/_internal/module_utils.py
deleted file mode 100644
index d4a308fe7251f48641f927c4689414d64954a842..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/_internal/module_utils.py
+++ /dev/null
@@ -1,148 +0,0 @@
-import importlib.util
-import warnings
-from functools import wraps
-from typing import Optional
-
-#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
-
-
-def is_module_available(*modules: str) -> bool:
-    r"""Returns if a top-level module with :attr:`name` exists *without**
-    importing it. This is generally safer than try-catch block around a
-    `import X`. It avoids third party libraries breaking assumptions of some of
-    our tests, e.g., setting multiprocessing start method when imported
-    (see librosa/#747, torchvision/#544).
-    """
-    return all(importlib.util.find_spec(m) is not None for m in modules)
-
-
-def requires_module(*modules: str):
-    """Decorate function to give error message if invoked without required optional modules.
-    This decorator is to give better error message to users rather
-    than raising ``NameError:  name 'module' is not defined`` at random places.
-    """
-    missing = [m for m in modules if not is_module_available(m)]
-
-    if not missing:
-        # fall through. If all the modules are available, no need to decorate
-        def decorator(func):
-            return func
-
-    else:
-        req = f"module: {missing[0]}" if len(
-            missing) == 1 else f"modules: {missing}"
-
-        def decorator(func):
-            @wraps(func)
-            def wrapped(*args, **kwargs):
-                raise RuntimeError(
-                    f"{func.__module__}.{func.__name__} requires {req}")
-
-            return wrapped
-
-    return decorator
-
-
-def deprecated(direction: str, version: Optional[str]=None):
-    """Decorator to add deprecation message
-    Args:
-        direction (str): Migration steps to be given to users.
-        version (str or int): The version when the object will be removed
-    """
-
-    def decorator(func):
-        @wraps(func)
-        def wrapped(*args, **kwargs):
-            message = (
-                f"{func.__module__}.{func.__name__} has been deprecated "
-                f'and will be removed from {"future" if version is None else version} release. '
-                f"{direction}")
-            warnings.warn(message, stacklevel=2)
-            return func(*args, **kwargs)
-
-        return wrapped
-
-    return decorator
-
-
-def is_kaldi_available():
-    return is_module_available("paddlespeech.audio._paddleaudio")
-
-
-def requires_kaldi():
-    if is_kaldi_available():
-
-        def decorator(func):
-            return func
-
-    else:
-
-        def decorator(func):
-            @wraps(func)
-            def wrapped(*args, **kwargs):
-                raise RuntimeError(
-                    f"{func.__module__}.{func.__name__} requires kaldi")
-
-            return wrapped
-
-    return decorator
-
-
-def _check_soundfile_importable():
-    if not is_module_available("soundfile"):
-        return False
-    try:
-        import soundfile  # noqa: F401
-
-        return True
-    except Exception:
-        warnings.warn(
-            "Failed to import soundfile. 'soundfile' backend is not available.")
-        return False
-
-
-_is_soundfile_importable = _check_soundfile_importable()
-
-
-def is_soundfile_available():
-    return _is_soundfile_importable
-
-
-def requires_soundfile():
-    if is_soundfile_available():
-
-        def decorator(func):
-            return func
-    else:
-
-        def decorator(func):
-            @wraps(func)
-            def wrapped(*args, **kwargs):
-                raise RuntimeError(
-                    f"{func.__module__}.{func.__name__} requires soundfile")
-
-            return wrapped
-
-    return decorator
-
-
-def is_sox_available():
-    return is_module_available("paddlespeech.audio._paddleaudio")
-
-
-def requires_sox():
-    if is_sox_available():
-
-        def decorator(func):
-            return func
-    else:
-
-        def decorator(func):
-            @wraps(func)
-            def wrapped(*args, **kwargs):
-                raise RuntimeError(
-                    f"{func.__module__}.{func.__name__} requires sox")
-
-            return wrapped
-
-    return decorator
diff --git a/paddlespeech/audio/_ops.py b/paddlespeech/audio/_ops.py
deleted file mode 100644
index 6bcf25fe41d0d9e29a3d11255cd6ec28a6635e88..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/_ops.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import contextlib
-import ctypes
-import os
-import sys
-import types
-
-# Query `hasattr` only once.
-_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
-                                                               'setdlopenflags')
-
-
-@contextlib.contextmanager
-def dl_open_guard():
-    """
-    # https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html
-    Context manager to set the RTLD_GLOBAL dynamic linker flag while we open a
-    shared library to load custom operators.
-    """
-    if _SET_GLOBAL_FLAGS:
-        old_flags = sys.getdlopenflags()
-        sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
-    yield
-    if _SET_GLOBAL_FLAGS:
-        sys.setdlopenflags(old_flags)
-
-
-def resolve_library_path(path: str) -> str:
-    return os.path.realpath(path)
-
-
-class _Ops(types.ModuleType):
-    __file__ = '_ops.py'
-
-    def __init__(self):
-        super(_Ops, self).__init__('paddlespeech.ops')
-        self.loaded_libraries = set()
-
-    def load_library(self, path):
-        """
-        Loads a shared library from the given path into the current process.
-        This allows dynamically loading custom operators. For this, 
-        you should compile your operator and 
-        the static registration code into a shared library object, and then
-        call ``paddlespeech.ops.load_library('path/to/libcustom.so')`` to load the
-        shared object.
-        After the library is loaded, it is added to the
-        ``paddlespeech.ops.loaded_libraries`` attribute, a set that may be inspected
-        for the paths of all libraries loaded using this function.
-        Args:
-            path (str): A path to a shared library to load.
-        """
-        path = resolve_library_path(path)
-        with dl_open_guard():
-            # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
-            # Import the shared library into the process, thus running its
-            # static (global) initialization code in order to register custom
-            # operators with the JIT.
-            ctypes.CDLL(path)
-        self.loaded_libraries.add(path)
-
-
-# The ops "namespace"
-ops = _Ops()
diff --git a/paddlespeech/audio/backends/__init__.py b/paddlespeech/audio/backends/__init__.py
deleted file mode 100644
index 6c73ca43c37bb46d5bf77d7e686bf5ceab40c4ce..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/backends/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# flake8: noqa
-from . import utils
-from .utils import get_audio_backend
-from .utils import list_audio_backends
-from .utils import set_audio_backend
diff --git a/paddlespeech/audio/backends/common.py b/paddlespeech/audio/backends/common.py
deleted file mode 100644
index 7ccab1d3389e225c48478d40ba9ef7f85c03617f..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/backends/common.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# code from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py
-
-class AudioMetaData:
-    """Return type of ``torchaudio.info`` function.
-
-    This class is used by :ref:`"sox_io" backend<sox_io_backend>` and
-    :ref:`"soundfile" backend with the new interface<soundfile_backend>`.
-
-    :ivar int sample_rate: Sample rate
-    :ivar int num_frames: The number of frames
-    :ivar int num_channels: The number of channels
-    :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
-        or when it cannot be accurately inferred.
-    :ivar str encoding: Audio encoding
-        The values encoding can take are one of the following:
-
-            * ``PCM_S``: Signed integer linear PCM
-            * ``PCM_U``: Unsigned integer linear PCM
-            * ``PCM_F``: Floating point linear PCM
-            * ``FLAC``: Flac, Free Lossless Audio Codec
-            * ``ULAW``: Mu-law
-            * ``ALAW``: A-law
-            * ``MP3`` : MP3, MPEG-1 Audio Layer III
-            * ``VORBIS``: OGG Vorbis
-            * ``AMR_WB``: Adaptive Multi-Rate
-            * ``AMR_NB``: Adaptive Multi-Rate Wideband
-            * ``OPUS``: Opus
-            * ``HTK``: Single channel 16-bit PCM
-            * ``UNKNOWN`` : None of above
-    """
-
-    def __init__(
-        self,
-        sample_rate: int,
-        num_frames: int,
-        num_channels: int,
-        bits_per_sample: int,
-        encoding: str,
-    ):
-        self.sample_rate = sample_rate
-        self.num_frames = num_frames
-        self.num_channels = num_channels
-        self.bits_per_sample = bits_per_sample
-        self.encoding = encoding
-
-    def __str__(self):
-        return (
-            f"AudioMetaData("
-            f"sample_rate={self.sample_rate}, "
-            f"num_frames={self.num_frames}, "
-            f"num_channels={self.num_channels}, "
-            f"bits_per_sample={self.bits_per_sample}, "
-            f"encoding={self.encoding}"
-            f")"
-        )
diff --git a/paddlespeech/audio/backends/no_backend.py b/paddlespeech/audio/backends/no_backend.py
deleted file mode 100644
index 157536f46e73c1b8911ed61f40ecb730a6af41dc..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/backends/no_backend.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from pathlib import Path
-from typing import Callable
-from typing import Optional
-from typing import Tuple
-from typing import Union
-
-from paddle import Tensor
-
-#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
-
-
-def load(
-        filepath: Union[str, Path],
-        out: Optional[Tensor]=None,
-        normalization: Union[bool, float, Callable]=True,
-        channels_first: bool=True,
-        num_frames: int=0,
-        offset: int=0,
-        filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
-    raise RuntimeError("No audio I/O backend is available.")
-
-
-def save(filepath: str,
-         src: Tensor,
-         sample_rate: int,
-         precision: int=16,
-         channels_first: bool=True) -> None:
-    raise RuntimeError("No audio I/O backend is available.")
-
-
-def info(filepath: str) -> None:
-    raise RuntimeError("No audio I/O backend is available.")
diff --git a/paddlespeech/audio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py
deleted file mode 100644
index 57e06e521cb3e4de137e062610b58839d19b6b78..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/backends/soundfile_backend.py
+++ /dev/null
@@ -1,662 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import warnings
-from typing import Optional
-from typing import Tuple
-
-import numpy as np
-import paddle
-import resampy
-import soundfile
-from scipy.io import wavfile
-
-from ..utils import depth_convert
-from ..utils import ParameterError
-from .common import AudioMetaData
-
-__all__ = [
-    'resample',
-    'to_mono',
-    'normalize',
-    'save',
-    'soundfile_save',
-    'load',
-    'soundfile_load',
-    'info',
-    'to_mono'
-]
-NORMALMIZE_TYPES = ['linear', 'gaussian']
-MERGE_TYPES = ['ch0', 'ch1', 'random', 'average']
-RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast']
-EPS = 1e-8
-
-
-def resample(y: np.ndarray,
-             src_sr: int,
-             target_sr: int,
-             mode: str='kaiser_fast') -> np.ndarray:
-    """Audio resampling.
-
-    Args:
-        y (np.ndarray): Input waveform array in 1D or 2D.
-        src_sr (int): Source sample rate.
-        target_sr (int): Target sample rate.
-        mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'.
-
-    Returns:
-        np.ndarray: `y` resampled to `target_sr`
-    """
-
-    if mode == 'kaiser_best':
-        warnings.warn(
-            f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \
-        we recommend the mode kaiser_fast in large scale audio trainning')
-
-    if not isinstance(y, np.ndarray):
-        raise ParameterError(
-            'Only support numpy np.ndarray, but received y in {type(y)}')
-
-    if mode not in RESAMPLE_MODES:
-        raise ParameterError(f'resample mode must in {RESAMPLE_MODES}')
-
-    return resampy.resample(y, src_sr, target_sr, filter=mode)
-
-
-def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray:
-    """Convert sterior audio to mono.
-
-    Args:
-        y (np.ndarray): Input waveform array in 1D or 2D.
-        merge_type (str, optional): Merge type to generate mono waveform. Defaults to 'average'.
-
-    Returns:
-        np.ndarray: `y` with mono channel.
-    """
-
-    if merge_type not in MERGE_TYPES:
-        raise ParameterError(
-            f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}'
-        )
-    if y.ndim > 2:
-        raise ParameterError(
-            f'Unsupported audio array,  y.ndim > 2, the shape is {y.shape}')
-    if y.ndim == 1:  # nothing to merge
-        return y
-
-    if merge_type == 'ch0':
-        return y[0]
-    if merge_type == 'ch1':
-        return y[1]
-    if merge_type == 'random':
-        return y[np.random.randint(0, 2)]
-
-    # need to do averaging according to dtype
-
-    if y.dtype == 'float32':
-        y_out = (y[0] + y[1]) * 0.5
-    elif y.dtype == 'int16':
-        y_out = y.astype('int32')
-        y_out = (y_out[0] + y_out[1]) // 2
-        y_out = np.clip(y_out, np.iinfo(y.dtype).min,
-                        np.iinfo(y.dtype).max).astype(y.dtype)
-
-    elif y.dtype == 'int8':
-        y_out = y.astype('int16')
-        y_out = (y_out[0] + y_out[1]) // 2
-        y_out = np.clip(y_out, np.iinfo(y.dtype).min,
-                        np.iinfo(y.dtype).max).astype(y.dtype)
-    else:
-        raise ParameterError(f'Unsupported dtype: {y.dtype}')
-    return y_out
-
-
-def soundfile_load_(file: os.PathLike,
-                    offset: Optional[float]=None,
-                    dtype: str='int16',
-                    duration: Optional[int]=None) -> Tuple[np.ndarray, int]:
-    """Load audio using soundfile library. This function load audio file using libsndfile.
-
-    Args:
-        file (os.PathLike): File of waveform.
-        offset (Optional[float], optional): Offset to the start of waveform. Defaults to None.
-        dtype (str, optional): Data type of waveform. Defaults to 'int16'.
-        duration (Optional[int], optional): Duration of waveform to read. Defaults to None.
-
-    Returns:
-        Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate.
-    """
-    with soundfile.SoundFile(file) as sf_desc:
-        sr_native = sf_desc.samplerate
-        if offset:
-            sf_desc.seek(int(offset * sr_native))
-        if duration is not None:
-            frame_duration = int(duration * sr_native)
-        else:
-            frame_duration = -1
-        y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
-
-    return y, sf_desc.samplerate
-
-
-def normalize(y: np.ndarray, norm_type: str='linear',
-              mul_factor: float=1.0) -> np.ndarray:
-    """Normalize an input audio with additional multiplier.
-
-    Args:
-        y (np.ndarray): Input waveform array in 1D or 2D.
-        norm_type (str, optional): Type of normalization. Defaults to 'linear'.
-        mul_factor (float, optional): Scaling factor. Defaults to 1.0.
-
-    Returns:
-        np.ndarray: `y` after normalization.
-    """
-
-    if norm_type == 'linear':
-        amax = np.max(np.abs(y))
-        factor = 1.0 / (amax + EPS)
-        y = y * factor * mul_factor
-    elif norm_type == 'gaussian':
-        amean = np.mean(y)
-        astd = np.std(y)
-        astd = max(astd, EPS)
-        y = mul_factor * (y - amean) / astd
-    else:
-        raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}')
-
-    return y
-
-
-def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:
-    """Save audio file to disk. This function saves audio to disk using scipy.io.wavfile, with additional step to convert input waveform to int16.
-
-    Args:
-        y (np.ndarray): Input waveform array in 1D or 2D.
-        sr (int): Sample rate.
-        file (os.PathLike): Path of auido file to save.
-    """
-    if not file.endswith('.wav'):
-        raise ParameterError(
-            f'only .wav file supported, but dst file name is: {file}')
-
-    if sr <= 0:
-        raise ParameterError(
-            f'Sample rate should be larger than 0, recieved sr = {sr}')
-
-    if y.dtype not in ['int16', 'int8']:
-        warnings.warn(
-            f'input data type is {y.dtype}, will convert data to int16 format before saving'
-        )
-        y_out = depth_convert(y, 'int16')
-    else:
-        y_out = y
-
-    wavfile.write(file, sr, y_out)
-
-def soundfile_load(
-        file: os.PathLike,
-        sr: Optional[int]=None,
-        mono: bool=True,
-        merge_type: str='average',  # ch0,ch1,random,average
-        normal: bool=True,
-        norm_type: str='linear',
-        norm_mul_factor: float=1.0,
-        offset: float=0.0,
-        duration: Optional[int]=None,
-        dtype: str='float32',
-        resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]:
-    """Load audio file from disk. This function loads audio from disk using using audio beackend.
-
-    Args:
-        file (os.PathLike): Path of auido file to load.
-        sr (Optional[int], optional): Sample rate of loaded waveform. Defaults to None.
-        mono (bool, optional): Return waveform with mono channel. Defaults to True.
-        merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'.
-        normal (bool, optional): Waveform normalization. Defaults to True.
-        norm_type (str, optional): Type of normalization. Defaults to 'linear'.
-        norm_mul_factor (float, optional): Scaling factor. Defaults to 1.0.
-        offset (float, optional): Offset to the start of waveform. Defaults to 0.0.
-        duration (Optional[int], optional): Duration of waveform to read. Defaults to None.
-        dtype (str, optional): Data type of waveform. Defaults to 'float32'.
-        resample_mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'.
-
-    Returns:
-        Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate.
-    """
-
-    y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration)
-
-    if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)):
-        raise ParameterError(f'audio file {file} looks empty')
-
-    if mono:
-        y = to_mono(y, merge_type)
-
-    if sr is not None and sr != r:
-        y = resample(y, r, sr, mode=resample_mode)
-        r = sr
-
-    if normal:
-        y = normalize(y, norm_type, norm_mul_factor)
-    elif dtype in ['int8', 'int16']:
-        # still need to do normalization, before depth convertion
-        y = normalize(y, 'linear', 1.0)
-
-    y = depth_convert(y, dtype)
-    return y, r
-
-#the code below is form: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py
-
-def _get_subtype_for_wav(dtype: paddle.dtype, encoding: str, bits_per_sample: int):
-    if not encoding:
-        if not bits_per_sample:
-            subtype = {
-                paddle.uint8: "PCM_U8",
-                paddle.int16: "PCM_16",
-                paddle.int32: "PCM_32",
-                paddle.float32: "FLOAT",
-                paddle.float64: "DOUBLE",
-            }.get(dtype)
-            if not subtype:
-                raise ValueError(f"Unsupported dtype for wav: {dtype}")
-            return subtype
-        if bits_per_sample == 8:
-            return "PCM_U8"
-        return f"PCM_{bits_per_sample}"
-    if encoding == "PCM_S":
-        if not bits_per_sample:
-            return "PCM_32"
-        if bits_per_sample == 8:
-            raise ValueError("wav does not support 8-bit signed PCM encoding.")
-        return f"PCM_{bits_per_sample}"
-    if encoding == "PCM_U":
-        if bits_per_sample in (None, 8):
-            return "PCM_U8"
-        raise ValueError("wav only supports 8-bit unsigned PCM encoding.")
-    if encoding == "PCM_F":
-        if bits_per_sample in (None, 32):
-            return "FLOAT"
-        if bits_per_sample == 64:
-            return "DOUBLE"
-        raise ValueError("wav only supports 32/64-bit float PCM encoding.")
-    if encoding == "ULAW":
-        if bits_per_sample in (None, 8):
-            return "ULAW"
-        raise ValueError("wav only supports 8-bit mu-law encoding.")
-    if encoding == "ALAW":
-        if bits_per_sample in (None, 8):
-            return "ALAW"
-        raise ValueError("wav only supports 8-bit a-law encoding.")
-    raise ValueError(f"wav does not support {encoding}.")
-
-
-def _get_subtype_for_sphere(encoding: str, bits_per_sample: int):
-    if encoding in (None, "PCM_S"):
-        return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32"
-    if encoding in ("PCM_U", "PCM_F"):
-        raise ValueError(f"sph does not support {encoding} encoding.")
-    if encoding == "ULAW":
-        if bits_per_sample in (None, 8):
-            return "ULAW"
-        raise ValueError("sph only supports 8-bit for mu-law encoding.")
-    if encoding == "ALAW":
-        return "ALAW"
-    raise ValueError(f"sph does not support {encoding}.")
-
-
-def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sample: int):
-    if format == "wav":
-        return _get_subtype_for_wav(dtype, encoding, bits_per_sample)
-    if format == "flac":
-        if encoding:
-            raise ValueError("flac does not support encoding.")
-        if not bits_per_sample:
-            return "PCM_16"
-        if bits_per_sample > 24:
-            raise ValueError("flac does not support bits_per_sample > 24.")
-        return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}"
-    if format in ("ogg", "vorbis"):
-        if encoding or bits_per_sample:
-            raise ValueError("ogg/vorbis does not support encoding/bits_per_sample.")
-        return "VORBIS"
-    if format == "sph":
-        return _get_subtype_for_sphere(encoding, bits_per_sample)
-    if format in ("nis", "nist"):
-        return "PCM_16"
-    raise ValueError(f"Unsupported format: {format}")
-
-def save(
-    filepath: str,
-    src: paddle.Tensor,
-    sample_rate: int,
-    channels_first: bool = True,
-    compression: Optional[float] = None,
-    format: Optional[str] = None,
-    encoding: Optional[str] = None,
-    bits_per_sample: Optional[int] = None,
-):
-    """Save audio data to file.
-
-    Note:
-        The formats this function can handle depend on the soundfile installation.
-        This function is tested on the following formats;
-
-        * WAV
-
-            * 32-bit floating-point
-            * 32-bit signed integer
-            * 16-bit signed integer
-            * 8-bit unsigned integer
-
-        * FLAC
-        * OGG/VORBIS
-        * SPHERE
-
-    Note:
-        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
-        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend,
-
-    Args:
-        filepath (str or pathlib.Path): Path to audio file.
-        src (paddle.Tensor): Audio data to save. must be 2D tensor.
-        sample_rate (int): sampling rate
-        channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
-            otherwise `[time, channel]`.
-        compression (float of None, optional): Not used.
-            It is here only for interface compatibility reson with "sox_io" backend.
-        format (str or None, optional): Override the audio format.
-            When ``filepath`` argument is path-like object, audio format is
-            inferred from file extension. If the file extension is missing or
-            different, you can specify the correct format with this argument.
-
-            When ``filepath`` argument is file-like object,
-            this argument is required.
-
-            Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
-            ``"flac"`` and ``"sph"``.
-        encoding (str or None, optional): Changes the encoding for supported formats.
-            This argument is effective only for supported formats, sush as
-            ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are;
-
-                - ``"PCM_S"`` (signed integer Linear PCM)
-                - ``"PCM_U"`` (unsigned integer Linear PCM)
-                - ``"PCM_F"`` (floating point PCM)
-                - ``"ULAW"`` (mu-law)
-                - ``"ALAW"`` (a-law)
-
-        bits_per_sample (int or None, optional): Changes the bit depth for the
-            supported formats.
-            When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``,
-            you can change the bit depth.
-            Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``.
-
-    Supported formats/encodings/bit depth/compression are:
-
-    ``"wav"``
-        - 32-bit floating-point PCM
-        - 32-bit signed integer PCM
-        - 24-bit signed integer PCM
-        - 16-bit signed integer PCM
-        - 8-bit unsigned integer PCM
-        - 8-bit mu-law
-        - 8-bit a-law
-
-        Note:
-            Default encoding/bit depth is determined by the dtype of
-            the input Tensor.
-
-    ``"flac"``
-        - 8-bit
-        - 16-bit (default)
-        - 24-bit
-
-    ``"ogg"``, ``"vorbis"``
-        - Doesn't accept changing configuration.
-
-    ``"sph"``
-        - 8-bit signed integer PCM
-        - 16-bit signed integer PCM
-        - 24-bit signed integer PCM
-        - 32-bit signed integer PCM (default)
-        - 8-bit mu-law
-        - 8-bit a-law
-        - 16-bit a-law
-        - 24-bit a-law
-        - 32-bit a-law
-
-    """
-    if src.ndim != 2:
-        raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.")
-    if compression is not None:
-        warnings.warn(
-            '`save` function of "soundfile" backend does not support "compression" parameter. '
-            "The argument is silently ignored."
-        )
-    if hasattr(filepath, "write"):
-        if format is None:
-            raise RuntimeError("`format` is required when saving to file object.")
-        ext = format.lower()
-    else:
-        ext = str(filepath).split(".")[-1].lower()
-
-    if bits_per_sample not in (None, 8, 16, 24, 32, 64):
-        raise ValueError("Invalid bits_per_sample.")
-    if bits_per_sample == 24:
-        warnings.warn(
-            "Saving audio with 24 bits per sample might warp samples near -1. "
-            "Using 16 bits per sample might be able to avoid this."
-        )
-    subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample)
-
-    # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format,
-    # so we extend the extensions manually here
-    if ext in ["nis", "nist", "sph"] and format is None:
-        format = "NIST"
-
-    if channels_first:
-        src = src.t()
-
-    soundfile.write(file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format)
-
-_SUBTYPE2DTYPE = {
-    "PCM_S8": "int8",
-    "PCM_U8": "uint8",
-    "PCM_16": "int16",
-    "PCM_32": "int32",
-    "FLOAT": "float32",
-    "DOUBLE": "float64",
-}
-
-def load(
-    filepath: str,
-    frame_offset: int = 0,
-    num_frames: int = -1,
-    normalize: bool = True,
-    channels_first: bool = True,
-    format: Optional[str] = None,
-) -> Tuple[paddle.Tensor, int]:
-    """Load audio data from file.
-
-    Note:
-        The formats this function can handle depend on the soundfile installation.
-        This function is tested on the following formats;
-
-        * WAV
-
-            * 32-bit floating-point
-            * 32-bit signed integer
-            * 16-bit signed integer
-            * 8-bit unsigned integer
-
-        * FLAC
-        * OGG/VORBIS
-        * SPHERE
-
-    By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
-    ``float32`` dtype and the shape of `[channel, time]`.
-    The samples are normalized to fit in the range of ``[-1.0, 1.0]``.
-
-    When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit
-    signed integer and 8-bit unsigned integer (24-bit signed integer is not supported),
-    by providing ``normalize=False``, this function can return integer Tensor, where the samples
-    are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor
-    for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM.
-
-    ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as
-    ``flac`` and ``mp3``.
-    For these formats, this function always returns ``float32`` Tensor with values normalized to
-    ``[-1.0, 1.0]``.
-
-    Note:
-        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
-        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.
-
-    Args:
-        filepath (path-like object or file-like object):
-            Source of audio data.
-        frame_offset (int, optional):
-            Number of frames to skip before start reading data.
-        num_frames (int, optional):
-            Maximum number of frames to read. ``-1`` reads all the remaining samples,
-            starting from ``frame_offset``.
-            This function may return the less number of frames if there is not enough
-            frames in the given file.
-        normalize (bool, optional):
-            When ``True``, this function always return ``float32``, and sample values are
-            normalized to ``[-1.0, 1.0]``.
-            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
-            integer type.
-            This argument has no effect for formats other than integer WAV type.
-        channels_first (bool, optional):
-            When True, the returned Tensor has dimension `[channel, time]`.
-            Otherwise, the returned Tensor's dimension is `[time, channel]`.
-        format (str or None, optional):
-            Not used. PySoundFile does not accept format hint.
-
-    Returns:
-        (paddle.Tensor, int): Resulting Tensor and sample rate.
-            If the input file has integer wav format and normalization is off, then it has
-            integer type, else ``float32`` type. If ``channels_first=True``, it has
-            `[channel, time]` else `[time, channel]`.
-    """
-    with soundfile.SoundFile(filepath, "r") as file_:
-        if file_.format != "WAV" or normalize:
-            dtype = "float32"
-        elif file_.subtype not in _SUBTYPE2DTYPE:
-            raise ValueError(f"Unsupported subtype: {file_.subtype}")
-        else:
-            dtype = _SUBTYPE2DTYPE[file_.subtype]
-
-        frames = file_._prepare_read(frame_offset, None, num_frames)
-        waveform = file_.read(frames, dtype, always_2d=True)
-        sample_rate = file_.samplerate
-
-    waveform = paddle.to_tensor(waveform)
-    if channels_first:
-        waveform = paddle.transpose(waveform, perm=[1,0])
-    return waveform, sample_rate
-
-
-# Mapping from soundfile subtype to number of bits per sample.
-# This is mostly heuristical and the value is set to 0 when it is irrelevant
-# (lossy formats) or when it can't be inferred.
-# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard:
-# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony,
-# the default seems to be 8 bits but it can be compressed further to 4 bits.
-# The dict is inspired from
-# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94
-_SUBTYPE_TO_BITS_PER_SAMPLE = {
-    "PCM_S8": 8,  # Signed 8 bit data
-    "PCM_16": 16,  # Signed 16 bit data
-    "PCM_24": 24,  # Signed 24 bit data
-    "PCM_32": 32,  # Signed 32 bit data
-    "PCM_U8": 8,  # Unsigned 8 bit data (WAV and RAW only)
-    "FLOAT": 32,  # 32 bit float data
-    "DOUBLE": 64,  # 64 bit float data
-    "ULAW": 8,  # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
-    "ALAW": 8,  # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
-    "IMA_ADPCM": 0,  # IMA ADPCM.
-    "MS_ADPCM": 0,  # Microsoft ADPCM.
-    "GSM610": 0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
-    "VOX_ADPCM": 0,  # OKI / Dialogix ADPCM
-    "G721_32": 0,  # 32kbs G721 ADPCM encoding.
-    "G723_24": 0,  # 24kbs G723 ADPCM encoding.
-    "G723_40": 0,  # 40kbs G723 ADPCM encoding.
-    "DWVW_12": 12,  # 12 bit Delta Width Variable Word encoding.
-    "DWVW_16": 16,  # 16 bit Delta Width Variable Word encoding.
-    "DWVW_24": 24,  # 24 bit Delta Width Variable Word encoding.
-    "DWVW_N": 0,  # N bit Delta Width Variable Word encoding.
-    "DPCM_8": 8,  # 8 bit differential PCM (XI only)
-    "DPCM_16": 16,  # 16 bit differential PCM (XI only)
-    "VORBIS": 0,  # Xiph Vorbis encoding. (lossy)
-    "ALAC_16": 16,  # Apple Lossless Audio Codec (16 bit).
-    "ALAC_20": 20,  # Apple Lossless Audio Codec (20 bit).
-    "ALAC_24": 24,  # Apple Lossless Audio Codec (24 bit).
-    "ALAC_32": 32,  # Apple Lossless Audio Codec (32 bit).
-}
-
-def _get_bit_depth(subtype):
-    if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE:
-        warnings.warn(
-            f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample "
-            "attribute will be set to 0. If you are seeing this warning, please "
-            "report by opening an issue on github (after checking for existing/closed ones). "
-            "You may otherwise ignore this warning."
-        )
-    return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0)
-
-_SUBTYPE_TO_ENCODING = {
-    "PCM_S8": "PCM_S",
-    "PCM_16": "PCM_S",
-    "PCM_24": "PCM_S",
-    "PCM_32": "PCM_S",
-    "PCM_U8": "PCM_U",
-    "FLOAT": "PCM_F",
-    "DOUBLE": "PCM_F",
-    "ULAW": "ULAW",
-    "ALAW": "ALAW",
-    "VORBIS": "VORBIS",
-}
-
-def _get_encoding(format: str, subtype: str):
-    if format == "FLAC":
-        return "FLAC"
-    return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN")
-
-def info(filepath: str, format: Optional[str] = None) -> AudioMetaData:
-    """Get signal information of an audio file.
-
-    Note:
-        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
-        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend,
-
-    Args:
-        filepath (path-like object or file-like object):
-            Source of audio data.
-        format (str or None, optional):
-            Not used. PySoundFile does not accept format hint.
-
-    Returns:
-        AudioMetaData: meta data of the given audio.
-
-    """
-    sinfo = soundfile.info(filepath)
-    return AudioMetaData(
-        sinfo.samplerate,
-        sinfo.frames,
-        sinfo.channels,
-        bits_per_sample=_get_bit_depth(sinfo.subtype),
-        encoding=_get_encoding(sinfo.format, sinfo.subtype),
-    )
diff --git a/paddlespeech/audio/backends/sox_io_backend.py b/paddlespeech/audio/backends/sox_io_backend.py
deleted file mode 100644
index fff9e2069678475f311cc88d00135ef051a22643..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from pathlib import Path
-from typing import Callable
-from typing import Optional, Tuple, Union
-
-import paddle
-from paddle import Tensor
-from .common import AudioMetaData
-import os
-
-from paddlespeech.audio._internal import module_utils  as _mod_utils
-from paddlespeech.audio import _paddleaudio as paddleaudio 
-
-#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
-
-def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
-    raise RuntimeError("Failed to fetch metadata from {}".format(filepath))
-
-
-def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData:
-    raise RuntimeError("Failed to fetch metadata from {}".format(fileobj))
-
-
-# Note: need to comply TorchScript syntax -- need annotation and no f-string
-def _fail_load(
-    filepath: str,
-    frame_offset: int = 0,
-    num_frames: int = -1,
-    normalize: bool = True,
-    channels_first: bool = True,
-    format: Optional[str] = None,
-) -> Tuple[Tensor, int]:
-    raise RuntimeError("Failed to load audio from {}".format(filepath))
-
-
-def _fail_load_fileobj(fileobj, *args, **kwargs):
-    raise RuntimeError(f"Failed to load audio from {fileobj}")
-
-_fallback_info = _fail_info
-_fallback_info_fileobj = _fail_info_fileobj
-_fallback_load = _fail_load
-_fallback_load_filebj = _fail_load_fileobj
-
-@_mod_utils.requires_sox()
-def load(
-        filepath: str,
-        frame_offset: int = 0,
-        num_frames: int=-1,
-        normalize: bool = True,
-        channels_first: bool = True,
-        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
-    if hasattr(filepath, "read"):
-        ret = paddleaudio.load_audio_fileobj(
-            filepath, frame_offset, num_frames, normalize, channels_first, format
-        )
-        if ret is not None:
-            audio_tensor = paddle.to_tensor(ret[0])
-            return (audio_tensor, ret[1])
-        return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format)
-    filepath = os.fspath(filepath)
-    ret = paddleaudio.sox_io_load_audio_file(
-        filepath, frame_offset, num_frames, normalize, channels_first, format
-    )
-    if ret is not None:
-        audio_tensor = paddle.to_tensor(ret[0])
-        return (audio_tensor, ret[1])
-    return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)
-
-
-@_mod_utils.requires_sox()
-def save(filepath: str,
-    src: Tensor,
-    sample_rate: int,
-    channels_first: bool = True,
-    compression: Optional[float] = None,
-    format: Optional[str] = None,
-    encoding: Optional[str] = None,
-    bits_per_sample: Optional[int] = None,
-):
-    src_arr = src.numpy()
-    if hasattr(filepath, "write"):
-        paddleaudio.save_audio_fileobj(
-            filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample
-        )
-        return
-    filepath = os.fspath(filepath)
-    paddleaudio.sox_io_save_audio_file(
-        filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample
-    )
-
-@_mod_utils.requires_sox()
-def info(filepath: str, format: Optional[str] = None,) -> AudioMetaData:
-    if hasattr(filepath, "read"):
-        sinfo = paddleaudio.get_info_fileobj(filepath, format)
-        if sinfo is not None:
-            return AudioMetaData(*sinfo)
-        return _fallback_info_fileobj(filepath, format)
-    filepath = os.fspath(filepath)
-    sinfo = paddleaudio.get_info_file(filepath, format)
-    if sinfo is not None:
-        return AudioMetaData(*sinfo)
-    return _fallback_info(filepath, format)
diff --git a/paddlespeech/audio/backends/utils.py b/paddlespeech/audio/backends/utils.py
deleted file mode 100644
index 9ea2eaca750eeae8499e94b54c90d5a0cf8065ed..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/backends/utils.py
+++ /dev/null
@@ -1,93 +0,0 @@
-"""Defines utilities for switching audio backends"""
-#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py
-
-import warnings
-from typing import List
-from typing import Optional
-
-import paddlespeech.audio
-from paddlespeech.audio._internal import module_utils as _mod_utils
-
-from . import no_backend, soundfile_backend, sox_io_backend
-
-__all__ = [
-    "list_audio_backends",
-    "get_audio_backend",
-    "set_audio_backend",
-]
-
-
-def list_audio_backends() -> List[str]:
-    """List available backends
-
-    Returns:
-        List[str]: The list of available backends.
-    """
-    backends = []
-    if _mod_utils.is_module_available("soundfile"):
-        backends.append("soundfile")
-    if _mod_utils.is_sox_available():
-        backends.append("sox_io")
-    return backends
-
-
-def set_audio_backend(backend: Optional[str]):
-    """Set the backend for I/O operation
-
-    Args:
-        backend (str or None): Name of the backend.
-            One of ``"sox_io"`` or ``"soundfile"`` based on availability
-            of the system. If ``None`` is provided the  current backend is unassigned.
-    """
-    if backend is not None and backend not in list_audio_backends():
-        raise RuntimeError(f'Backend "{backend}" is not one of '
-                           f"available backends: {list_audio_backends()}.")
-
-    if backend is None:
-        module = no_backend
-    elif backend == "sox_io":
-        module = sox_io_backend
-    elif backend == "soundfile":
-        module = soundfile_backend
-    else:
-        raise NotImplementedError(f'Unexpected backend "{backend}"')
-
-    for func in ["save", "load", "info"]:
-        setattr(paddlespeech.audio, func, getattr(module, func))
-
-
-# def _init_audio_backend():
-#     backends = list_audio_backends()
-#     if "sox_io" in backends:
-#         set_audio_backend("sox_io")
-#     elif "soundfile" in backends:
-#         set_audio_backend("soundfile")
-#     else:
-#         warnings.warn("No audio backend is available.")
-#         set_audio_backend(None)
-
-
-def _init_audio_backend():
-    backends = list_audio_backends()
-    if "soundfile" in backends:
-        set_audio_backend("soundfile")
-    elif "sox_io" in backends:
-        set_audio_backend("sox_io")
-    else:
-        warnings.warn("No audio backend is available.")
-        set_audio_backend(None)
-
-
-def get_audio_backend() -> Optional[str]:
-    """Get the name of the current backend
-
-    Returns:
-        Optional[str]: The name of the current backend or ``None`` if no backend is assigned.
-    """
-    if paddlespeech.audio.load == no_backend.load:
-        return None
-    if paddlespeech.audio.load == sox_io_backend.load:
-        return "sox_io"
-    if paddlespeech.audio.load == soundfile_backend.load:
-        return "soundfile"
-    raise ValueError("Unknown backend.")
diff --git a/paddlespeech/audio/compliance/__init__.py b/paddlespeech/audio/compliance/__init__.py
deleted file mode 100644
index c08f9ab11ea7b6e71eb62f095b9404e4d4331e91..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/compliance/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from . import kaldi
-from . import librosa
diff --git a/paddlespeech/audio/compliance/kaldi.py b/paddlespeech/audio/compliance/kaldi.py
deleted file mode 100644
index 538be019619441bee7c135b1e8666a806bc31fa2..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/compliance/kaldi.py
+++ /dev/null
@@ -1,638 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Modified from torchaudio(https://github.com/pytorch/audio)
-import math
-from typing import Tuple
-
-import paddle
-from paddle import Tensor
-
-from ..functional import create_dct
-from ..functional.window import get_window
-
-__all__ = [
-    'spectrogram',
-    'fbank',
-    'mfcc',
-]
-
-# window types
-HANNING = 'hann'
-HAMMING = 'hamming'
-POVEY = 'povey'
-RECTANGULAR = 'rect'
-BLACKMAN = 'blackman'
-
-
-def _get_epsilon(dtype):
-    return paddle.to_tensor(1e-07, dtype=dtype)
-
-
-def _next_power_of_2(x: int) -> int:
-    return 1 if x == 0 else 2**(x - 1).bit_length()
-
-
-def _get_strided(waveform: Tensor,
-                 window_size: int,
-                 window_shift: int,
-                 snip_edges: bool) -> Tensor:
-    assert waveform.dim() == 1
-    num_samples = waveform.shape[0]
-
-    if snip_edges:
-        if num_samples < window_size:
-            return paddle.empty((0, 0), dtype=waveform.dtype)
-        else:
-            m = 1 + (num_samples - window_size) // window_shift
-    else:
-        reversed_waveform = paddle.flip(waveform, [0])
-        m = (num_samples + (window_shift // 2)) // window_shift
-        pad = window_size // 2 - window_shift // 2
-        pad_right = reversed_waveform
-        if pad > 0:
-            pad_left = reversed_waveform[-pad:]
-            waveform = paddle.concat((pad_left, waveform, pad_right), axis=0)
-        else:
-            waveform = paddle.concat((waveform[-pad:], pad_right), axis=0)
-
-    return paddle.signal.frame(waveform, window_size, window_shift)[:, :m].T
-
-
-def _feature_window_function(
-        window_type: str,
-        window_size: int,
-        blackman_coeff: float,
-        dtype: int, ) -> Tensor:
-    if window_type == HANNING:
-        return get_window('hann', window_size, fftbins=False, dtype=dtype)
-    elif window_type == HAMMING:
-        return get_window('hamming', window_size, fftbins=False, dtype=dtype)
-    elif window_type == POVEY:
-        return get_window(
-            'hann', window_size, fftbins=False, dtype=dtype).pow(0.85)
-    elif window_type == RECTANGULAR:
-        return paddle.ones([window_size], dtype=dtype)
-    elif window_type == BLACKMAN:
-        a = 2 * math.pi / (window_size - 1)
-        window_function = paddle.arange(window_size, dtype=dtype)
-        return (blackman_coeff - 0.5 * paddle.cos(a * window_function) +
-                (0.5 - blackman_coeff) * paddle.cos(2 * a * window_function)
-                ).astype(dtype)
-    else:
-        raise Exception('Invalid window type ' + window_type)
-
-
-def _get_log_energy(strided_input: Tensor, epsilon: Tensor,
-                    energy_floor: float) -> Tensor:
-    log_energy = paddle.maximum(strided_input.pow(2).sum(1), epsilon).log()
-    if energy_floor == 0.0:
-        return log_energy
-    return paddle.maximum(
-        log_energy,
-        paddle.to_tensor(math.log(energy_floor), dtype=strided_input.dtype))
-
-
-def _get_waveform_and_window_properties(
-        waveform: Tensor,
-        channel: int,
-        sr: int,
-        frame_shift: float,
-        frame_length: float,
-        round_to_power_of_two: bool,
-        preemphasis_coefficient: float) -> Tuple[Tensor, int, int, int]:
-    channel = max(channel, 0)
-    assert channel < waveform.shape[0], (
-        'Invalid channel {} for size {}'.format(channel, waveform.shape[0]))
-    waveform = waveform[channel, :]  # size (n)
-    window_shift = int(
-        sr * frame_shift *
-        0.001)  # pass frame_shift and frame_length in milliseconds
-    window_size = int(sr * frame_length * 0.001)
-    padded_window_size = _next_power_of_2(
-        window_size) if round_to_power_of_two else window_size
-
-    assert 2 <= window_size <= len(waveform), (
-        'choose a window size {} that is [2, {}]'.format(window_size,
-                                                         len(waveform)))
-    assert 0 < window_shift, '`window_shift` must be greater than 0'
-    assert padded_window_size % 2 == 0, 'the padded `window_size` must be divisible by two.' \
-                                        ' use `round_to_power_of_two` or change `frame_length`'
-    assert 0. <= preemphasis_coefficient <= 1.0, '`preemphasis_coefficient` must be between [0,1]'
-    assert sr > 0, '`sr` must be greater than zero'
-    return waveform, window_shift, window_size, padded_window_size
-
-
-def _get_window(waveform: Tensor,
-                padded_window_size: int,
-                window_size: int,
-                window_shift: int,
-                window_type: str,
-                blackman_coeff: float,
-                snip_edges: bool,
-                raw_energy: bool,
-                energy_floor: float,
-                dither: float,
-                remove_dc_offset: bool,
-                preemphasis_coefficient: float) -> Tuple[Tensor, Tensor]:
-    dtype = waveform.dtype
-    epsilon = _get_epsilon(dtype)
-
-    # (m, window_size)
-    strided_input = _get_strided(waveform, window_size, window_shift,
-                                 snip_edges)
-
-    if dither != 0.0:
-        x = paddle.maximum(epsilon,
-                           paddle.rand(strided_input.shape, dtype=dtype))
-        rand_gauss = paddle.sqrt(-2 * x.log()) * paddle.cos(2 * math.pi * x)
-        strided_input = strided_input + rand_gauss * dither
-
-    if remove_dc_offset:
-        row_means = paddle.mean(strided_input, axis=1).unsqueeze(1)  # (m, 1)
-        strided_input = strided_input - row_means
-
-    if raw_energy:
-        signal_log_energy = _get_log_energy(strided_input, epsilon,
-                                            energy_floor)  # (m)
-
-    if preemphasis_coefficient != 0.0:
-        offset_strided_input = paddle.nn.functional.pad(
-            strided_input.unsqueeze(0), (1, 0),
-            data_format='NCL',
-            mode='replicate').squeeze(0)  # (m, window_size + 1)
-        strided_input = strided_input - preemphasis_coefficient * offset_strided_input[:, :
-                                                                                       -1]
-
-    window_function = _feature_window_function(
-        window_type, window_size, blackman_coeff,
-        dtype).unsqueeze(0)  # (1, window_size)
-    strided_input = strided_input * window_function  # (m, window_size)
-
-    # (m, padded_window_size)
-    if padded_window_size != window_size:
-        padding_right = padded_window_size - window_size
-        strided_input = paddle.nn.functional.pad(
-            strided_input.unsqueeze(0), (0, padding_right),
-            data_format='NCL',
-            mode='constant',
-            value=0).squeeze(0)
-
-    if not raw_energy:
-        signal_log_energy = _get_log_energy(strided_input, epsilon,
-                                            energy_floor)  # size (m)
-
-    return strided_input, signal_log_energy
-
-
-def _subtract_column_mean(tensor: Tensor, subtract_mean: bool) -> Tensor:
-    if subtract_mean:
-        col_means = paddle.mean(tensor, axis=0).unsqueeze(0)
-        tensor = tensor - col_means
-    return tensor
-
-
-def spectrogram(waveform: Tensor,
-                blackman_coeff: float=0.42,
-                channel: int=-1,
-                dither: float=0.0,
-                energy_floor: float=1.0,
-                frame_length: float=25.0,
-                frame_shift: float=10.0,
-                preemphasis_coefficient: float=0.97,
-                raw_energy: bool=True,
-                remove_dc_offset: bool=True,
-                round_to_power_of_two: bool=True,
-                sr: int=16000,
-                snip_edges: bool=True,
-                subtract_mean: bool=False,
-                window_type: str=POVEY) -> Tensor:
-    """Compute and return a spectrogram from a waveform. The output is identical to Kaldi's.
-
-    Args:
-        waveform (Tensor): A waveform tensor with shape `(C, T)`.
-        blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42.
-        channel (int, optional): Select the channel of waveform. Defaults to -1.
-        dither (float, optional): Dithering constant . Defaults to 0.0.
-        energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0.
-        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
-        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
-        preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97.
-        raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True.
-        remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True.
-        round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
-            to FFT. Defaults to True.
-        sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-        snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it
-            is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
-        subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
-        window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY.
-
-    Returns:
-        Tensor: A spectrogram tensor with shape `(m, padded_window_size // 2 + 1)` where m is the number of frames
-            depends on frame_length and frame_shift.
-    """
-    dtype = waveform.dtype
-    epsilon = _get_epsilon(dtype)
-
-    waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties(
-        waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two,
-        preemphasis_coefficient)
-
-    strided_input, signal_log_energy = _get_window(
-        waveform, padded_window_size, window_size, window_shift, window_type,
-        blackman_coeff, snip_edges, raw_energy, energy_floor, dither,
-        remove_dc_offset, preemphasis_coefficient)
-
-    # (m, padded_window_size // 2 + 1, 2)
-    fft = paddle.fft.rfft(strided_input)
-
-    power_spectrum = paddle.maximum(
-        fft.abs().pow(2.), epsilon).log()  # (m, padded_window_size // 2 + 1)
-    power_spectrum[:, 0] = signal_log_energy
-
-    power_spectrum = _subtract_column_mean(power_spectrum, subtract_mean)
-    return power_spectrum
-
-
-def _inverse_mel_scale_scalar(mel_freq: float) -> float:
-    return 700.0 * (math.exp(mel_freq / 1127.0) - 1.0)
-
-
-def _inverse_mel_scale(mel_freq: Tensor) -> Tensor:
-    return 700.0 * ((mel_freq / 1127.0).exp() - 1.0)
-
-
-def _mel_scale_scalar(freq: float) -> float:
-    return 1127.0 * math.log(1.0 + freq / 700.0)
-
-
-def _mel_scale(freq: Tensor) -> Tensor:
-    return 1127.0 * (1.0 + freq / 700.0).log()
-
-
-def _vtln_warp_freq(vtln_low_cutoff: float,
-                    vtln_high_cutoff: float,
-                    low_freq: float,
-                    high_freq: float,
-                    vtln_warp_factor: float,
-                    freq: Tensor) -> Tensor:
-    assert vtln_low_cutoff > low_freq, 'be sure to set the vtln_low option higher than low_freq'
-    assert vtln_high_cutoff < high_freq, 'be sure to set the vtln_high option lower than high_freq [or negative]'
-    l = vtln_low_cutoff * max(1.0, vtln_warp_factor)
-    h = vtln_high_cutoff * min(1.0, vtln_warp_factor)
-    scale = 1.0 / vtln_warp_factor
-    Fl = scale * l
-    Fh = scale * h
-    assert l > low_freq and h < high_freq
-    scale_left = (Fl - low_freq) / (l - low_freq)
-    scale_right = (high_freq - Fh) / (high_freq - h)
-    res = paddle.empty_like(freq)
-
-    outside_low_high_freq = paddle.less_than(freq, paddle.to_tensor(low_freq)) \
-        | paddle.greater_than(freq, paddle.to_tensor(high_freq))
-    before_l = paddle.less_than(freq, paddle.to_tensor(l))
-    before_h = paddle.less_than(freq, paddle.to_tensor(h))
-    after_h = paddle.greater_equal(freq, paddle.to_tensor(h))
-
-    res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq)
-    res[before_h] = scale * freq[before_h]
-    res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq)
-    res[outside_low_high_freq] = freq[outside_low_high_freq]
-
-    return res
-
-
-def _vtln_warp_mel_freq(vtln_low_cutoff: float,
-                        vtln_high_cutoff: float,
-                        low_freq,
-                        high_freq: float,
-                        vtln_warp_factor: float,
-                        mel_freq: Tensor) -> Tensor:
-    return _mel_scale(
-        _vtln_warp_freq(vtln_low_cutoff, vtln_high_cutoff, low_freq, high_freq,
-                        vtln_warp_factor, _inverse_mel_scale(mel_freq)))
-
-
-def _get_mel_banks(num_bins: int,
-                   window_length_padded: int,
-                   sample_freq: float,
-                   low_freq: float,
-                   high_freq: float,
-                   vtln_low: float,
-                   vtln_high: float,
-                   vtln_warp_factor: float) -> Tuple[Tensor, Tensor]:
-    assert num_bins > 3, 'Must have at least 3 mel bins'
-    assert window_length_padded % 2 == 0
-    num_fft_bins = window_length_padded / 2
-    nyquist = 0.5 * sample_freq
-
-    if high_freq <= 0.0:
-        high_freq += nyquist
-
-    assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \
-        ('Bad values in options: low-freq {} and high-freq {} vs. nyquist {}'.format(low_freq, high_freq, nyquist))
-
-    fft_bin_width = sample_freq / window_length_padded
-    mel_low_freq = _mel_scale_scalar(low_freq)
-    mel_high_freq = _mel_scale_scalar(high_freq)
-
-    mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1)
-
-    if vtln_high < 0.0:
-        vtln_high += nyquist
-
-    assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and
-                                       (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \
-        ('Bad values in options: vtln-low {} and vtln-high {}, versus '
-         'low-freq {} and high-freq {}'.format(vtln_low, vtln_high, low_freq, high_freq))
-
-    bin = paddle.arange(num_bins).unsqueeze(1)
-    left_mel = mel_low_freq + bin * mel_freq_delta  # (num_bins, 1)
-    center_mel = mel_low_freq + (bin + 1.0) * mel_freq_delta  # (num_bins, 1)
-    right_mel = mel_low_freq + (bin + 2.0) * mel_freq_delta  # (num_bins, 1)
-
-    if vtln_warp_factor != 1.0:
-        left_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq,
-                                       vtln_warp_factor, left_mel)
-        center_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
-                                         high_freq, vtln_warp_factor,
-                                         center_mel)
-        right_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
-                                        high_freq, vtln_warp_factor, right_mel)
-
-    center_freqs = _inverse_mel_scale(center_mel)  # (num_bins)
-    # (1, num_fft_bins)
-    mel = _mel_scale(fft_bin_width * paddle.arange(num_fft_bins)).unsqueeze(0)
-
-    # (num_bins, num_fft_bins)
-    up_slope = (mel - left_mel) / (center_mel - left_mel)
-    down_slope = (right_mel - mel) / (right_mel - center_mel)
-
-    if vtln_warp_factor == 1.0:
-        bins = paddle.maximum(
-            paddle.zeros([1]), paddle.minimum(up_slope, down_slope))
-    else:
-        bins = paddle.zeros_like(up_slope)
-        up_idx = paddle.greater_than(mel, left_mel) & paddle.less_than(
-            mel, center_mel)
-        down_idx = paddle.greater_than(mel, center_mel) & paddle.less_than(
-            mel, right_mel)
-        bins[up_idx] = up_slope[up_idx]
-        bins[down_idx] = down_slope[down_idx]
-
-    return bins, center_freqs
-
-
-def fbank(waveform: Tensor,
-          blackman_coeff: float=0.42,
-          channel: int=-1,
-          dither: float=0.0,
-          energy_floor: float=1.0,
-          frame_length: float=25.0,
-          frame_shift: float=10.0,
-          high_freq: float=0.0,
-          htk_compat: bool=False,
-          low_freq: float=20.0,
-          n_mels: int=23,
-          preemphasis_coefficient: float=0.97,
-          raw_energy: bool=True,
-          remove_dc_offset: bool=True,
-          round_to_power_of_two: bool=True,
-          sr: int=16000,
-          snip_edges: bool=True,
-          subtract_mean: bool=False,
-          use_energy: bool=False,
-          use_log_fbank: bool=True,
-          use_power: bool=True,
-          vtln_high: float=-500.0,
-          vtln_low: float=100.0,
-          vtln_warp: float=1.0,
-          window_type: str=POVEY) -> Tensor:
-    """Compute and return filter banks from a waveform. The output is identical to Kaldi's.
-
-    Args:
-        waveform (Tensor): A waveform tensor with shape `(C, T)`.
-        blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42.
-        channel (int, optional): Select the channel of waveform. Defaults to -1.
-        dither (float, optional): Dithering constant . Defaults to 0.0.
-        energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0.
-        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
-        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
-        high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0.
-        htk_compat (bool, optional): Put energy to the last when it is set True. Defaults to False.
-        low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0.
-        n_mels (int, optional): Number of output mel bins. Defaults to 23.
-        preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97.
-        raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True.
-        remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True.
-        round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
-            to FFT. Defaults to True.
-        sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-        snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it
-            is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
-        subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
-        use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.
-        use_log_fbank (bool, optional): Return log fbank when it is set True. Defaults to True.
-        use_power (bool, optional): Whether to use power instead of magnitude. Defaults to True.
-        vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0.
-        vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0.
-        vtln_warp (float, optional): Vtln warp factor. Defaults to 1.0.
-        window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY.
-
-    Returns:
-        Tensor: A filter banks tensor with shape `(m, n_mels)`.
-    """
-    dtype = waveform.dtype
-
-    waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties(
-        waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two,
-        preemphasis_coefficient)
-
-    strided_input, signal_log_energy = _get_window(
-        waveform, padded_window_size, window_size, window_shift, window_type,
-        blackman_coeff, snip_edges, raw_energy, energy_floor, dither,
-        remove_dc_offset, preemphasis_coefficient)
-
-    # (m, padded_window_size // 2 + 1)
-    spectrum = paddle.fft.rfft(strided_input).abs()
-    if use_power:
-        spectrum = spectrum.pow(2.)
-
-    # (n_mels, padded_window_size // 2)
-    mel_energies, _ = _get_mel_banks(n_mels, padded_window_size, sr, low_freq,
-                                     high_freq, vtln_low, vtln_high, vtln_warp)
-    mel_energies = mel_energies.astype(dtype)
-
-    # (n_mels, padded_window_size // 2 + 1)
-    mel_energies = paddle.nn.functional.pad(
-        mel_energies.unsqueeze(0), (0, 1),
-        data_format='NCL',
-        mode='constant',
-        value=0).squeeze(0)
-
-    # (m, n_mels)
-    mel_energies = paddle.mm(spectrum, mel_energies.T)
-    if use_log_fbank:
-        mel_energies = paddle.maximum(mel_energies, _get_epsilon(dtype)).log()
-
-    if use_energy:
-        signal_log_energy = signal_log_energy.unsqueeze(1)
-        if htk_compat:
-            mel_energies = paddle.concat(
-                (mel_energies, signal_log_energy), axis=1)
-        else:
-            mel_energies = paddle.concat(
-                (signal_log_energy, mel_energies), axis=1)
-
-    # (m, n_mels + 1)
-    mel_energies = _subtract_column_mean(mel_energies, subtract_mean)
-    return mel_energies
-
-
-def _get_dct_matrix(n_mfcc: int, n_mels: int) -> Tensor:
-    dct_matrix = create_dct(n_mels, n_mels, 'ortho')
-    dct_matrix[:, 0] = math.sqrt(1 / float(n_mels))
-    dct_matrix = dct_matrix[:, :n_mfcc]  # (n_mels, n_mfcc)
-    return dct_matrix
-
-
-def _get_lifter_coeffs(n_mfcc: int, cepstral_lifter: float) -> Tensor:
-    i = paddle.arange(n_mfcc)
-    return 1.0 + 0.5 * cepstral_lifter * paddle.sin(math.pi * i /
-                                                    cepstral_lifter)
-
-
-def mfcc(waveform: Tensor,
-         blackman_coeff: float=0.42,
-         cepstral_lifter: float=22.0,
-         channel: int=-1,
-         dither: float=0.0,
-         energy_floor: float=1.0,
-         frame_length: float=25.0,
-         frame_shift: float=10.0,
-         high_freq: float=0.0,
-         htk_compat: bool=False,
-         low_freq: float=20.0,
-         n_mfcc: int=13,
-         n_mels: int=23,
-         preemphasis_coefficient: float=0.97,
-         raw_energy: bool=True,
-         remove_dc_offset: bool=True,
-         round_to_power_of_two: bool=True,
-         sr: int=16000,
-         snip_edges: bool=True,
-         subtract_mean: bool=False,
-         use_energy: bool=False,
-         vtln_high: float=-500.0,
-         vtln_low: float=100.0,
-         vtln_warp: float=1.0,
-         window_type: str=POVEY) -> Tensor:
-    """Compute and return mel frequency cepstral coefficients from a waveform. The output is
-            identical to Kaldi's.
-
-    Args:
-        waveform (Tensor): A waveform tensor with shape `(C, T)`.
-        blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42.
-        cepstral_lifter (float, optional): Scaling of output mfccs. Defaults to 22.0.
-        channel (int, optional): Select the channel of waveform. Defaults to -1.
-        dither (float, optional): Dithering constant . Defaults to 0.0.
-        energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0.
-        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
-        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
-        high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0.
-        htk_compat (bool, optional): Put energy to the last when it is set True. Defaults to False.
-        low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0.
-        n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 13.
-        n_mels (int, optional): Number of output mel bins. Defaults to 23.
-        preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97.
-        raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True.
-        remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True.
-        round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
-            to FFT. Defaults to True.
-        sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-        snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it
-            is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
-        subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
-        use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.
-        vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0.
-        vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0.
-        vtln_warp (float, optional): Vtln warp factor. Defaults to 1.0.
-        window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY.
-
-    Returns:
-        Tensor: A mel frequency cepstral coefficients tensor with shape `(m, n_mfcc)`.
-    """
-    assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % (
-        n_mfcc, n_mels)
-
-    dtype = waveform.dtype
-
-    # (m, n_mels + use_energy)
-    feature = fbank(
-        waveform=waveform,
-        blackman_coeff=blackman_coeff,
-        channel=channel,
-        dither=dither,
-        energy_floor=energy_floor,
-        frame_length=frame_length,
-        frame_shift=frame_shift,
-        high_freq=high_freq,
-        htk_compat=htk_compat,
-        low_freq=low_freq,
-        n_mels=n_mels,
-        preemphasis_coefficient=preemphasis_coefficient,
-        raw_energy=raw_energy,
-        remove_dc_offset=remove_dc_offset,
-        round_to_power_of_two=round_to_power_of_two,
-        sr=sr,
-        snip_edges=snip_edges,
-        subtract_mean=False,
-        use_energy=use_energy,
-        use_log_fbank=True,
-        use_power=True,
-        vtln_high=vtln_high,
-        vtln_low=vtln_low,
-        vtln_warp=vtln_warp,
-        window_type=window_type)
-
-    if use_energy:
-        # (m)
-        signal_log_energy = feature[:, n_mels if htk_compat else 0]
-        mel_offset = int(not htk_compat)
-        feature = feature[:, mel_offset:(n_mels + mel_offset)]
-
-    # (n_mels, n_mfcc)
-    dct_matrix = _get_dct_matrix(n_mfcc, n_mels).astype(dtype=dtype)
-
-    # (m, n_mfcc)
-    feature = feature.matmul(dct_matrix)
-
-    if cepstral_lifter != 0.0:
-        # (1, n_mfcc)
-        lifter_coeffs = _get_lifter_coeffs(n_mfcc, cepstral_lifter).unsqueeze(0)
-        feature *= lifter_coeffs.astype(dtype=dtype)
-
-    if use_energy:
-        feature[:, 0] = signal_log_energy
-
-    if htk_compat:
-        energy = feature[:, 0].unsqueeze(1)  # (m, 1)
-        feature = feature[:, 1:]  # (m, n_mfcc - 1)
-        if not use_energy:
-            energy *= math.sqrt(2)
-
-        feature = paddle.concat((feature, energy), axis=1)
-
-    feature = _subtract_column_mean(feature, subtract_mean)
-    return feature
diff --git a/paddlespeech/audio/compliance/librosa.py b/paddlespeech/audio/compliance/librosa.py
deleted file mode 100644
index 17ad51b41d92ebcdf933a5866f72a68ffa65b41b..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/compliance/librosa.py
+++ /dev/null
@@ -1,788 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Modified from librosa(https://github.com/librosa/librosa)
-import warnings
-from typing import List
-from typing import Optional
-from typing import Union
-
-import numpy as np
-import scipy
-from numpy.lib.stride_tricks import as_strided
-from scipy import signal
-
-from ..utils import depth_convert
-from ..utils import ParameterError
-
-__all__ = [
-    # dsp
-    'stft',
-    'mfcc',
-    'hz_to_mel',
-    'mel_to_hz',
-    'mel_frequencies',
-    'power_to_db',
-    'compute_fbank_matrix',
-    'melspectrogram',
-    'spectrogram',
-    'mu_encode',
-    'mu_decode',
-    # augmentation
-    'depth_augment',
-    'spect_augment',
-    'random_crop1d',
-    'random_crop2d',
-    'adaptive_spect_augment',
-]
-
-
-def _pad_center(data: np.ndarray, size: int, axis: int=-1,
-                **kwargs) -> np.ndarray:
-    """Pad an array to a target length along a target axis.
-
-    This differs from `np.pad` by centering the data prior to padding,
-    analogous to `str.center`
-    """
-
-    kwargs.setdefault("mode", "constant")
-    n = data.shape[axis]
-    lpad = int((size - n) // 2)
-    lengths = [(0, 0)] * data.ndim
-    lengths[axis] = (lpad, int(size - n - lpad))
-
-    if lpad < 0:
-        raise ParameterError(("Target size ({size:d}) must be "
-                              "at least input size ({n:d})"))
-
-    return np.pad(data, lengths, **kwargs)
-
-
-def _split_frames(x: np.ndarray,
-                  frame_length: int,
-                  hop_length: int,
-                  axis: int=-1) -> np.ndarray:
-    """Slice a data array into (overlapping) frames.
-
-    This function is aligned with librosa.frame
-    """
-
-    if not isinstance(x, np.ndarray):
-        raise ParameterError(
-            f"Input must be of type numpy.ndarray, given type(x)={type(x)}")
-
-    if x.shape[axis] < frame_length:
-        raise ParameterError(f"Input is too short (n={x.shape[axis]:d})"
-                             f" for frame_length={frame_length:d}")
-
-    if hop_length < 1:
-        raise ParameterError(f"Invalid hop_length: {hop_length:d}")
-
-    if axis == -1 and not x.flags["F_CONTIGUOUS"]:
-        warnings.warn(f"librosa.util.frame called with axis={axis} "
-                      "on a non-contiguous input. This will result in a copy.")
-        x = np.asfortranarray(x)
-    elif axis == 0 and not x.flags["C_CONTIGUOUS"]:
-        warnings.warn(f"librosa.util.frame called with axis={axis} "
-                      "on a non-contiguous input. This will result in a copy.")
-        x = np.ascontiguousarray(x)
-
-    n_frames = 1 + (x.shape[axis] - frame_length) // hop_length
-    strides = np.asarray(x.strides)
-
-    new_stride = np.prod(strides[strides > 0] // x.itemsize) * x.itemsize
-
-    if axis == -1:
-        shape = list(x.shape)[:-1] + [frame_length, n_frames]
-        strides = list(strides) + [hop_length * new_stride]
-
-    elif axis == 0:
-        shape = [n_frames, frame_length] + list(x.shape)[1:]
-        strides = [hop_length * new_stride] + list(strides)
-
-    else:
-        raise ParameterError(f"Frame axis={axis} must be either 0 or -1")
-
-    return as_strided(x, shape=shape, strides=strides)
-
-
-def _check_audio(y, mono=True) -> bool:
-    """Determine whether a variable contains valid audio data.
-
-    The audio y must be a np.ndarray, ether 1-channel or two channel
-    """
-    if not isinstance(y, np.ndarray):
-        raise ParameterError("Audio data must be of type numpy.ndarray")
-    if y.ndim > 2:
-        raise ParameterError(
-            f"Invalid shape for audio ndim={y.ndim:d}, shape={y.shape}")
-
-    if mono and y.ndim == 2:
-        raise ParameterError(
-            f"Invalid shape for mono audio ndim={y.ndim:d}, shape={y.shape}")
-
-    if (mono and len(y) == 0) or (not mono and y.shape[1] < 0):
-        raise ParameterError(f"Audio is empty ndim={y.ndim:d}, shape={y.shape}")
-
-    if not np.issubdtype(y.dtype, np.floating):
-        raise ParameterError("Audio data must be floating-point")
-
-    if not np.isfinite(y).all():
-        raise ParameterError("Audio buffer is not finite everywhere")
-
-    return True
-
-
-def hz_to_mel(frequencies: Union[float, List[float], np.ndarray],
-              htk: bool=False) -> np.ndarray:
-    """Convert Hz to Mels.
-
-    Args:
-        frequencies (Union[float, List[float], np.ndarray]): Frequencies in Hz.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-
-    Returns:
-        np.ndarray: Frequency in mels.
-    """
-    freq = np.asanyarray(frequencies)
-
-    if htk:
-        return 2595.0 * np.log10(1.0 + freq / 700.0)
-
-    # Fill in the linear part
-    f_min = 0.0
-    f_sp = 200.0 / 3
-
-    mels = (freq - f_min) / f_sp
-
-    # Fill in the log-scale part
-
-    min_log_hz = 1000.0  # beginning of log region (Hz)
-    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
-    logstep = np.log(6.4) / 27.0  # step size for log region
-
-    if freq.ndim:
-        # If we have array data, vectorize
-        log_t = freq >= min_log_hz
-        mels[log_t] = min_log_mel + \
-            np.log(freq[log_t] / min_log_hz) / logstep
-    elif freq >= min_log_hz:
-        # If we have scalar data, heck directly
-        mels = min_log_mel + np.log(freq / min_log_hz) / logstep
-
-    return mels
-
-
-def mel_to_hz(mels: Union[float, List[float], np.ndarray],
-              htk: int=False) -> np.ndarray:
-    """Convert mel bin numbers to frequencies.
-
-    Args:
-        mels (Union[float, List[float], np.ndarray]): Frequency in mels.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-
-    Returns:
-        np.ndarray: Frequencies in Hz.
-    """
-    mel_array = np.asanyarray(mels)
-
-    if htk:
-        return 700.0 * (10.0**(mel_array / 2595.0) - 1.0)
-
-    # Fill in the linear scale
-    f_min = 0.0
-    f_sp = 200.0 / 3
-    freqs = f_min + f_sp * mel_array
-
-    # And now the nonlinear scale
-    min_log_hz = 1000.0  # beginning of log region (Hz)
-    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
-    logstep = np.log(6.4) / 27.0  # step size for log region
-
-    if mel_array.ndim:
-        # If we have vector data, vectorize
-        log_t = mel_array >= min_log_mel
-        freqs[log_t] = min_log_hz * \
-            np.exp(logstep * (mel_array[log_t] - min_log_mel))
-    elif mel_array >= min_log_mel:
-        # If we have scalar data, check directly
-        freqs = min_log_hz * np.exp(logstep * (mel_array - min_log_mel))
-
-    return freqs
-
-
-def mel_frequencies(n_mels: int=128,
-                    fmin: float=0.0,
-                    fmax: float=11025.0,
-                    htk: bool=False) -> np.ndarray:
-    """Compute mel frequencies.
-
-    Args:
-        n_mels (int, optional): Number of mel bins. Defaults to 128.
-        fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0.
-        fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-
-    Returns:
-        np.ndarray: Vector of n_mels frequencies in Hz with shape `(n_mels,)`.
-    """
-    # 'Center freqs' of mel bands - uniformly spaced between limits
-    min_mel = hz_to_mel(fmin, htk=htk)
-    max_mel = hz_to_mel(fmax, htk=htk)
-
-    mels = np.linspace(min_mel, max_mel, n_mels)
-
-    return mel_to_hz(mels, htk=htk)
-
-
-def fft_frequencies(sr: int, n_fft: int) -> np.ndarray:
-    """Compute fourier frequencies.
-
-    Args:
-        sr (int): Sample rate.
-        n_fft (int): FFT size.
-
-    Returns:
-        np.ndarray: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
-    """
-    return np.linspace(0, float(sr) / 2, int(1 + n_fft // 2), endpoint=True)
-
-
-def compute_fbank_matrix(sr: int,
-                         n_fft: int,
-                         n_mels: int=128,
-                         fmin: float=0.0,
-                         fmax: Optional[float]=None,
-                         htk: bool=False,
-                         norm: str="slaney",
-                         dtype: type=np.float32) -> np.ndarray:
-    """Compute fbank matrix.
-
-    Args:
-        sr (int): Sample rate.
-        n_fft (int): FFT size.
-        n_mels (int, optional): Number of mel bins. Defaults to 128.
-        fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0.
-        fmax (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-        norm (str, optional): Type of normalization. Defaults to "slaney".
-        dtype (type, optional): Data type. Defaults to np.float32.
-
-
-    Returns:
-        np.ndarray: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.
-    """
-    if norm != "slaney":
-        raise ParameterError('norm must set to slaney')
-
-    if fmax is None:
-        fmax = float(sr) / 2
-
-    # Initialize the weights
-    n_mels = int(n_mels)
-    weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)
-
-    # Center freqs of each FFT bin
-    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft)
-
-    # 'Center freqs' of mel bands - uniformly spaced between limits
-    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)
-
-    fdiff = np.diff(mel_f)
-    ramps = np.subtract.outer(mel_f, fftfreqs)
-
-    for i in range(n_mels):
-        # lower and upper slopes for all bins
-        lower = -ramps[i] / fdiff[i]
-        upper = ramps[i + 2] / fdiff[i + 1]
-
-        # .. then intersect them with each other and zero
-        weights[i] = np.maximum(0, np.minimum(lower, upper))
-
-    if norm == "slaney":
-        # Slaney-style mel is scaled to be approx constant energy per channel
-        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
-        weights *= enorm[:, np.newaxis]
-
-    # Only check weights if f_mel[0] is positive
-    if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)):
-        # This means we have an empty channel somewhere
-        warnings.warn("Empty filters detected in mel frequency basis. "
-                      "Some channels will produce empty responses. "
-                      "Try increasing your sampling rate (and fmax) or "
-                      "reducing n_mels.")
-
-    return weights
-
-
-def stft(x: np.ndarray,
-         n_fft: int=2048,
-         hop_length: Optional[int]=None,
-         win_length: Optional[int]=None,
-         window: str="hann",
-         center: bool=True,
-         dtype: type=np.complex64,
-         pad_mode: str="reflect") -> np.ndarray:
-    """Short-time Fourier transform (STFT).
-
-    Args:
-        x (np.ndarray): Input waveform in one dimension.
-        n_fft (int, optional): FFT size. Defaults to 2048.
-        hop_length (Optional[int], optional): Number of steps to advance between adjacent windows. Defaults to None.
-        win_length (Optional[int], optional): The size of window. Defaults to None.
-        window (str, optional): A string of window specification. Defaults to "hann".
-        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
-        dtype (type, optional): Data type of STFT results. Defaults to np.complex64.
-        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
-
-    Returns:
-        np.ndarray: The complex STFT output with shape `(n_fft//2 + 1, num_frames)`.
-    """
-    _check_audio(x)
-
-    # By default, use the entire frame
-    if win_length is None:
-        win_length = n_fft
-
-    # Set the default hop, if it's not already specified
-    if hop_length is None:
-        hop_length = int(win_length // 4)
-
-    fft_window = signal.get_window(window, win_length, fftbins=True)
-
-    # Pad the window out to n_fft size
-    fft_window = _pad_center(fft_window, n_fft)
-
-    # Reshape so that the window can be broadcast
-    fft_window = fft_window.reshape((-1, 1))
-
-    # Pad the time series so that frames are centered
-    if center:
-        if n_fft > x.shape[-1]:
-            warnings.warn(
-                f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}"
-            )
-        x = np.pad(x, int(n_fft // 2), mode=pad_mode)
-
-    elif n_fft > x.shape[-1]:
-        raise ParameterError(
-            f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}"
-        )
-
-    # Window the time series.
-    x_frames = _split_frames(x, frame_length=n_fft, hop_length=hop_length)
-    # Pre-allocate the STFT matrix
-    stft_matrix = np.empty(
-        (int(1 + n_fft // 2), x_frames.shape[1]), dtype=dtype, order="F")
-    fft = np.fft  # use numpy fft as default
-    # Constrain STFT block sizes to 256 KB
-    MAX_MEM_BLOCK = 2**8 * 2**10
-    # how many columns can we fit within MAX_MEM_BLOCK?
-    n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize)
-    n_columns = max(n_columns, 1)
-
-    for bl_s in range(0, stft_matrix.shape[1], n_columns):
-        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])
-        stft_matrix[:, bl_s:bl_t] = fft.rfft(
-            fft_window * x_frames[:, bl_s:bl_t], axis=0)
-
-    return stft_matrix
-
-
-def power_to_db(spect: np.ndarray,
-                ref: float=1.0,
-                amin: float=1e-10,
-                top_db: Optional[float]=80.0) -> np.ndarray:
-    """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way.
-
-    Args:
-        spect (np.ndarray): STFT power spectrogram of an input waveform.
-        ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
-        amin (float, optional): Minimum threshold. Defaults to 1e-10.
-        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to 80.0.
-
-    Returns:
-        np.ndarray: Power spectrogram in db scale.
-    """
-    spect = np.asarray(spect)
-
-    if amin <= 0:
-        raise ParameterError("amin must be strictly positive")
-
-    if np.issubdtype(spect.dtype, np.complexfloating):
-        warnings.warn(
-            "power_to_db was called on complex input so phase "
-            "information will be discarded. To suppress this warning, "
-            "call power_to_db(np.abs(D)**2) instead.")
-        magnitude = np.abs(spect)
-    else:
-        magnitude = spect
-
-    if callable(ref):
-        # User supplied a function to calculate reference power
-        ref_value = ref(magnitude)
-    else:
-        ref_value = np.abs(ref)
-
-    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
-    log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))
-
-    if top_db is not None:
-        if top_db < 0:
-            raise ParameterError("top_db must be non-negative")
-        log_spec = np.maximum(log_spec, log_spec.max() - top_db)
-
-    return log_spec
-
-
-def mfcc(x: np.ndarray,
-         sr: int=16000,
-         spect: Optional[np.ndarray]=None,
-         n_mfcc: int=20,
-         dct_type: int=2,
-         norm: str="ortho",
-         lifter: int=0,
-         **kwargs) -> np.ndarray:
-    """Mel-frequency cepstral coefficients (MFCCs)
-
-    Args:
-        x (np.ndarray): Input waveform in one dimension.
-        sr (int, optional): Sample rate. Defaults to 16000.
-        spect (Optional[np.ndarray], optional): Input log-power Mel spectrogram. Defaults to None.
-        n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 20.
-        dct_type (int, optional): Discrete cosine transform (DCT) type. Defaults to 2.
-        norm (str, optional): Type of normalization. Defaults to "ortho".
-        lifter (int, optional): Cepstral filtering. Defaults to 0.
-
-    Returns:
-        np.ndarray: Mel frequency cepstral coefficients array with shape `(n_mfcc, num_frames)`.
-    """
-    if spect is None:
-        spect = melspectrogram(x, sr=sr, **kwargs)
-
-    M = scipy.fftpack.dct(spect, axis=0, type=dct_type, norm=norm)[:n_mfcc]
-
-    if lifter > 0:
-        factor = np.sin(np.pi * np.arange(1, 1 + n_mfcc, dtype=M.dtype) /
-                        lifter)
-        return M * factor[:, np.newaxis]
-    elif lifter == 0:
-        return M
-    else:
-        raise ParameterError(
-            f"MFCC lifter={lifter} must be a non-negative number")
-
-
-def melspectrogram(x: np.ndarray,
-                   sr: int=16000,
-                   window_size: int=512,
-                   hop_length: int=320,
-                   n_mels: int=64,
-                   fmin: float=50.0,
-                   fmax: Optional[float]=None,
-                   window: str='hann',
-                   center: bool=True,
-                   pad_mode: str='reflect',
-                   power: float=2.0,
-                   to_db: bool=True,
-                   ref: float=1.0,
-                   amin: float=1e-10,
-                   top_db: Optional[float]=None) -> np.ndarray:
-    """Compute mel-spectrogram.
-
-    Args:
-        x (np.ndarray): Input waveform in one dimension.
-        sr (int, optional): Sample rate. Defaults to 16000.
-        window_size (int, optional): Size of FFT and window length. Defaults to 512.
-        hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320.
-        n_mels (int, optional): Number of mel bins. Defaults to 64.
-        fmin (float, optional): Minimum frequency in Hz. Defaults to 50.0.
-        fmax (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
-        window (str, optional): A string of window specification. Defaults to "hann".
-        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
-        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
-        power (float, optional): Exponent for the magnitude melspectrogram. Defaults to 2.0.
-        to_db (bool, optional): Enable db scale. Defaults to True.
-        ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
-        amin (float, optional): Minimum threshold. Defaults to 1e-10.
-        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None.
-
-    Returns:
-        np.ndarray: The mel-spectrogram in power scale or db scale with shape `(n_mels, num_frames)`.
-    """
-    _check_audio(x, mono=True)
-    if len(x) <= 0:
-        raise ParameterError('The input waveform is empty')
-
-    if fmax is None:
-        fmax = sr // 2
-    if fmin < 0 or fmin >= fmax:
-        raise ParameterError('fmin and fmax must statisfy 0<fmin<fmax')
-
-    s = stft(
-        x,
-        n_fft=window_size,
-        hop_length=hop_length,
-        win_length=window_size,
-        window=window,
-        center=center,
-        pad_mode=pad_mode)
-
-    spect_power = np.abs(s)**power
-    fb_matrix = compute_fbank_matrix(
-        sr=sr, n_fft=window_size, n_mels=n_mels, fmin=fmin, fmax=fmax)
-    mel_spect = np.matmul(fb_matrix, spect_power)
-    if to_db:
-        return power_to_db(mel_spect, ref=ref, amin=amin, top_db=top_db)
-    else:
-        return mel_spect
-
-
-def spectrogram(x: np.ndarray,
-                sr: int=16000,
-                window_size: int=512,
-                hop_length: int=320,
-                window: str='hann',
-                center: bool=True,
-                pad_mode: str='reflect',
-                power: float=2.0) -> np.ndarray:
-    """Compute spectrogram.
-
-    Args:
-        x (np.ndarray): Input waveform in one dimension.
-        sr (int, optional): Sample rate. Defaults to 16000.
-        window_size (int, optional): Size of FFT and window length. Defaults to 512.
-        hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320.
-        window (str, optional): A string of window specification. Defaults to "hann".
-        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
-        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
-        power (float, optional): Exponent for the magnitude melspectrogram. Defaults to 2.0.
-
-    Returns:
-        np.ndarray: The STFT spectrogram in power scale `(n_fft//2 + 1, num_frames)`.
-    """
-
-    s = stft(
-        x,
-        n_fft=window_size,
-        hop_length=hop_length,
-        win_length=window_size,
-        window=window,
-        center=center,
-        pad_mode=pad_mode)
-
-    return np.abs(s)**power
-
-
-def mu_encode(x: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
-    """Mu-law encoding. Encode waveform based on mu-law companding. When quantized is True, the result will be converted to integer in range `[0,mu-1]`. Otherwise, the resulting waveform is in range `[-1,1]`.
-
-    Args:
-        x (np.ndarray): The input waveform to encode.
-        mu (int, optional): The endoceding parameter. Defaults to 255.
-        quantized (bool, optional): If `True`, quantize the encoded values into `1 + mu` distinct integer values. Defaults to True.
-
-    Returns:
-        np.ndarray: The mu-law encoded waveform.
-    """
-    mu = 255
-    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
-    if quantized:
-        y = np.floor((y + 1) / 2 * mu + 0.5)  # convert to [0 , mu-1]
-    return y
-
-
-def mu_decode(y: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
-    """Mu-law decoding. Compute the mu-law decoding given an input code. It assumes that the input `y` is in range `[0,mu-1]` when quantize is True and `[-1,1]` otherwise.
-
-    Args:
-        y (np.ndarray): The encoded waveform.
-        mu (int, optional): The endoceding parameter. Defaults to 255.
-        quantized (bool, optional): If `True`, the input is assumed to be quantized to `1 + mu` distinct integer values. Defaults to True.
-
-    Returns:
-        np.ndarray: The mu-law decoded waveform.
-    """
-    if mu < 1:
-        raise ParameterError('mu is typically set as 2**k-1, k=1, 2, 3,...')
-
-    mu = mu - 1
-    if quantized:  # undo the quantization
-        y = y * 2 / mu - 1
-    x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1)
-    return x
-
-
-def _randint(high: int) -> int:
-    """Generate one random integer in range [0 high)
-
-     This is a helper function for random data augmentaiton
-    """
-    return int(np.random.randint(0, high=high))
-
-
-def depth_augment(y: np.ndarray,
-                  choices: List=['int8', 'int16'],
-                  probs: List[float]=[0.5, 0.5]) -> np.ndarray:
-    """ Audio depth augmentation. Do audio depth augmentation to simulate the distortion brought by quantization.
-
-    Args:
-        y (np.ndarray): Input waveform array in 1D or 2D.
-        choices (List, optional): A list of data type to depth conversion. Defaults to ['int8', 'int16'].
-        probs (List[float], optional): Probabilities to depth conversion. Defaults to [0.5, 0.5].
-
-    Returns:
-        np.ndarray: The augmented waveform.
-    """
-    assert len(probs) == len(
-        choices
-    ), 'number of choices {} must be equal to size of probs {}'.format(
-        len(choices), len(probs))
-    depth = np.random.choice(choices, p=probs)
-    src_depth = y.dtype
-    y1 = depth_convert(y, depth)
-    y2 = depth_convert(y1, src_depth)
-
-    return y2
-
-
-def adaptive_spect_augment(spect: np.ndarray,
-                           tempo_axis: int=0,
-                           level: float=0.1) -> np.ndarray:
-    """Do adpative spectrogram augmentation. The level of the augmentation is gowern by the paramter level, ranging from 0 to 1, with 0 represents no augmentation.
-
-    Args:
-        spect (np.ndarray): Input spectrogram.
-        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
-        level (float, optional): The level factor of masking. Defaults to 0.1.
-
-    Returns:
-        np.ndarray: The augmented spectrogram.
-    """
-    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
-    if tempo_axis == 0:
-        nt, nf = spect.shape
-    else:
-        nf, nt = spect.shape
-
-    time_mask_width = int(nt * level * 0.5)
-    freq_mask_width = int(nf * level * 0.5)
-
-    num_time_mask = int(10 * level)
-    num_freq_mask = int(10 * level)
-
-    if tempo_axis == 0:
-        for _ in range(num_time_mask):
-            start = _randint(nt - time_mask_width)
-            spect[start:start + time_mask_width, :] = 0
-        for _ in range(num_freq_mask):
-            start = _randint(nf - freq_mask_width)
-            spect[:, start:start + freq_mask_width] = 0
-    else:
-        for _ in range(num_time_mask):
-            start = _randint(nt - time_mask_width)
-            spect[:, start:start + time_mask_width] = 0
-        for _ in range(num_freq_mask):
-            start = _randint(nf - freq_mask_width)
-            spect[start:start + freq_mask_width, :] = 0
-
-    return spect
-
-
-def spect_augment(spect: np.ndarray,
-                  tempo_axis: int=0,
-                  max_time_mask: int=3,
-                  max_freq_mask: int=3,
-                  max_time_mask_width: int=30,
-                  max_freq_mask_width: int=20) -> np.ndarray:
-    """Do spectrogram augmentation in both time and freq axis.
-
-    Args:
-        spect (np.ndarray): Input spectrogram.
-        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
-        max_time_mask (int, optional): Maximum number of time masking. Defaults to 3.
-        max_freq_mask (int, optional): Maximum number of frenquence masking. Defaults to 3.
-        max_time_mask_width (int, optional): Maximum width of time masking. Defaults to 30.
-        max_freq_mask_width (int, optional): Maximum width of frenquence masking. Defaults to 20.
-
-    Returns:
-        np.ndarray: The augmented spectrogram.
-    """
-    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
-    if tempo_axis == 0:
-        nt, nf = spect.shape
-    else:
-        nf, nt = spect.shape
-
-    num_time_mask = _randint(max_time_mask)
-    num_freq_mask = _randint(max_freq_mask)
-
-    time_mask_width = _randint(max_time_mask_width)
-    freq_mask_width = _randint(max_freq_mask_width)
-
-    if tempo_axis == 0:
-        for _ in range(num_time_mask):
-            start = _randint(nt - time_mask_width)
-            spect[start:start + time_mask_width, :] = 0
-        for _ in range(num_freq_mask):
-            start = _randint(nf - freq_mask_width)
-            spect[:, start:start + freq_mask_width] = 0
-    else:
-        for _ in range(num_time_mask):
-            start = _randint(nt - time_mask_width)
-            spect[:, start:start + time_mask_width] = 0
-        for _ in range(num_freq_mask):
-            start = _randint(nf - freq_mask_width)
-            spect[start:start + freq_mask_width, :] = 0
-
-    return spect
-
-
-def random_crop1d(y: np.ndarray, crop_len: int) -> np.ndarray:
-    """ Random cropping on a input waveform.
-
-    Args:
-        y (np.ndarray): Input waveform array in 1D.
-        crop_len (int): Length of waveform to crop.
-
-    Returns:
-        np.ndarray: The cropped waveform.
-    """
-    if y.ndim != 1:
-        'only accept 1d tensor or numpy array'
-    n = len(y)
-    idx = _randint(n - crop_len)
-    return y[idx:idx + crop_len]
-
-
-def random_crop2d(s: np.ndarray, crop_len: int,
-                  tempo_axis: int=0) -> np.ndarray:
-    """ Random cropping on a spectrogram.
-
-    Args:
-        s (np.ndarray): Input spectrogram in 2D.
-        crop_len (int): Length of spectrogram to crop.
-        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
-
-    Returns:
-        np.ndarray: The cropped spectrogram.
-    """
-    if tempo_axis >= s.ndim:
-        raise ParameterError('axis out of range')
-
-    n = s.shape[tempo_axis]
-    idx = _randint(high=n - crop_len)
-    sli = [slice(None) for i in range(s.ndim)]
-    sli[tempo_axis] = slice(idx, idx + crop_len)
-    out = s[tuple(sli)]
-    return out
diff --git a/paddlespeech/audio/datasets/__init__.py b/paddlespeech/audio/datasets/__init__.py
deleted file mode 100644
index f95fad3054de8d19f24f881b69b682ae6def5b5b..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from .esc50 import ESC50
-from .gtzan import GTZAN
-from .hey_snips import HeySnips
-from .rirs_noises import OpenRIRNoise
-from .tess import TESS
-from .urban_sound import UrbanSound8K
-from .voxceleb import VoxCeleb
diff --git a/paddlespeech/audio/datasets/dataset.py b/paddlespeech/audio/datasets/dataset.py
deleted file mode 100644
index 81e6bdf5efd632578167bd762415b4c4896d4f13..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/dataset.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import List
-
-import numpy as np
-import paddle
-
-from ..compliance.kaldi import fbank as kaldi_fbank
-from ..compliance.kaldi import mfcc as kaldi_mfcc
-from ..compliance.librosa import melspectrogram
-from ..compliance.librosa import mfcc
-
-feat_funcs = {
-    'raw': None,
-    'melspectrogram': melspectrogram,
-    'mfcc': mfcc,
-    'kaldi_fbank': kaldi_fbank,
-    'kaldi_mfcc': kaldi_mfcc,
-}
-
-
-class AudioClassificationDataset(paddle.io.Dataset):
-    """
-    Base class of audio classification dataset.
-    """
-
-    def __init__(self,
-                 files: List[str],
-                 labels: List[int],
-                 feat_type: str='raw',
-                 sample_rate: int=None,
-                 **kwargs):
-        """
-        Ags:
-            files (:obj:`List[str]`): A list of absolute path of audio files.
-            labels (:obj:`List[int]`): Labels of audio files.
-            feat_type (:obj:`str`, `optional`, defaults to `raw`):
-                It identifies the feature type that user wants to extrace of an audio file.
-        """
-        super(AudioClassificationDataset, self).__init__()
-
-        if feat_type not in feat_funcs.keys():
-            raise RuntimeError(
-                f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}"
-            )
-
-        self.files = files
-        self.labels = labels
-
-        self.feat_type = feat_type
-        self.sample_rate = sample_rate
-        self.feat_config = kwargs  # Pass keyword arguments to customize feature config
-
-    def _get_data(self, input_file: str):
-        raise NotImplementedError
-
-    def _convert_to_record(self, idx):
-        file, label = self.files[idx], self.labels[idx]
-
-        if self.sample_rate is None:
-            waveform, sample_rate = paddlespeech.audio.load(file)
-        else:
-            waveform, sample_rate = paddlespeech.audio.load(
-                file, sr=self.sample_rate)
-
-        feat_func = feat_funcs[self.feat_type]
-
-        record = {}
-        if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
-            waveform = paddle.to_tensor(waveform).unsqueeze(0)  # (C, T)
-            record['feat'] = feat_func(
-                waveform=waveform, sr=self.sample_rate, **self.feat_config)
-        else:
-            record['feat'] = feat_func(
-                waveform, sample_rate,
-                **self.feat_config) if feat_func else waveform
-        record['label'] = label
-        return record
-
-    def __getitem__(self, idx):
-        record = self._convert_to_record(idx)
-        if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
-            return self.keys[idx], record['feat'], record['label']
-        else:
-            return np.array(record['feat']).transpose(), np.array(
-                record['label'], dtype=np.int64)
-
-    def __len__(self):
-        return len(self.files)
diff --git a/paddlespeech/audio/datasets/esc50.py b/paddlespeech/audio/datasets/esc50.py
deleted file mode 100644
index f5c7050f3fcc75dcb8fd53e7927cf92c22fb40e2..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/esc50.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import collections
-import os
-from typing import List
-from typing import Tuple
-
-from ..utils import DATA_HOME
-from ..utils.download import download_and_decompress
-from .dataset import AudioClassificationDataset
-
-__all__ = ['ESC50']
-
-
-class ESC50(AudioClassificationDataset):
-    """
-    The ESC-50 dataset is a labeled collection of 2000 environmental audio recordings
-    suitable for benchmarking methods of environmental sound classification. The dataset
-    consists of 5-second-long recordings organized into 50 semantical classes (with
-    40 examples per class)
-
-    Reference:
-        ESC: Dataset for Environmental Sound Classification
-        http://dx.doi.org/10.1145/2733373.2806390
-    """
-
-    archieves = [
-        {
-            'url':
-            'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip',
-            'md5': '7771e4b9d86d0945acce719c7a59305a',
-        },
-    ]
-    label_list = [
-        # Animals
-        'Dog',
-        'Rooster',
-        'Pig',
-        'Cow',
-        'Frog',
-        'Cat',
-        'Hen',
-        'Insects (flying)',
-        'Sheep',
-        'Crow',
-        # Natural soundscapes & water sounds
-        'Rain',
-        'Sea waves',
-        'Crackling fire',
-        'Crickets',
-        'Chirping birds',
-        'Water drops',
-        'Wind',
-        'Pouring water',
-        'Toilet flush',
-        'Thunderstorm',
-        # Human, non-speech sounds
-        'Crying baby',
-        'Sneezing',
-        'Clapping',
-        'Breathing',
-        'Coughing',
-        'Footsteps',
-        'Laughing',
-        'Brushing teeth',
-        'Snoring',
-        'Drinking, sipping',
-        # Interior/domestic sounds
-        'Door knock',
-        'Mouse click',
-        'Keyboard typing',
-        'Door, wood creaks',
-        'Can opening',
-        'Washing machine',
-        'Vacuum cleaner',
-        'Clock alarm',
-        'Clock tick',
-        'Glass breaking',
-        # Exterior/urban noises
-        'Helicopter',
-        'Chainsaw',
-        'Siren',
-        'Car horn',
-        'Engine',
-        'Train',
-        'Church bells',
-        'Airplane',
-        'Fireworks',
-        'Hand saw',
-    ]
-    meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv')
-    meta_info = collections.namedtuple(
-        'META_INFO',
-        ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'))
-    audio_path = os.path.join('ESC-50-master', 'audio')
-
-    def __init__(self,
-                 mode: str='train',
-                 split: int=1,
-                 feat_type: str='raw',
-                 **kwargs):
-        """
-        Ags:
-            mode (:obj:`str`, `optional`, defaults to `train`):
-                It identifies the dataset mode (train or dev).
-            split (:obj:`int`, `optional`, defaults to 1):
-                It specify the fold of dev dataset.
-            feat_type (:obj:`str`, `optional`, defaults to `raw`):
-                It identifies the feature type that user wants to extrace of an audio file.
-        """
-        files, labels = self._get_data(mode, split)
-        super(ESC50, self).__init__(
-            files=files, labels=labels, feat_type=feat_type, **kwargs)
-
-    def _get_meta_info(self) -> List[collections.namedtuple]:
-        ret = []
-        with open(os.path.join(DATA_HOME, self.meta), 'r') as rf:
-            for line in rf.readlines()[1:]:
-                ret.append(self.meta_info(*line.strip().split(',')))
-        return ret
-
-    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
-        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
-            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
-            download_and_decompress(self.archieves, DATA_HOME)
-
-        meta_info = self._get_meta_info()
-
-        files = []
-        labels = []
-        for sample in meta_info:
-            filename, fold, target, _, _, _, _ = sample
-            if mode == 'train' and int(fold) != split:
-                files.append(os.path.join(DATA_HOME, self.audio_path, filename))
-                labels.append(int(target))
-
-            if mode != 'train' and int(fold) == split:
-                files.append(os.path.join(DATA_HOME, self.audio_path, filename))
-                labels.append(int(target))
-
-        return files, labels
diff --git a/paddlespeech/audio/datasets/gtzan.py b/paddlespeech/audio/datasets/gtzan.py
deleted file mode 100644
index 1f6835a5a8ea89161c14bda4d619c2a68f45779f..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/gtzan.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import collections
-import os
-import random
-from typing import List
-from typing import Tuple
-
-from ..utils import DATA_HOME
-from ..utils.download import download_and_decompress
-from .dataset import AudioClassificationDataset
-
-__all__ = ['GTZAN']
-
-
-class GTZAN(AudioClassificationDataset):
-    """
-    The GTZAN dataset consists of 1000 audio tracks each 30 seconds long. It contains 10 genres,
-    each represented by 100 tracks. The dataset is the most-used public dataset for evaluation
-    in machine listening research for music genre recognition (MGR).
-
-    Reference:
-        Musical genre classification of audio signals
-        https://ieeexplore.ieee.org/document/1021072/
-    """
-
-    archieves = [
-        {
-            'url': 'http://opihi.cs.uvic.ca/sound/genres.tar.gz',
-            'md5': '5b3d6dddb579ab49814ab86dba69e7c7',
-        },
-    ]
-    label_list = [
-        'blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal',
-        'pop', 'reggae', 'rock'
-    ]
-    meta = os.path.join('genres', 'input.mf')
-    meta_info = collections.namedtuple('META_INFO', ('file_path', 'label'))
-    audio_path = 'genres'
-
-    def __init__(self,
-                 mode='train',
-                 seed=0,
-                 n_folds=5,
-                 split=1,
-                 feat_type='raw',
-                 **kwargs):
-        """
-        Ags:
-            mode (:obj:`str`, `optional`, defaults to `train`):
-                It identifies the dataset mode (train or dev).
-            seed (:obj:`int`, `optional`, defaults to 0):
-                Set the random seed to shuffle samples.
-            n_folds (:obj:`int`, `optional`, defaults to 5):
-                Split the dataset into n folds. 1 fold for dev dataset and n-1 for train dataset.
-            split (:obj:`int`, `optional`, defaults to 1):
-                It specify the fold of dev dataset.
-            feat_type (:obj:`str`, `optional`, defaults to `raw`):
-                It identifies the feature type that user wants to extrace of an audio file.
-        """
-        assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}'
-        files, labels = self._get_data(mode, seed, n_folds, split)
-        super(GTZAN, self).__init__(
-            files=files, labels=labels, feat_type=feat_type, **kwargs)
-
-    def _get_meta_info(self) -> List[collections.namedtuple]:
-        ret = []
-        with open(os.path.join(DATA_HOME, self.meta), 'r') as rf:
-            for line in rf.readlines():
-                ret.append(self.meta_info(*line.strip().split('\t')))
-        return ret
-
-    def _get_data(self, mode, seed, n_folds,
-                  split) -> Tuple[List[str], List[int]]:
-        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
-            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
-            download_and_decompress(self.archieves, DATA_HOME)
-
-        meta_info = self._get_meta_info()
-        random.seed(seed)  # shuffle samples to split data
-        random.shuffle(
-            meta_info
-        )  # make sure using the same seed to create train and dev dataset
-
-        files = []
-        labels = []
-        n_samples_per_fold = len(meta_info) // n_folds
-        for idx, sample in enumerate(meta_info):
-            file_path, label = sample
-            filename = os.path.basename(file_path)
-            target = self.label_list.index(label)
-            fold = idx // n_samples_per_fold + 1
-
-            if mode == 'train' and int(fold) != split:
-                files.append(
-                    os.path.join(DATA_HOME, self.audio_path, label, filename))
-                labels.append(target)
-
-            if mode != 'train' and int(fold) == split:
-                files.append(
-                    os.path.join(DATA_HOME, self.audio_path, label, filename))
-                labels.append(target)
-
-        return files, labels
diff --git a/paddlespeech/audio/datasets/hey_snips.py b/paddlespeech/audio/datasets/hey_snips.py
deleted file mode 100644
index 7a67b843bb4dca8bea4f49c69cd7dd2105e2618d..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/hey_snips.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import collections
-import json
-import os
-from typing import List
-from typing import Tuple
-
-from .dataset import AudioClassificationDataset
-
-__all__ = ['HeySnips']
-
-
-class HeySnips(AudioClassificationDataset):
-    meta_info = collections.namedtuple('META_INFO',
-                                       ('key', 'label', 'duration', 'wav'))
-
-    def __init__(self,
-                 data_dir: os.PathLike,
-                 mode: str='train',
-                 feat_type: str='kaldi_fbank',
-                 sample_rate: int=16000,
-                 **kwargs):
-        self.data_dir = data_dir
-        files, labels = self._get_data(mode)
-        super(HeySnips, self).__init__(
-            files=files,
-            labels=labels,
-            feat_type=feat_type,
-            sample_rate=sample_rate,
-            **kwargs)
-
-    def _get_meta_info(self, mode) -> List[collections.namedtuple]:
-        ret = []
-        with open(os.path.join(self.data_dir, '{}.json'.format(mode)),
-                  'r') as f:
-            data = json.load(f)
-            for item in data:
-                sample = collections.OrderedDict()
-                if item['duration'] > 0:
-                    sample['key'] = item['id']
-                    sample['label'] = 0 if item['is_hotword'] == 1 else -1
-                    sample['duration'] = item['duration']
-                    sample['wav'] = os.path.join(self.data_dir,
-                                                 item['audio_file_path'])
-                    ret.append(self.meta_info(*sample.values()))
-        return ret
-
-    def _get_data(self, mode: str) -> Tuple[List[str], List[int]]:
-        meta_info = self._get_meta_info(mode)
-
-        files = []
-        labels = []
-        self.keys = []
-        self.durations = []
-        for sample in meta_info:
-            key, target, duration, wav = sample
-            files.append(wav)
-            labels.append(int(target))
-            self.keys.append(key)
-            self.durations.append(float(duration))
-
-        return files, labels
diff --git a/paddlespeech/audio/datasets/rirs_noises.py b/paddlespeech/audio/datasets/rirs_noises.py
deleted file mode 100644
index 61bbf72a25aff962cd7e693789a9bfa1055d7302..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/rirs_noises.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import collections
-import csv
-import os
-import random
-from typing import List
-
-from paddle.io import Dataset
-from tqdm import tqdm
-
-from ..utils import DATA_HOME
-from ..utils.download import download_and_decompress
-from .dataset import feat_funcs
-
-__all__ = ['OpenRIRNoise']
-
-
-class OpenRIRNoise(Dataset):
-    archieves = [
-        {
-            'url': 'http://www.openslr.org/resources/28/rirs_noises.zip',
-            'md5': 'e6f48e257286e05de56413b4779d8ffb',
-        },
-    ]
-
-    sample_rate = 16000
-    meta_info = collections.namedtuple('META_INFO', ('id', 'duration', 'wav'))
-    base_path = os.path.join(DATA_HOME, 'open_rir_noise')
-    wav_path = os.path.join(base_path, 'RIRS_NOISES')
-    csv_path = os.path.join(base_path, 'csv')
-    subsets = ['rir', 'noise']
-
-    def __init__(self,
-                 subset: str='rir',
-                 feat_type: str='raw',
-                 target_dir=None,
-                 random_chunk: bool=True,
-                 chunk_duration: float=3.0,
-                 seed: int=0,
-                 **kwargs):
-
-        assert subset in self.subsets, \
-            'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset)
-
-        self.subset = subset
-        self.feat_type = feat_type
-        self.feat_config = kwargs
-        self.random_chunk = random_chunk
-        self.chunk_duration = chunk_duration
-
-        OpenRIRNoise.csv_path = os.path.join(
-            target_dir, "open_rir_noise",
-            "csv") if target_dir else self.csv_path
-        self._data = self._get_data()
-        super(OpenRIRNoise, self).__init__()
-
-        # Set up a seed to reproduce training or predicting result.
-        # random.seed(seed)
-
-    def _get_data(self):
-        # Download audio files.
-        print(f"rirs noises base path: {self.base_path}")
-        if not os.path.isdir(self.base_path):
-            download_and_decompress(
-                self.archieves, self.base_path, decompress=True)
-        else:
-            print(
-                f"{self.base_path} already exists, we will not download and decompress again"
-            )
-
-        # Data preparation.
-        print(f"prepare the csv to {self.csv_path}")
-        if not os.path.isdir(self.csv_path):
-            os.makedirs(self.csv_path)
-            self.prepare_data()
-
-        data = []
-        with open(os.path.join(self.csv_path, f'{self.subset}.csv'), 'r') as rf:
-            for line in rf.readlines()[1:]:
-                audio_id, duration, wav = line.strip().split(',')
-                data.append(self.meta_info(audio_id, float(duration), wav))
-
-        random.shuffle(data)
-        return data
-
-    def _convert_to_record(self, idx: int):
-        sample = self._data[idx]
-
-        record = {}
-        # To show all fields in a namedtuple: `type(sample)._fields`
-        for field in type(sample)._fields:
-            record[field] = getattr(sample, field)
-
-        waveform, sr = paddlespeech.audio.load(record['wav'])
-
-        assert self.feat_type in feat_funcs.keys(), \
-            f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}"
-        feat_func = feat_funcs[self.feat_type]
-        feat = feat_func(
-            waveform, sr=sr, **self.feat_config) if feat_func else waveform
-
-        record.update({'feat': feat})
-        return record
-
-    @staticmethod
-    def _get_chunks(seg_dur, audio_id, audio_duration):
-        num_chunks = int(audio_duration / seg_dur)  # all in milliseconds
-
-        chunk_lst = [
-            audio_id + "_" + str(i * seg_dur) + "_" + str(i * seg_dur + seg_dur)
-            for i in range(num_chunks)
-        ]
-        return chunk_lst
-
-    def _get_audio_info(self, wav_file: str,
-                        split_chunks: bool) -> List[List[str]]:
-        waveform, sr = paddlespeech.audio.load(wav_file)
-        audio_id = wav_file.split("/open_rir_noise/")[-1].split(".")[0]
-        audio_duration = waveform.shape[0] / sr
-
-        ret = []
-        if split_chunks and audio_duration > self.chunk_duration:  # Split into pieces of self.chunk_duration seconds.
-            uniq_chunks_list = self._get_chunks(self.chunk_duration, audio_id,
-                                                audio_duration)
-
-            for idx, chunk in enumerate(uniq_chunks_list):
-                s, e = chunk.split("_")[-2:]  # Timestamps of start and end
-                start_sample = int(float(s) * sr)
-                end_sample = int(float(e) * sr)
-                new_wav_file = os.path.join(self.base_path,
-                                            audio_id + f'_chunk_{idx+1:02}.wav')
-                paddlespeech.audio.save(waveform[start_sample:end_sample], sr,
-                                        new_wav_file)
-                # id, duration, new_wav
-                ret.append([chunk, self.chunk_duration, new_wav_file])
-        else:  # Keep whole audio.
-            ret.append([audio_id, audio_duration, wav_file])
-        return ret
-
-    def generate_csv(self,
-                     wav_files: List[str],
-                     output_file: str,
-                     split_chunks: bool=True):
-        print(f'Generating csv: {output_file}')
-        header = ["id", "duration", "wav"]
-
-        infos = list(
-            tqdm(
-                map(self._get_audio_info, wav_files, [split_chunks] * len(
-                    wav_files)),
-                total=len(wav_files)))
-
-        csv_lines = []
-        for info in infos:
-            csv_lines.extend(info)
-
-        with open(output_file, mode="w") as csv_f:
-            csv_writer = csv.writer(
-                csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
-            csv_writer.writerow(header)
-            for line in csv_lines:
-                csv_writer.writerow(line)
-
-    def prepare_data(self):
-        rir_list = os.path.join(self.wav_path, "real_rirs_isotropic_noises",
-                                "rir_list")
-        rir_files = []
-        with open(rir_list, 'r') as f:
-            for line in f.readlines():
-                rir_file = line.strip().split(' ')[-1]
-                rir_files.append(os.path.join(self.base_path, rir_file))
-
-        noise_list = os.path.join(self.wav_path, "pointsource_noises",
-                                  "noise_list")
-        noise_files = []
-        with open(noise_list, 'r') as f:
-            for line in f.readlines():
-                noise_file = line.strip().split(' ')[-1]
-                noise_files.append(os.path.join(self.base_path, noise_file))
-
-        self.generate_csv(rir_files, os.path.join(self.csv_path, 'rir.csv'))
-        self.generate_csv(noise_files, os.path.join(self.csv_path, 'noise.csv'))
-
-    def __getitem__(self, idx):
-        return self._convert_to_record(idx)
-
-    def __len__(self):
-        return len(self._data)
diff --git a/paddlespeech/audio/datasets/tess.py b/paddlespeech/audio/datasets/tess.py
deleted file mode 100644
index 1469fa5e2456166ef30977ac4ac44bb302be7eb2..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/tess.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import collections
-import os
-import random
-from typing import List
-from typing import Tuple
-
-from ..utils import DATA_HOME
-from ..utils.download import download_and_decompress
-from .dataset import AudioClassificationDataset
-
-__all__ = ['TESS']
-
-
-class TESS(AudioClassificationDataset):
-    """
-    TESS is a set of 200 target words were spoken in the carrier phrase
-    "Say the word _____' by two actresses (aged 26 and 64 years) and
-    recordings were made of the set portraying each of seven emotions(anger,
-    disgust, fear, happiness, pleasant surprise, sadness, and neutral).
-    There are 2800 stimuli in total.
-
-    Reference:
-        Toronto emotional speech set (TESS)
-        https://doi.org/10.5683/SP2/E8H2MF
-    """
-
-    archieves = [
-        {
-            'url':
-            'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip',
-            'md5':
-            '1465311b24d1de704c4c63e4ccc470c7',
-        },
-    ]
-    label_list = [
-        'angry',
-        'disgust',
-        'fear',
-        'happy',
-        'neutral',
-        'ps',  # pleasant surprise
-        'sad',
-    ]
-    meta_info = collections.namedtuple('META_INFO',
-                                       ('speaker', 'word', 'emotion'))
-    audio_path = 'TESS_Toronto_emotional_speech_set'
-
-    def __init__(self,
-                 mode='train',
-                 seed=0,
-                 n_folds=5,
-                 split=1,
-                 feat_type='raw',
-                 **kwargs):
-        """
-        Ags:
-            mode (:obj:`str`, `optional`, defaults to `train`):
-                It identifies the dataset mode (train or dev).
-            seed (:obj:`int`, `optional`, defaults to 0):
-                Set the random seed to shuffle samples.
-            n_folds (:obj:`int`, `optional`, defaults to 5):
-                Split the dataset into n folds. 1 fold for dev dataset and n-1 for train dataset.
-            split (:obj:`int`, `optional`, defaults to 1):
-                It specify the fold of dev dataset.
-            feat_type (:obj:`str`, `optional`, defaults to `raw`):
-                It identifies the feature type that user wants to extrace of an audio file.
-        """
-        assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}'
-        files, labels = self._get_data(mode, seed, n_folds, split)
-        super(TESS, self).__init__(
-            files=files, labels=labels, feat_type=feat_type, **kwargs)
-
-    def _get_meta_info(self, files) -> List[collections.namedtuple]:
-        ret = []
-        for file in files:
-            basename_without_extend = os.path.basename(file)[:-4]
-            ret.append(self.meta_info(*basename_without_extend.split('_')))
-        return ret
-
-    def _get_data(self, mode, seed, n_folds,
-                  split) -> Tuple[List[str], List[int]]:
-        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)):
-            download_and_decompress(self.archieves, DATA_HOME)
-
-        wav_files = []
-        for root, _, files in os.walk(os.path.join(DATA_HOME, self.audio_path)):
-            for file in files:
-                if file.endswith('.wav'):
-                    wav_files.append(os.path.join(root, file))
-
-        random.seed(seed)  # shuffle samples to split data
-        random.shuffle(
-            wav_files
-        )  # make sure using the same seed to create train and dev dataset
-        meta_info = self._get_meta_info(wav_files)
-
-        files = []
-        labels = []
-        n_samples_per_fold = len(meta_info) // n_folds
-        for idx, sample in enumerate(meta_info):
-            _, _, emotion = sample
-            target = self.label_list.index(emotion)
-            fold = idx // n_samples_per_fold + 1
-
-            if mode == 'train' and int(fold) != split:
-                files.append(wav_files[idx])
-                labels.append(target)
-
-            if mode != 'train' and int(fold) == split:
-                files.append(wav_files[idx])
-                labels.append(target)
-
-        return files, labels
diff --git a/paddlespeech/audio/datasets/urban_sound.py b/paddlespeech/audio/datasets/urban_sound.py
deleted file mode 100644
index 0389cd5f97471e5f01e738d288f47d82d33650dc..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/urban_sound.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import collections
-import os
-from typing import List
-from typing import Tuple
-
-from ..utils import DATA_HOME
-from ..utils.download import download_and_decompress
-from .dataset import AudioClassificationDataset
-
-__all__ = ['UrbanSound8K']
-
-
-class UrbanSound8K(AudioClassificationDataset):
-    """
-    UrbanSound8K dataset contains 8732 labeled sound excerpts (<=4s) of urban
-    sounds from 10 classes: air_conditioner, car_horn, children_playing, dog_bark,
-    drilling, enginge_idling, gun_shot, jackhammer, siren, and street_music. The
-    classes are drawn from the urban sound taxonomy.
-
-    Reference:
-        A Dataset and Taxonomy for Urban Sound Research
-        https://dl.acm.org/doi/10.1145/2647868.2655045
-    """
-
-    archieves = [
-        {
-            'url':
-            'https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz',
-            'md5': '9aa69802bbf37fb986f71ec1483a196e',
-        },
-    ]
-    label_list = [
-        "air_conditioner", "car_horn", "children_playing", "dog_bark",
-        "drilling", "engine_idling", "gun_shot", "jackhammer", "siren",
-        "street_music"
-    ]
-    meta = os.path.join('UrbanSound8K', 'metadata', 'UrbanSound8K.csv')
-    meta_info = collections.namedtuple(
-        'META_INFO', ('filename', 'fsid', 'start', 'end', 'salience', 'fold',
-                      'class_id', 'label'))
-    audio_path = os.path.join('UrbanSound8K', 'audio')
-
-    def __init__(self,
-                 mode: str='train',
-                 split: int=1,
-                 feat_type: str='raw',
-                 **kwargs):
-        files, labels = self._get_data(mode, split)
-        super(UrbanSound8K, self).__init__(
-            files=files, labels=labels, feat_type=feat_type, **kwargs)
-        """
-        Ags:
-            mode (:obj:`str`, `optional`, defaults to `train`):
-                It identifies the dataset mode (train or dev).
-            split (:obj:`int`, `optional`, defaults to 1):
-                It specify the fold of dev dataset.
-            feat_type (:obj:`str`, `optional`, defaults to `raw`):
-                It identifies the feature type that user wants to extrace of an audio file.
-        """
-
-    def _get_meta_info(self):
-        ret = []
-        with open(os.path.join(DATA_HOME, self.meta), 'r') as rf:
-            for line in rf.readlines()[1:]:
-                ret.append(self.meta_info(*line.strip().split(',')))
-        return ret
-
-    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
-        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
-            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
-            download_and_decompress(self.archieves, DATA_HOME)
-
-        meta_info = self._get_meta_info()
-
-        files = []
-        labels = []
-        for sample in meta_info:
-            filename, _, _, _, _, fold, target, _ = sample
-            if mode == 'train' and int(fold) != split:
-                files.append(
-                    os.path.join(DATA_HOME, self.audio_path, f'fold{fold}',
-                                 filename))
-                labels.append(int(target))
-
-            if mode != 'train' and int(fold) == split:
-                files.append(
-                    os.path.join(DATA_HOME, self.audio_path, f'fold{fold}',
-                                 filename))
-                labels.append(int(target))
-
-        return files, labels
diff --git a/paddlespeech/audio/datasets/voxceleb.py b/paddlespeech/audio/datasets/voxceleb.py
deleted file mode 100644
index e1a8aa38b9ccf3b9db83f76f6d7ecbb9ec56d486..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/datasets/voxceleb.py
+++ /dev/null
@@ -1,355 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import collections
-import csv
-import glob
-import os
-import random
-from multiprocessing import cpu_count
-from typing import List
-
-from paddle.io import Dataset
-from pathos.multiprocessing import Pool
-from tqdm import tqdm
-
-from ..utils import DATA_HOME
-from ..utils import decompress
-from ..utils.download import download_and_decompress
-from .dataset import feat_funcs
-
-__all__ = ['VoxCeleb']
-
-
-class VoxCeleb(Dataset):
-    source_url = 'https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/'
-    archieves_audio_dev = [
-        {
-            'url': source_url + 'vox1_dev_wav_partaa',
-            'md5': 'e395d020928bc15670b570a21695ed96',
-        },
-        {
-            'url': source_url + 'vox1_dev_wav_partab',
-            'md5': 'bbfaaccefab65d82b21903e81a8a8020',
-        },
-        {
-            'url': source_url + 'vox1_dev_wav_partac',
-            'md5': '017d579a2a96a077f40042ec33e51512',
-        },
-        {
-            'url': source_url + 'vox1_dev_wav_partad',
-            'md5': '7bb1e9f70fddc7a678fa998ea8b3ba19',
-        },
-    ]
-    archieves_audio_test = [
-        {
-            'url': source_url + 'vox1_test_wav.zip',
-            'md5': '185fdc63c3c739954633d50379a3d102',
-        },
-    ]
-    archieves_meta = [
-        {
-            'url':
-            'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt',
-            'md5':
-            'b73110731c9223c1461fe49cb48dddfc',
-        },
-    ]
-
-    num_speakers = 1211  # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
-    sample_rate = 16000
-    meta_info = collections.namedtuple(
-        'META_INFO', ('id', 'duration', 'wav', 'start', 'stop', 'spk_id'))
-    base_path = os.path.join(DATA_HOME, 'vox1')
-    wav_path = os.path.join(base_path, 'wav')
-    meta_path = os.path.join(base_path, 'meta')
-    veri_test_file = os.path.join(meta_path, 'veri_test2.txt')
-    csv_path = os.path.join(base_path, 'csv')
-    subsets = ['train', 'dev', 'enroll', 'test']
-
-    def __init__(
-            self,
-            subset: str='train',
-            feat_type: str='raw',
-            random_chunk: bool=True,
-            chunk_duration: float=3.0,  # seconds
-            split_ratio: float=0.9,  # train split ratio
-            seed: int=0,
-            target_dir: str=None,
-            vox2_base_path=None,
-            **kwargs):
-        """VoxCeleb data prepare and get the specific dataset audio info
-
-        Args:
-            subset (str, optional): dataset name, such as train, dev, enroll or test. Defaults to 'train'.
-            feat_type (str, optional): feat type, such raw, melspectrogram(fbank) or mfcc . Defaults to 'raw'.
-            random_chunk (bool, optional): random select a duration from audio. Defaults to True.
-            chunk_duration (float, optional): chunk duration if random_chunk flag is set. Defaults to 3.0.
-            target_dir (str, optional): data dir, audio info will be stored in this directory. Defaults to None.
-            vox2_base_path (_type_, optional): vox2 directory. vox2 data must be converted from m4a to wav. Defaults to None.
-        """
-        assert subset in self.subsets, \
-            'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset)
-
-        self.subset = subset
-        self.spk_id2label = {}
-        self.feat_type = feat_type
-        self.feat_config = kwargs
-        self.random_chunk = random_chunk
-        self.chunk_duration = chunk_duration
-        self.split_ratio = split_ratio
-        self.target_dir = target_dir if target_dir else VoxCeleb.base_path
-        self.vox2_base_path = vox2_base_path
-
-        # if we set the target dir, we will change the vox data info data from base path to target dir
-        VoxCeleb.csv_path = os.path.join(
-            target_dir, "voxceleb", 'csv') if target_dir else VoxCeleb.csv_path
-        VoxCeleb.meta_path = os.path.join(
-            target_dir, "voxceleb",
-            'meta') if target_dir else VoxCeleb.meta_path
-        VoxCeleb.veri_test_file = os.path.join(VoxCeleb.meta_path,
-                                               'veri_test2.txt')
-        # self._data = self._get_data()[:1000]  # KP: Small dataset test.
-        self._data = self._get_data()
-        super(VoxCeleb, self).__init__()
-
-        # Set up a seed to reproduce training or predicting result.
-        # random.seed(seed)
-
-    def _get_data(self):
-        # Download audio files.
-        # We need the users to decompress all vox1/dev/wav and vox1/test/wav/ to vox1/wav/ dir
-        # so, we check the vox1/wav dir status
-        print(f"wav base path: {self.wav_path}")
-        if not os.path.isdir(self.wav_path):
-            print("start to download the voxceleb1 dataset")
-            download_and_decompress(  # multi-zip parts concatenate to vox1_dev_wav.zip
-                self.archieves_audio_dev,
-                self.base_path,
-                decompress=False)
-            download_and_decompress(  # download the vox1_test_wav.zip and unzip
-                self.archieves_audio_test,
-                self.base_path,
-                decompress=True)
-
-            # Download all parts and concatenate the files into one zip file.
-            dev_zipfile = os.path.join(self.base_path, 'vox1_dev_wav.zip')
-            print(f'Concatenating all parts to: {dev_zipfile}')
-            os.system(
-                f'cat {os.path.join(self.base_path, "vox1_dev_wav_parta*")} > {dev_zipfile}'
-            )
-
-            # Extract all audio files of dev and test set.
-            decompress(dev_zipfile, self.base_path)
-
-        # Download meta files.
-        if not os.path.isdir(self.meta_path):
-            print("prepare the meta data")
-            download_and_decompress(
-                self.archieves_meta, self.meta_path, decompress=False)
-
-        # Data preparation.
-        if not os.path.isdir(self.csv_path):
-            os.makedirs(self.csv_path)
-            self.prepare_data()
-
-        data = []
-        print(
-            f"read the {self.subset} from {os.path.join(self.csv_path, f'{self.subset}.csv')}"
-        )
-        with open(os.path.join(self.csv_path, f'{self.subset}.csv'), 'r') as rf:
-            for line in rf.readlines()[1:]:
-                audio_id, duration, wav, start, stop, spk_id = line.strip(
-                ).split(',')
-                data.append(
-                    self.meta_info(audio_id,
-                                   float(duration), wav,
-                                   int(start), int(stop), spk_id))
-
-        with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'r') as f:
-            for line in f.readlines():
-                spk_id, label = line.strip().split(' ')
-                self.spk_id2label[spk_id] = int(label)
-
-        return data
-
-    def _convert_to_record(self, idx: int):
-        sample = self._data[idx]
-
-        record = {}
-        # To show all fields in a namedtuple: `type(sample)._fields`
-        for field in type(sample)._fields:
-            record[field] = getattr(sample, field)
-
-        waveform, sr = paddlespeech.audio.load(record['wav'])
-
-        # random select a chunk audio samples from the audio
-        if self.random_chunk:
-            num_wav_samples = waveform.shape[0]
-            num_chunk_samples = int(self.chunk_duration * sr)
-            start = random.randint(0, num_wav_samples - num_chunk_samples - 1)
-            stop = start + num_chunk_samples
-        else:
-            start = record['start']
-            stop = record['stop']
-
-        waveform = waveform[start:stop]
-
-        assert self.feat_type in feat_funcs.keys(), \
-            f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}"
-        feat_func = feat_funcs[self.feat_type]
-        feat = feat_func(
-            waveform, sr=sr, **self.feat_config) if feat_func else waveform
-
-        record.update({'feat': feat})
-        if self.subset in ['train',
-                           'dev']:  # Labels are available in train and dev.
-            record.update({'label': self.spk_id2label[record['spk_id']]})
-
-        return record
-
-    @staticmethod
-    def _get_chunks(seg_dur, audio_id, audio_duration):
-        num_chunks = int(audio_duration / seg_dur)  # all in milliseconds
-
-        chunk_lst = [
-            audio_id + "_" + str(i * seg_dur) + "_" + str(i * seg_dur + seg_dur)
-            for i in range(num_chunks)
-        ]
-        return chunk_lst
-
-    def _get_audio_info(self, wav_file: str,
-                        split_chunks: bool) -> List[List[str]]:
-        waveform, sr = paddlespeech.audio.load(wav_file)
-        spk_id, sess_id, utt_id = wav_file.split("/")[-3:]
-        audio_id = '-'.join([spk_id, sess_id, utt_id.split(".")[0]])
-        audio_duration = waveform.shape[0] / sr
-
-        ret = []
-        if split_chunks:  # Split into pieces of self.chunk_duration seconds.
-            uniq_chunks_list = self._get_chunks(self.chunk_duration, audio_id,
-                                                audio_duration)
-
-            for chunk in uniq_chunks_list:
-                s, e = chunk.split("_")[-2:]  # Timestamps of start and end
-                start_sample = int(float(s) * sr)
-                end_sample = int(float(e) * sr)
-                # id, duration, wav, start, stop, spk_id
-                ret.append([
-                    chunk, audio_duration, wav_file, start_sample, end_sample,
-                    spk_id
-                ])
-        else:  # Keep whole audio.
-            ret.append([
-                audio_id, audio_duration, wav_file, 0, waveform.shape[0], spk_id
-            ])
-        return ret
-
-    def generate_csv(self,
-                     wav_files: List[str],
-                     output_file: str,
-                     split_chunks: bool=True):
-        print(f'Generating csv: {output_file}')
-        header = ["id", "duration", "wav", "start", "stop", "spk_id"]
-        # Note: this may occurs c++ execption, but the program will execute fine
-        # so we can ignore the execption 
-        with Pool(cpu_count()) as p:
-            infos = list(
-                tqdm(
-                    p.imap(lambda x: self._get_audio_info(x, split_chunks),
-                           wav_files),
-                    total=len(wav_files)))
-
-        csv_lines = []
-        for info in infos:
-            csv_lines.extend(info)
-
-        with open(output_file, mode="w") as csv_f:
-            csv_writer = csv.writer(
-                csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
-            csv_writer.writerow(header)
-            for line in csv_lines:
-                csv_writer.writerow(line)
-
-    def prepare_data(self):
-        # Audio of speakers in veri_test_file should not be included in training set.
-        print("start to prepare the data csv file")
-        enroll_files = set()
-        test_files = set()
-        # get the enroll and test audio file path
-        with open(self.veri_test_file, 'r') as f:
-            for line in f.readlines():
-                _, enrol_file, test_file = line.strip().split(' ')
-                enroll_files.add(os.path.join(self.wav_path, enrol_file))
-                test_files.add(os.path.join(self.wav_path, test_file))
-            enroll_files = sorted(enroll_files)
-            test_files = sorted(test_files)
-
-        # get the enroll and test speakers
-        test_spks = set()
-        for file in (enroll_files + test_files):
-            spk = file.split('/wav/')[1].split('/')[0]
-            test_spks.add(spk)
-
-        # get all the train and dev audios file path
-        audio_files = []
-        speakers = set()
-        print("Getting file list...")
-        for path in [self.wav_path, self.vox2_base_path]:
-            # if vox2 directory is not set and vox2 is not a directory 
-            # we will not process this directory
-            if not path or not os.path.exists(path):
-                print(f"{path} is an invalid path, please check again, "
-                      "and we will ignore the vox2 base path")
-                continue
-            for file in glob.glob(
-                    os.path.join(path, "**", "*.wav"), recursive=True):
-                spk = file.split('/wav/')[1].split('/')[0]
-                if spk in test_spks:
-                    continue
-                speakers.add(spk)
-                audio_files.append(file)
-
-        print(
-            f"start to generate the {os.path.join(self.meta_path, 'spk_id2label.txt')}"
-        )
-        # encode the train and dev speakers label to spk_id2label.txt
-        with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'w') as f:
-            for label, spk_id in enumerate(
-                    sorted(speakers)):  # 1211 vox1, 5994 vox2, 7205 vox1+2
-                f.write(f'{spk_id} {label}\n')
-
-        audio_files = sorted(audio_files)
-        random.shuffle(audio_files)
-        split_idx = int(self.split_ratio * len(audio_files))
-        # split_ratio to train
-        train_files, dev_files = audio_files[:split_idx], audio_files[
-            split_idx:]
-
-        self.generate_csv(train_files, os.path.join(self.csv_path, 'train.csv'))
-        self.generate_csv(dev_files, os.path.join(self.csv_path, 'dev.csv'))
-
-        self.generate_csv(
-            enroll_files,
-            os.path.join(self.csv_path, 'enroll.csv'),
-            split_chunks=False)
-        self.generate_csv(
-            test_files,
-            os.path.join(self.csv_path, 'test.csv'),
-            split_chunks=False)
-
-    def __getitem__(self, idx):
-        return self._convert_to_record(idx)
-
-    def __len__(self):
-        return len(self._data)
diff --git a/paddlespeech/audio/features/__init__.py b/paddlespeech/audio/features/__init__.py
deleted file mode 100644
index 00781397f3d35cd995db38741e93db65228bde0a..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/features/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from .layers import LogMelSpectrogram
-from .layers import MelSpectrogram
-from .layers import MFCC
-from .layers import Spectrogram
diff --git a/paddlespeech/audio/features/layers.py b/paddlespeech/audio/features/layers.py
deleted file mode 100644
index 292363e64d276f53369ea151209972fcc9489e01..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/features/layers.py
+++ /dev/null
@@ -1,328 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from functools import partial
-from typing import Optional
-from typing import Union
-
-import paddle
-import paddle.nn as nn
-from paddle import Tensor
-
-from ..functional import compute_fbank_matrix
-from ..functional import create_dct
-from ..functional import power_to_db
-from ..functional.window import get_window
-
-__all__ = [
-    'Spectrogram',
-    'MelSpectrogram',
-    'LogMelSpectrogram',
-    'MFCC',
-]
-
-
-class Spectrogram(nn.Layer):
-    """Compute spectrogram of given signals, typically audio waveforms.
-    The spectorgram is defined as the complex norm of the short-time Fourier transformation.
-
-    Args:
-        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
-        hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
-        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
-        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
-        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
-        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
-        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
-        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
-    """
-
-    def __init__(self,
-                 n_fft: int=512,
-                 hop_length: Optional[int]=None,
-                 win_length: Optional[int]=None,
-                 window: str='hann',
-                 power: float=2.0,
-                 center: bool=True,
-                 pad_mode: str='reflect',
-                 dtype: str='float32') -> None:
-        super(Spectrogram, self).__init__()
-
-        assert power > 0, 'Power of spectrogram must be > 0.'
-        self.power = power
-
-        if win_length is None:
-            win_length = n_fft
-
-        self.fft_window = get_window(
-            window, win_length, fftbins=True, dtype=dtype)
-        self._stft = partial(
-            paddle.signal.stft,
-            n_fft=n_fft,
-            hop_length=hop_length,
-            win_length=win_length,
-            window=self.fft_window,
-            center=center,
-            pad_mode=pad_mode)
-        self.register_buffer('fft_window', self.fft_window)
-
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Args:
-            x (Tensor): Tensor of waveforms with shape `(N, T)`
-
-        Returns:
-            Tensor: Spectrograms with shape `(N, n_fft//2 + 1, num_frames)`.
-        """
-        stft = self._stft(x)
-        spectrogram = paddle.pow(paddle.abs(stft), self.power)
-        return spectrogram
-
-
-class MelSpectrogram(nn.Layer):
-    """Compute the melspectrogram of given signals, typically audio waveforms. It is computed by multiplying spectrogram with Mel filter bank matrix.
-
-    Args:
-        sr (int, optional): Sample rate. Defaults to 22050.
-        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
-        hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
-        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
-        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
-        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
-        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
-        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
-        n_mels (int, optional): Number of mel bins. Defaults to 64.
-        f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0.
-        f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
-        htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False.
-        norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'.
-        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
-    """
-
-    def __init__(self,
-                 sr: int=22050,
-                 n_fft: int=512,
-                 hop_length: Optional[int]=None,
-                 win_length: Optional[int]=None,
-                 window: str='hann',
-                 power: float=2.0,
-                 center: bool=True,
-                 pad_mode: str='reflect',
-                 n_mels: int=64,
-                 f_min: float=50.0,
-                 f_max: Optional[float]=None,
-                 htk: bool=False,
-                 norm: Union[str, float]='slaney',
-                 dtype: str='float32') -> None:
-        super(MelSpectrogram, self).__init__()
-
-        self._spectrogram = Spectrogram(
-            n_fft=n_fft,
-            hop_length=hop_length,
-            win_length=win_length,
-            window=window,
-            power=power,
-            center=center,
-            pad_mode=pad_mode,
-            dtype=dtype)
-        self.n_mels = n_mels
-        self.f_min = f_min
-        self.f_max = f_max
-        self.htk = htk
-        self.norm = norm
-        if f_max is None:
-            f_max = sr // 2
-        self.fbank_matrix = compute_fbank_matrix(
-            sr=sr,
-            n_fft=n_fft,
-            n_mels=n_mels,
-            f_min=f_min,
-            f_max=f_max,
-            htk=htk,
-            norm=norm,
-            dtype=dtype)  # float64 for better numerical results
-        self.register_buffer('fbank_matrix', self.fbank_matrix)
-
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Args:
-            x (Tensor): Tensor of waveforms with shape `(N, T)`
-
-        Returns:
-            Tensor: Mel spectrograms with shape `(N, n_mels, num_frames)`.
-        """
-        spect_feature = self._spectrogram(x)
-        mel_feature = paddle.matmul(self.fbank_matrix, spect_feature)
-        return mel_feature
-
-
-class LogMelSpectrogram(nn.Layer):
-    """Compute log-mel-spectrogram feature of given signals, typically audio waveforms.
-
-    Args:
-        sr (int, optional): Sample rate. Defaults to 22050.
-        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
-        hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
-        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
-        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
-        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
-        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
-        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
-        n_mels (int, optional): Number of mel bins. Defaults to 64.
-        f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0.
-        f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
-        htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False.
-        norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'.
-        ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
-        amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10.
-        top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None.
-        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
-    """
-
-    def __init__(self,
-                 sr: int=22050,
-                 n_fft: int=512,
-                 hop_length: Optional[int]=None,
-                 win_length: Optional[int]=None,
-                 window: str='hann',
-                 power: float=2.0,
-                 center: bool=True,
-                 pad_mode: str='reflect',
-                 n_mels: int=64,
-                 f_min: float=50.0,
-                 f_max: Optional[float]=None,
-                 htk: bool=False,
-                 norm: Union[str, float]='slaney',
-                 ref_value: float=1.0,
-                 amin: float=1e-10,
-                 top_db: Optional[float]=None,
-                 dtype: str='float32') -> None:
-        super(LogMelSpectrogram, self).__init__()
-
-        self._melspectrogram = MelSpectrogram(
-            sr=sr,
-            n_fft=n_fft,
-            hop_length=hop_length,
-            win_length=win_length,
-            window=window,
-            power=power,
-            center=center,
-            pad_mode=pad_mode,
-            n_mels=n_mels,
-            f_min=f_min,
-            f_max=f_max,
-            htk=htk,
-            norm=norm,
-            dtype=dtype)
-
-        self.ref_value = ref_value
-        self.amin = amin
-        self.top_db = top_db
-
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Args:
-            x (Tensor): Tensor of waveforms with shape `(N, T)`
-
-        Returns:
-            Tensor: Log mel spectrograms with shape `(N, n_mels, num_frames)`.
-        """
-        mel_feature = self._melspectrogram(x)
-        log_mel_feature = power_to_db(
-            mel_feature,
-            ref_value=self.ref_value,
-            amin=self.amin,
-            top_db=self.top_db)
-        return log_mel_feature
-
-
-class MFCC(nn.Layer):
-    """Compute mel frequency cepstral coefficients(MFCCs) feature of given waveforms.
-
-    Args:
-        sr (int, optional): Sample rate. Defaults to 22050.
-        n_mfcc (int, optional): [description]. Defaults to 40.
-        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
-        hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
-        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
-        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
-        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
-        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
-        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
-        n_mels (int, optional): Number of mel bins. Defaults to 64.
-        f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0.
-        f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
-        htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False.
-        norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'.
-        ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
-        amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10.
-        top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None.
-        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
-    """
-
-    def __init__(self,
-                 sr: int=22050,
-                 n_mfcc: int=40,
-                 n_fft: int=512,
-                 hop_length: Optional[int]=None,
-                 win_length: Optional[int]=None,
-                 window: str='hann',
-                 power: float=2.0,
-                 center: bool=True,
-                 pad_mode: str='reflect',
-                 n_mels: int=64,
-                 f_min: float=50.0,
-                 f_max: Optional[float]=None,
-                 htk: bool=False,
-                 norm: Union[str, float]='slaney',
-                 ref_value: float=1.0,
-                 amin: float=1e-10,
-                 top_db: Optional[float]=None,
-                 dtype: str=paddle.float32) -> None:
-        super(MFCC, self).__init__()
-        assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % (
-            n_mfcc, n_mels)
-        self._log_melspectrogram = LogMelSpectrogram(
-            sr=sr,
-            n_fft=n_fft,
-            hop_length=hop_length,
-            win_length=win_length,
-            window=window,
-            power=power,
-            center=center,
-            pad_mode=pad_mode,
-            n_mels=n_mels,
-            f_min=f_min,
-            f_max=f_max,
-            htk=htk,
-            norm=norm,
-            ref_value=ref_value,
-            amin=amin,
-            top_db=top_db,
-            dtype=dtype)
-        self.dct_matrix = create_dct(n_mfcc=n_mfcc, n_mels=n_mels, dtype=dtype)
-        self.register_buffer('dct_matrix', self.dct_matrix)
-
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Args:
-            x (Tensor): Tensor of waveforms with shape `(N, T)`
-
-        Returns:
-            Tensor: Mel frequency cepstral coefficients with shape `(N, n_mfcc, num_frames)`.
-        """
-        log_mel_feature = self._log_melspectrogram(x)
-        mfcc = paddle.matmul(
-            log_mel_feature.transpose((0, 2, 1)), self.dct_matrix).transpose(
-                (0, 2, 1))  # (B, n_mels, L)
-        return mfcc
diff --git a/paddlespeech/audio/functional/__init__.py b/paddlespeech/audio/functional/__init__.py
deleted file mode 100644
index c85232df199e9e888b786a991f8b1d290d38e9d0..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/functional/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from .functional import compute_fbank_matrix
-from .functional import create_dct
-from .functional import fft_frequencies
-from .functional import hz_to_mel
-from .functional import mel_frequencies
-from .functional import mel_to_hz
-from .functional import power_to_db
diff --git a/paddlespeech/audio/functional/functional.py b/paddlespeech/audio/functional/functional.py
deleted file mode 100644
index 19c63a9aef23c65c94b5de34bebc3974e61be736..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/functional/functional.py
+++ /dev/null
@@ -1,266 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Modified from librosa(https://github.com/librosa/librosa)
-import math
-from typing import Optional
-from typing import Union
-
-import paddle
-from paddle import Tensor
-
-__all__ = [
-    'hz_to_mel',
-    'mel_to_hz',
-    'mel_frequencies',
-    'fft_frequencies',
-    'compute_fbank_matrix',
-    'power_to_db',
-    'create_dct',
-]
-
-
-def hz_to_mel(freq: Union[Tensor, float],
-              htk: bool=False) -> Union[Tensor, float]:
-    """Convert Hz to Mels.
-
-    Args:
-        freq (Union[Tensor, float]): The input tensor with arbitrary shape.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-
-    Returns:
-        Union[Tensor, float]: Frequency in mels.
-    """
-
-    if htk:
-        if isinstance(freq, Tensor):
-            return 2595.0 * paddle.log10(1.0 + freq / 700.0)
-        else:
-            return 2595.0 * math.log10(1.0 + freq / 700.0)
-
-    # Fill in the linear part
-    f_min = 0.0
-    f_sp = 200.0 / 3
-
-    mels = (freq - f_min) / f_sp
-
-    # Fill in the log-scale part
-
-    min_log_hz = 1000.0  # beginning of log region (Hz)
-    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
-    logstep = math.log(6.4) / 27.0  # step size for log region
-
-    if isinstance(freq, Tensor):
-        target = min_log_mel + paddle.log(
-            freq / min_log_hz + 1e-10) / logstep  # prevent nan with 1e-10
-        mask = (freq > min_log_hz).astype(freq.dtype)
-        mels = target * mask + mels * (
-            1 - mask)  # will replace by masked_fill OP in future
-    else:
-        if freq >= min_log_hz:
-            mels = min_log_mel + math.log(freq / min_log_hz + 1e-10) / logstep
-
-    return mels
-
-
-def mel_to_hz(mel: Union[float, Tensor],
-              htk: bool=False) -> Union[float, Tensor]:
-    """Convert mel bin numbers to frequencies.
-
-    Args:
-        mel (Union[float, Tensor]): The mel frequency represented as a tensor with arbitrary shape.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-
-    Returns:
-        Union[float, Tensor]: Frequencies in Hz.
-    """
-    if htk:
-        return 700.0 * (10.0**(mel / 2595.0) - 1.0)
-
-    f_min = 0.0
-    f_sp = 200.0 / 3
-    freqs = f_min + f_sp * mel
-    # And now the nonlinear scale
-    min_log_hz = 1000.0  # beginning of log region (Hz)
-    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
-    logstep = math.log(6.4) / 27.0  # step size for log region
-    if isinstance(mel, Tensor):
-        target = min_log_hz * paddle.exp(logstep * (mel - min_log_mel))
-        mask = (mel > min_log_mel).astype(mel.dtype)
-        freqs = target * mask + freqs * (
-            1 - mask)  # will replace by masked_fill OP in future
-    else:
-        if mel >= min_log_mel:
-            freqs = min_log_hz * math.exp(logstep * (mel - min_log_mel))
-
-    return freqs
-
-
-def mel_frequencies(n_mels: int=64,
-                    f_min: float=0.0,
-                    f_max: float=11025.0,
-                    htk: bool=False,
-                    dtype: str='float32') -> Tensor:
-    """Compute mel frequencies.
-
-    Args:
-        n_mels (int, optional): Number of mel bins. Defaults to 64.
-        f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
-        fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-        dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.
-
-    Returns:
-        Tensor: Tensor of n_mels frequencies in Hz with shape `(n_mels,)`.
-    """
-    # 'Center freqs' of mel bands - uniformly spaced between limits
-    min_mel = hz_to_mel(f_min, htk=htk)
-    max_mel = hz_to_mel(f_max, htk=htk)
-    mels = paddle.linspace(min_mel, max_mel, n_mels, dtype=dtype)
-    freqs = mel_to_hz(mels, htk=htk)
-    return freqs
-
-
-def fft_frequencies(sr: int, n_fft: int, dtype: str='float32') -> Tensor:
-    """Compute fourier frequencies.
-
-    Args:
-        sr (int): Sample rate.
-        n_fft (int): Number of fft bins.
-        dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.
-
-    Returns:
-        Tensor: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
-    """
-    return paddle.linspace(0, float(sr) / 2, int(1 + n_fft // 2), dtype=dtype)
-
-
-def compute_fbank_matrix(sr: int,
-                         n_fft: int,
-                         n_mels: int=64,
-                         f_min: float=0.0,
-                         f_max: Optional[float]=None,
-                         htk: bool=False,
-                         norm: Union[str, float]='slaney',
-                         dtype: str='float32') -> Tensor:
-    """Compute fbank matrix.
-
-    Args:
-        sr (int): Sample rate.
-        n_fft (int): Number of fft bins.
-        n_mels (int, optional): Number of mel bins. Defaults to 64.
-        f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
-        f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
-        htk (bool, optional): Use htk scaling. Defaults to False.
-        norm (Union[str, float], optional): Type of normalization. Defaults to 'slaney'.
-        dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.
-
-    Returns:
-        Tensor: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.
-    """
-
-    if f_max is None:
-        f_max = float(sr) / 2
-
-    # Initialize the weights
-    weights = paddle.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)
-
-    # Center freqs of each FFT bin
-    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype)
-
-    # 'Center freqs' of mel bands - uniformly spaced between limits
-    mel_f = mel_frequencies(
-        n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype)
-
-    fdiff = mel_f[1:] - mel_f[:-1]  #np.diff(mel_f)
-    ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0)
-    #ramps = np.subtract.outer(mel_f, fftfreqs)
-
-    for i in range(n_mels):
-        # lower and upper slopes for all bins
-        lower = -ramps[i] / fdiff[i]
-        upper = ramps[i + 2] / fdiff[i + 1]
-
-        # .. then intersect them with each other and zero
-        weights[i] = paddle.maximum(
-            paddle.zeros_like(lower), paddle.minimum(lower, upper))
-
-    # Slaney-style mel is scaled to be approx constant energy per channel
-    if norm == 'slaney':
-        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
-        weights *= enorm.unsqueeze(1)
-    elif isinstance(norm, int) or isinstance(norm, float):
-        weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1)
-
-    return weights
-
-
-def power_to_db(spect: Tensor,
-                ref_value: float=1.0,
-                amin: float=1e-10,
-                top_db: Optional[float]=None) -> Tensor:
-    """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way.
-
-    Args:
-        spect (Tensor): STFT power spectrogram.
-        ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
-        amin (float, optional): Minimum threshold. Defaults to 1e-10.
-        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None.
-
-    Returns:
-        Tensor: Power spectrogram in db scale.
-    """
-    if amin <= 0:
-        raise Exception("amin must be strictly positive")
-
-    if ref_value <= 0:
-        raise Exception("ref_value must be strictly positive")
-
-    ones = paddle.ones_like(spect)
-    log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, spect))
-    log_spec -= 10.0 * math.log10(max(ref_value, amin))
-
-    if top_db is not None:
-        if top_db < 0:
-            raise Exception("top_db must be non-negative")
-        log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db))
-
-    return log_spec
-
-
-def create_dct(n_mfcc: int,
-               n_mels: int,
-               norm: Optional[str]='ortho',
-               dtype: str='float32') -> Tensor:
-    """Create a discrete cosine transform(DCT) matrix.
-
-    Args:
-        n_mfcc (int): Number of mel frequency cepstral coefficients. 
-        n_mels (int): Number of mel filterbanks.
-        norm (Optional[str], optional): Normalizaiton type. Defaults to 'ortho'.
-        dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.
-
-    Returns:
-        Tensor: The DCT matrix with shape `(n_mels, n_mfcc)`.
-    """
-    n = paddle.arange(n_mels, dtype=dtype)
-    k = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1)
-    dct = paddle.cos(math.pi / float(n_mels) * (n + 0.5) *
-                     k)  # size (n_mfcc, n_mels)
-    if norm is None:
-        dct *= 2.0
-    else:
-        assert norm == "ortho"
-        dct[0] *= 1.0 / math.sqrt(2.0)
-        dct *= math.sqrt(2.0 / float(n_mels))
-    return dct.T
diff --git a/paddlespeech/audio/functional/window.py b/paddlespeech/audio/functional/window.py
deleted file mode 100644
index c99d50462e355d4f1c199f74d737fc13c339d630..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/functional/window.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-import math
-from typing import List
-from typing import Tuple
-from typing import Union
-
-import paddle
-from paddle import Tensor
-
-__all__ = [
-    'get_window',
-]
-
-
-def _cat(x: List[Tensor], data_type: str) -> Tensor:
-    l = [paddle.to_tensor(_, data_type) for _ in x]
-    return paddle.concat(l)
-
-
-def _acosh(x: Union[Tensor, float]) -> Tensor:
-    if isinstance(x, float):
-        return math.log(x + math.sqrt(x**2 - 1))
-    return paddle.log(x + paddle.sqrt(paddle.square(x) - 1))
-
-
-def _extend(M: int, sym: bool) -> bool:
-    """Extend window by 1 sample if needed for DFT-even symmetry. """
-    if not sym:
-        return M + 1, True
-    else:
-        return M, False
-
-
-def _len_guards(M: int) -> bool:
-    """Handle small or incorrect window lengths. """
-    if int(M) != M or M < 0:
-        raise ValueError('Window length M must be a non-negative integer')
-
-    return M <= 1
-
-
-def _truncate(w: Tensor, needed: bool) -> Tensor:
-    """Truncate window by 1 sample if needed for DFT-even symmetry. """
-    if needed:
-        return w[:-1]
-    else:
-        return w
-
-
-def _general_gaussian(M: int, p, sig, sym: bool=True,
-                      dtype: str='float64') -> Tensor:
-    """Compute a window with a generalized Gaussian shape.
-    This function is consistent with scipy.signal.windows.general_gaussian().
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-
-    n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
-    w = paddle.exp(-0.5 * paddle.abs(n / sig)**(2 * p))
-
-    return _truncate(w, needs_trunc)
-
-
-def _general_cosine(M: int, a: float, sym: bool=True,
-                    dtype: str='float64') -> Tensor:
-    """Compute a generic weighted sum of cosine terms window.
-    This function is consistent with scipy.signal.windows.general_cosine().
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-    fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
-    w = paddle.zeros((M, ), dtype=dtype)
-    for k in range(len(a)):
-        w += a[k] * paddle.cos(k * fac)
-    return _truncate(w, needs_trunc)
-
-
-def _general_hamming(M: int, alpha: float, sym: bool=True,
-                     dtype: str='float64') -> Tensor:
-    """Compute a generalized Hamming window.
-    This function is consistent with scipy.signal.windows.general_hamming()
-    """
-    return _general_cosine(M, [alpha, 1. - alpha], sym, dtype=dtype)
-
-
-def _taylor(M: int,
-            nbar=4,
-            sll=30,
-            norm=True,
-            sym: bool=True,
-            dtype: str='float64') -> Tensor:
-    """Compute a Taylor window.
-    The Taylor window taper function approximates the Dolph-Chebyshev window's
-    constant sidelobe level for a parameterized number of near-in sidelobes.
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-    # Original text uses a negative sidelobe level parameter and then negates
-    # it in the calculation of B. To keep consistent with other methods we
-    # assume the sidelobe level parameter to be positive.
-    B = 10**(sll / 20)
-    A = _acosh(B) / math.pi
-    s2 = nbar**2 / (A**2 + (nbar - 0.5)**2)
-    ma = paddle.arange(1, nbar, dtype=dtype)
-
-    Fm = paddle.empty((nbar - 1, ), dtype=dtype)
-    signs = paddle.empty_like(ma)
-    signs[::2] = 1
-    signs[1::2] = -1
-    m2 = ma * ma
-    for mi in range(len(ma)):
-        numer = signs[mi] * paddle.prod(1 - m2[mi] / s2 / (A**2 + (ma - 0.5)**2
-                                                           ))
-        if mi == 0:
-            denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1:])
-        elif mi == len(ma) - 1:
-            denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi])
-        else:
-            denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi]) * paddle.prod(1 - m2[
-                mi] / m2[mi + 1:])
-
-        Fm[mi] = numer / denom
-
-    def W(n):
-        return 1 + 2 * paddle.matmul(
-            Fm.unsqueeze(0),
-            paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2. + 0.5) / M))
-
-    w = W(paddle.arange(0, M, dtype=dtype))
-
-    # normalize (Note that this is not described in the original text [1])
-    if norm:
-        scale = 1.0 / W((M - 1) / 2)
-        w *= scale
-    w = w.squeeze()
-    return _truncate(w, needs_trunc)
-
-
-def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
-    """Compute a Hamming window.
-    The Hamming window is a taper formed by using a raised cosine with
-    non-zero endpoints, optimized to minimize the nearest side lobe.
-    """
-    return _general_hamming(M, 0.54, sym, dtype=dtype)
-
-
-def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
-    """Compute a Hann window.
-    The Hann window is a taper formed by using a raised cosine or sine-squared
-    with ends that touch zero.
-    """
-    return _general_hamming(M, 0.5, sym, dtype=dtype)
-
-
-def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor:
-    """Compute a Tukey window.
-    The Tukey window is also known as a tapered cosine window.
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-
-    if alpha <= 0:
-        return paddle.ones((M, ), dtype=dtype)
-    elif alpha >= 1.0:
-        return hann(M, sym=sym)
-
-    M, needs_trunc = _extend(M, sym)
-
-    n = paddle.arange(0, M, dtype=dtype)
-    width = int(alpha * (M - 1) / 2.0)
-    n1 = n[0:width + 1]
-    n2 = n[width + 1:M - width - 1]
-    n3 = n[M - width - 1:]
-
-    w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1))))
-    w2 = paddle.ones(n2.shape, dtype=dtype)
-    w3 = 0.5 * (1 + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha /
-                                          (M - 1))))
-    w = paddle.concat([w1, w2, w3])
-
-    return _truncate(w, needs_trunc)
-
-
-def _kaiser(M: int, beta: float, sym: bool=True,
-            dtype: str='float64') -> Tensor:
-    """Compute a Kaiser window.
-    The Kaiser window is a taper formed by using a Bessel function.
-    """
-    raise NotImplementedError()
-
-
-def _gaussian(M: int, std: float, sym: bool=True,
-              dtype: str='float64') -> Tensor:
-    """Compute a Gaussian window.
-    The Gaussian widows has a Gaussian shape defined by the standard deviation(std).
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-
-    n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
-    sig2 = 2 * std * std
-    w = paddle.exp(-n**2 / sig2)
-
-    return _truncate(w, needs_trunc)
-
-
-def _exponential(M: int,
-                 center=None,
-                 tau=1.,
-                 sym: bool=True,
-                 dtype: str='float64') -> Tensor:
-    """Compute an exponential (or Poisson) window. """
-    if sym and center is not None:
-        raise ValueError("If sym==True, center must be None.")
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-
-    if center is None:
-        center = (M - 1) / 2
-
-    n = paddle.arange(0, M, dtype=dtype)
-    w = paddle.exp(-paddle.abs(n - center) / tau)
-
-    return _truncate(w, needs_trunc)
-
-
-def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
-    """Compute a triangular window.
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-
-    n = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype)
-    if M % 2 == 0:
-        w = (2 * n - 1.0) / M
-        w = paddle.concat([w, w[::-1]])
-    else:
-        w = 2 * n / (M + 1.0)
-        w = paddle.concat([w, w[-2::-1]])
-
-    return _truncate(w, needs_trunc)
-
-
-def _bohman(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
-    """Compute a Bohman window.
-    The Bohman window is the autocorrelation of a cosine window.
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-
-    fac = paddle.abs(paddle.linspace(-1, 1, M, dtype=dtype)[1:-1])
-    w = (1 - fac) * paddle.cos(math.pi * fac) + 1.0 / math.pi * paddle.sin(
-        math.pi * fac)
-    w = _cat([0, w, 0], dtype)
-
-    return _truncate(w, needs_trunc)
-
-
-def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
-    """Compute a Blackman window.
-    The Blackman window is a taper formed by using the first three terms of
-    a summation of cosines. It was designed to have close to the minimal
-    leakage possible.  It is close to optimal, only slightly worse than a
-    Kaiser window.
-    """
-    return _general_cosine(M, [0.42, 0.50, 0.08], sym, dtype=dtype)
-
-
-def _cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
-    """Compute a window with a simple cosine shape.
-    """
-    if _len_guards(M):
-        return paddle.ones((M, ), dtype=dtype)
-    M, needs_trunc = _extend(M, sym)
-    w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + .5))
-
-    return _truncate(w, needs_trunc)
-
-
-def get_window(window: Union[str, Tuple[str, float]],
-               win_length: int,
-               fftbins: bool=True,
-               dtype: str='float64') -> Tensor:
-    """Return a window of a given length and type.
-
-    Args:
-        window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'.
-        win_length (int): Number of samples.
-        fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
-        dtype (str, optional): The data type of the return window. Defaults to 'float64'.
-
-    Returns:
-        Tensor: The window represented as a tensor.
-    """
-    sym = not fftbins
-
-    args = ()
-    if isinstance(window, tuple):
-        winstr = window[0]
-        if len(window) > 1:
-            args = window[1:]
-    elif isinstance(window, str):
-        if window in ['gaussian', 'exponential']:
-            raise ValueError("The '" + window + "' window needs one or "
-                             "more parameters -- pass a tuple.")
-        else:
-            winstr = window
-    else:
-        raise ValueError("%s as window type is not supported." %
-                         str(type(window)))
-
-    try:
-        winfunc = eval('_' + winstr)
-    except KeyError as e:
-        raise ValueError("Unknown window type.") from e
-
-    params = (win_length, ) + args
-    kwargs = {'sym': sym}
-    return winfunc(*params, dtype=dtype, **kwargs)
diff --git a/paddlespeech/audio/io/__init__.py b/paddlespeech/audio/io/__init__.py
deleted file mode 100644
index 185a92b8d94d3426d616c0624f0f2ee04339349e..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/io/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/paddlespeech/audio/kaldi/__init__.py b/paddlespeech/audio/kaldi/__init__.py
deleted file mode 100644
index f951e280a58b123965c46e6951e610740cd19bb4..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/kaldi/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from .kaldi import fbank
-from .kaldi import pitch
diff --git a/paddlespeech/audio/kaldi/kaldi.py b/paddlespeech/audio/kaldi/kaldi.py
deleted file mode 100644
index ff0fd8d9db7669b018ee88366e8ad8c1be31f378..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/kaldi/kaldi.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddlespeech
-from paddlespeech.audio._internal import module_utils 
-
-__all__ = [
-    'fbank',
-    'pitch',
-]
-
-
-@module_utils.requires_kaldi()
-def fbank(
-        wav,
-        samp_freq: int=16000,
-        frame_shift_ms: float=10.0,
-        frame_length_ms: float=25.0,
-        dither: float=0.0,
-        preemph_coeff: float=0.97,
-        remove_dc_offset: bool=True,
-        window_type: str='povey',
-        round_to_power_of_two: bool=True,
-        blackman_coeff: float=0.42,
-        snip_edges: bool=True,
-        allow_downsample: bool=False,
-        allow_upsample: bool=False,
-        max_feature_vectors: int=-1,
-        num_bins: int=23,
-        low_freq: float=20,
-        high_freq: float=0,
-        vtln_low: float=100,
-        vtln_high: float=-500,
-        debug_mel: bool=False,
-        htk_mode: bool=False,
-        use_energy: bool=False,  # fbank opts
-        energy_floor: float=0.0,
-        raw_energy: bool=True,
-        htk_compat: bool=False,
-        use_log_fbank: bool=True,
-        use_power: bool=True):
-    frame_opts = paddlespeech.audio._paddleaudio.FrameExtractionOptions()
-    mel_opts = paddlespeech.audio._paddleaudio.MelBanksOptions()
-    fbank_opts = paddlespeech.audio._paddleaudio.FbankOptions()
-    frame_opts.samp_freq = samp_freq
-    frame_opts.frame_shift_ms = frame_shift_ms
-    frame_opts.frame_length_ms = frame_length_ms
-    frame_opts.dither = dither
-    frame_opts.preemph_coeff = preemph_coeff
-    frame_opts.remove_dc_offset = remove_dc_offset
-    frame_opts.window_type = window_type
-    frame_opts.round_to_power_of_two = round_to_power_of_two
-    frame_opts.blackman_coeff = blackman_coeff
-    frame_opts.snip_edges = snip_edges
-    frame_opts.allow_downsample = allow_downsample
-    frame_opts.allow_upsample = allow_upsample
-    frame_opts.max_feature_vectors = max_feature_vectors
-
-    mel_opts.num_bins = num_bins
-    mel_opts.low_freq = low_freq
-    mel_opts.high_freq = high_freq
-    mel_opts.vtln_low = vtln_low
-    mel_opts.vtln_high = vtln_high
-    mel_opts.debug_mel = debug_mel
-    mel_opts.htk_mode = htk_mode
-
-    fbank_opts.use_energy = use_energy
-    fbank_opts.energy_floor = energy_floor
-    fbank_opts.raw_energy = raw_energy
-    fbank_opts.htk_compat = htk_compat
-    fbank_opts.use_log_fbank = use_log_fbank
-    fbank_opts.use_power = use_power
-    feat = paddlespeech.audio._paddleaudio.ComputeFbank(frame_opts, mel_opts, fbank_opts, wav)
-    return feat
-
-
-@module_utils.requires_kaldi()
-def pitch(wav,
-          samp_freq: int=16000,
-          frame_shift_ms: float=10.0,
-          frame_length_ms: float=25.0,
-          preemph_coeff: float=0.0,
-          min_f0: int=50,
-          max_f0: int=400,
-          soft_min_f0: float=10.0,
-          penalty_factor: float=0.1,
-          lowpass_cutoff: int=1000,
-          resample_freq: int=4000,
-          delta_pitch: float=0.005,
-          nccf_ballast: int=7000,
-          lowpass_filter_width: int=1,
-          upsample_filter_width: int=5,
-          max_frames_latency: int=0,
-          frames_per_chunk: int=0,
-          simulate_first_pass_online: bool=False,
-          recompute_frame: int=500,
-          nccf_ballast_online: bool=False,
-          snip_edges: bool=True):
-    pitch_opts = paddlespeech.audio._paddleaudio.PitchExtractionOptions()
-    pitch_opts.samp_freq = samp_freq
-    pitch_opts.frame_shift_ms = frame_shift_ms
-    pitch_opts.frame_length_ms = frame_length_ms
-    pitch_opts.preemph_coeff = preemph_coeff
-    pitch_opts.min_f0 = min_f0
-    pitch_opts.max_f0 = max_f0
-    pitch_opts.soft_min_f0 = soft_min_f0
-    pitch_opts.penalty_factor = penalty_factor
-    pitch_opts.lowpass_cutoff = lowpass_cutoff
-    pitch_opts.resample_freq = resample_freq
-    pitch_opts.delta_pitch = delta_pitch
-    pitch_opts.nccf_ballast = nccf_ballast
-    pitch_opts.lowpass_filter_width = lowpass_filter_width
-    pitch_opts.upsample_filter_width = upsample_filter_width
-    pitch_opts.max_frames_latency = max_frames_latency
-    pitch_opts.frames_per_chunk = frames_per_chunk
-    pitch_opts.simulate_first_pass_online = simulate_first_pass_online
-    pitch_opts.recompute_frame = recompute_frame
-    pitch_opts.nccf_ballast_online = nccf_ballast_online
-    pitch_opts.snip_edges = snip_edges
-    pitch = paddlespeech.audio._paddleaudio.ComputeKaldiPitch(pitch_opts, wav)
-    return pitch
diff --git a/paddlespeech/audio/lib/.gitignore b/paddlespeech/audio/lib/.gitignore
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/paddlespeech/audio/metric/__init__.py b/paddlespeech/audio/metric/__init__.py
deleted file mode 100644
index 7ce6f5cfffda1f475c2cc6b2734c98027957d123..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/metric/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from .eer import compute_eer
-from .eer import compute_minDCF
diff --git a/paddlespeech/audio/metric/eer.py b/paddlespeech/audio/metric/eer.py
deleted file mode 100644
index a1166d3f93a2135b692ad822aae8134ffd1f5295..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/metric/eer.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import List
-
-import numpy as np
-import paddle
-from sklearn.metrics import roc_curve
-
-
-def compute_eer(labels: np.ndarray, scores: np.ndarray) -> List[float]:
-    """Compute EER and return score threshold.
-
-    Args:
-        labels (np.ndarray): the trial label, shape: [N], one-dimention, N refer to the samples num
-        scores (np.ndarray): the trial scores, shape: [N], one-dimention, N refer to the samples num
-
-    Returns:
-        List[float]: eer and the specific threshold
-    """
-    fpr, tpr, threshold = roc_curve(y_true=labels, y_score=scores)
-    fnr = 1 - tpr
-    eer_threshold = threshold[np.nanargmin(np.absolute((fnr - fpr)))]
-    eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
-    return eer, eer_threshold
-
-
-def compute_minDCF(positive_scores,
-                   negative_scores,
-                   c_miss=1.0,
-                   c_fa=1.0,
-                   p_target=0.01):
-    """
-    This is modified from SpeechBrain
-    https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/utils/metric_stats.py#L509
-    Computes the minDCF metric normally used to evaluate speaker verification
-    systems. The min_DCF is the minimum of the following C_det function computed
-    within the defined threshold range:
-
-    C_det =  c_miss * p_miss * p_target + c_fa * p_fa * (1 -p_target)
-
-    where p_miss is the missing probability and p_fa is the probability of having
-    a false alarm.
-
-    Args:
-        positive_scores (Paddle.Tensor): The scores from entries of the same class.
-        negative_scores (Paddle.Tensor): The scores from entries of different classes.
-        c_miss (float, optional): Cost assigned to a missing error (default 1.0).
-        c_fa (float, optional): Cost assigned to a false alarm (default 1.0).
-        p_target (float, optional): Prior probability of having a target (default 0.01).
-
-    Returns:
-        List[float]: min dcf and the specific threshold
-    """
-    # Computing candidate thresholds
-    if len(positive_scores.shape) > 1:
-        positive_scores = positive_scores.squeeze()
-
-    if len(negative_scores.shape) > 1:
-        negative_scores = negative_scores.squeeze()
-
-    thresholds = paddle.sort(paddle.concat([positive_scores, negative_scores]))
-    thresholds = paddle.unique(thresholds)
-
-    # Adding intermediate thresholds
-    interm_thresholds = (thresholds[0:-1] + thresholds[1:]) / 2
-    thresholds = paddle.sort(paddle.concat([thresholds, interm_thresholds]))
-
-    # Computing False Rejection Rate (miss detection)
-    positive_scores = paddle.concat(
-        len(thresholds) * [positive_scores.unsqueeze(0)])
-    pos_scores_threshold = positive_scores.transpose(perm=[1, 0]) <= thresholds
-    p_miss = (pos_scores_threshold.sum(0)
-              ).astype("float32") / positive_scores.shape[1]
-    del positive_scores
-    del pos_scores_threshold
-
-    # Computing False Acceptance Rate (false alarm)
-    negative_scores = paddle.concat(
-        len(thresholds) * [negative_scores.unsqueeze(0)])
-    neg_scores_threshold = negative_scores.transpose(perm=[1, 0]) > thresholds
-    p_fa = (neg_scores_threshold.sum(0)
-            ).astype("float32") / negative_scores.shape[1]
-    del negative_scores
-    del neg_scores_threshold
-
-    c_det = c_miss * p_miss * p_target + c_fa * p_fa * (1 - p_target)
-    c_min = paddle.min(c_det, axis=0)
-    min_index = paddle.argmin(c_det, axis=0)
-    return float(c_min), float(thresholds[min_index])
diff --git a/paddlespeech/audio/sox_effects/__init__.py b/paddlespeech/audio/sox_effects/__init__.py
deleted file mode 100644
index d68158776830d05195131300886f8f8a43be7cff..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/sox_effects/__init__.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from paddlespeech.audio._internal import module_utils as _mod_utils
-
-from .sox_effects import (
-    apply_effects_file,
-    apply_effects_tensor,
-    effect_names,
-    init_sox_effects,
-    shutdown_sox_effects,
-)
-
-
-if _mod_utils.is_sox_available():
-    import atexit
-
-    init_sox_effects()
-    atexit.register(shutdown_sox_effects)
-
-__all__ = [
-    "init_sox_effects",
-    "shutdown_sox_effects",
-    "effect_names",
-    "apply_effects_tensor",
-    "apply_effects_file",
-]
-
diff --git a/paddlespeech/audio/sox_effects/sox_effects.py b/paddlespeech/audio/sox_effects/sox_effects.py
deleted file mode 100644
index e9b839c1ad2f07977a173406feff9932fc578957..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/sox_effects/sox_effects.py
+++ /dev/null
@@ -1,238 +0,0 @@
-import os
-from typing import List, Optional, Tuple
-import paddle
-import numpy
-
-from paddlespeech.audio._internal import module_utils as _mod_utils
-from paddlespeech.audio.utils.sox_utils import list_effects
-from paddlespeech.audio import _paddleaudio as paddleaudio
-
-#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/sox_effects/sox_effects.py
-
-@_mod_utils.requires_sox()
-def init_sox_effects():
-    """Initialize resources required to use sox effects.
-
-    Note:
-        You do not need to call this function manually. It is called automatically.
-
-    Once initialized, you do not need to call this function again across the multiple uses of
-    sox effects though it is safe to do so as long as :func:`shutdown_sox_effects` is not called yet.
-    Once :func:`shutdown_sox_effects` is called, you can no longer use SoX effects and initializing
-    again will result in error.
-    """
-    paddleaudio.sox_effects_initialize_sox_effects()
-
-
-@_mod_utils.requires_sox()
-def shutdown_sox_effects():
-    """Clean up resources required to use sox effects.
-
-    Note:
-        You do not need to call this function manually. It is called automatically.
-
-    It is safe to call this function multiple times.
-    Once :py:func:`shutdown_sox_effects` is called, you can no longer use SoX effects and
-    initializing again will result in error.
-    """
-    paddleaudio.sox_effects_shutdown_sox_effects()
-
-
-@_mod_utils.requires_sox()
-def effect_names() -> List[str]:
-    """Gets list of valid sox effect names
-
-    Returns:
-        List[str]: list of available effect names.
-
-    Example
-        >>> paddleaudio.sox_effects.effect_names()
-        ['allpass', 'band', 'bandpass', ... ]
-    """
-    return list(list_effects().keys())
-
-
-@_mod_utils.requires_sox()
-def apply_effects_tensor(
-    tensor: paddle.Tensor,
-    sample_rate: int,
-    effects: List[List[str]],
-    channels_first: bool = True,
-) -> Tuple[paddle.Tensor, int]:
-    """Apply sox effects to given Tensor
-
-    .. devices:: CPU
-
-    Note:
-        This function only works on CPU Tensors.
-        This function works in the way very similar to ``sox`` command, however there are slight
-        differences. For example, ``sox`` command adds certain effects automatically (such as
-        ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function does
-        only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
-        need to give ``rate`` effect with desired sampling rate.).
-
-    Args:
-        tensor (paddle.Tensor): Input 2D CPU Tensor.
-        sample_rate (int): Sample rate
-        effects (List[List[str]]): List of effects.
-        channels_first (bool, optional): Indicates if the input Tensor's dimension is
-            `[channels, time]` or `[time, channels]`
-
-    Returns:
-        (Tensor, int): Resulting Tensor and sample rate.
-        The resulting Tensor has the same ``dtype`` as the input Tensor, and
-        the same channels order. The shape of the Tensor can be different based on the
-        effects applied. Sample rate can also be different based on the effects applied.
-
-    Example - Basic usage
-        >>>
-        >>> # Defines the effects to apply
-        >>> effects = [
-        ...     ['gain', '-n'],  # normalises to 0dB
-        ...     ['pitch', '5'],  # 5 cent pitch shift
-        ...     ['rate', '8000'],  # resample to 8000 Hz
-        ... ]
-        >>>
-        >>> # Generate pseudo wave:
-        >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
-        >>> sample_rate = 16000
-        >>> waveform = 2 * paddle.rand([2, sample_rate * 1]) - 1
-        >>> waveform.shape
-        paddle.Size([2, 16000])
-        >>> waveform
-        tensor([[ 0.3138,  0.7620, -0.9019,  ..., -0.7495, -0.4935,  0.5442],
-                [-0.0832,  0.0061,  0.8233,  ..., -0.5176, -0.9140, -0.2434]])
-        >>>
-        >>> # Apply effects
-        >>> waveform, sample_rate = apply_effects_tensor(
-        ...     wave_form, sample_rate, effects, channels_first=True)
-        >>>
-        >>> # Check the result
-        >>> # The new waveform is sampling rate 8000, 1 second.
-        >>> # normalization and channel order are preserved
-        >>> waveform.shape
-        paddle.Size([2, 8000])
-        >>> waveform
-        tensor([[ 0.5054, -0.5518, -0.4800,  ..., -0.0076,  0.0096, -0.0110],
-                [ 0.1331,  0.0436, -0.3783,  ..., -0.0035,  0.0012,  0.0008]])
-        >>> sample_rate
-        8000
-
-    """
-    tensor_np = tensor.numpy()
-    ret = paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate, effects, channels_first)
-    if ret is not None:
-       return (paddle.to_tensor(ret[0]), ret[1])
-    raise RuntimeError("Failed to apply sox effect")
-
-
-@_mod_utils.requires_sox()
-def apply_effects_file(
-    path: str,
-    effects: List[List[str]],
-    normalize: bool = True,
-    channels_first: bool = True,
-    format: Optional[str] = None,
-) -> Tuple[paddle.Tensor, int]:
-    """Apply sox effects to the audio file and load the resulting data as Tensor
-
-    Note:
-        This function works in the way very similar to ``sox`` command, however there are slight
-        differences. For example, ``sox`` commnad adds certain effects automatically (such as
-        ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given
-        effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate``
-        effect with desired sampling rate, because internally, ``speed`` effects only alter sampling
-        rate and leave samples untouched.
-
-    Args:
-        path (path-like object or file-like object):
-        effects (List[List[str]]): List of effects.
-        normalize (bool, optional):
-            When ``True``, this function always return ``float32``, and sample values are
-            normalized to ``[-1.0, 1.0]``.
-            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
-            integer type. This argument has no effect for formats other
-            than integer WAV type.
-        channels_first (bool, optional): When True, the returned Tensor has dimension `[channel, time]`.
-            Otherwise, the returned Tensor's dimension is `[time, channel]`.
-        format (str or None, optional):
-            Override the format detection with the given format.
-            Providing the argument might help when libsox can not infer the format
-            from header or extension,
-
-    Returns:
-        (Tensor, int): Resulting Tensor and sample rate.
-        If ``normalize=True``, the resulting Tensor is always ``float32`` type.
-        If ``normalize=False`` and the input audio file is of integer WAV file, then the
-        resulting Tensor has corresponding integer type. (Note 24 bit integer type is not supported)
-        If ``channels_first=True``, the resulting Tensor has dimension `[channel, time]`,
-        otherwise `[time, channel]`.
-
-    Example - Basic usage
-        >>>
-        >>> # Defines the effects to apply
-        >>> effects = [
-        ...     ['gain', '-n'],  # normalises to 0dB
-        ...     ['pitch', '5'],  # 5 cent pitch shift
-        ...     ['rate', '8000'],  # resample to 8000 Hz
-        ... ]
-        >>>
-        >>> # Apply effects and load data with channels_first=True
-        >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
-        >>>
-        >>> # Check the result
-        >>> waveform.shape
-        paddle.Size([2, 8000])
-        >>> waveform
-        tensor([[ 5.1151e-03,  1.8073e-02,  2.2188e-02,  ...,  1.0431e-07,
-                 -1.4761e-07,  1.8114e-07],
-                [-2.6924e-03,  2.1860e-03,  1.0650e-02,  ...,  6.4122e-07,
-                 -5.6159e-07,  4.8103e-07]])
-        >>> sample_rate
-        8000
-
-    Example - Apply random speed perturbation to dataset
-        >>>
-        >>> # Load data from file, apply random speed perturbation
-        >>> class RandomPerturbationFile(paddle.utils.data.Dataset):
-        ...     \"\"\"Given flist, apply random speed perturbation
-        ...
-        ...     Suppose all the input files are at least one second long.
-        ...     \"\"\"
-        ...     def __init__(self, flist: List[str], sample_rate: int):
-        ...         super().__init__()
-        ...         self.flist = flist
-        ...         self.sample_rate = sample_rate
-        ...
-        ...     def __getitem__(self, index):
-        ...         speed = 0.5 + 1.5 * random.randn()
-        ...         effects = [
-        ...             ['gain', '-n', '-10'],  # apply 10 db attenuation
-        ...             ['remix', '-'],  # merge all the channels
-        ...             ['speed', f'{speed:.5f}'],  # duration is now 0.5 ~ 2.0 seconds.
-        ...             ['rate', f'{self.sample_rate}'],
-        ...             ['pad', '0', '1.5'],  # add 1.5 seconds silence at the end
-        ...             ['trim', '0', '2'],  # get the first 2 seconds
-        ...         ]
-        ...         waveform, _ = paddleaudio.sox_effects.apply_effects_file(
-        ...             self.flist[index], effects)
-        ...         return waveform
-        ...
-        ...     def __len__(self):
-        ...         return len(self.flist)
-        ...
-        >>> dataset = RandomPerturbationFile(file_list, sample_rate=8000)
-        >>> loader = paddle.utils.data.DataLoader(dataset, batch_size=32)
-        >>> for batch in loader:
-        >>>     pass
-    """
-    if hasattr(path, "read"):
-        ret = paddleaudio.apply_effects_fileobj(path, effects, normalize, channels_first, format)
-        if ret is None:
-            raise RuntimeError("Failed to load audio from {}".format(path))
-        return (paddle.to_tensor(ret[0]), ret[1])
-    path = os.fspath(path)
-    ret = paddleaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)
-    if ret is not None:
-        return (paddle.to_tensor(ret[0]), ret[1])
-    raise RuntimeError("Failed to load audio from {}".format(path))
\ No newline at end of file
diff --git a/paddlespeech/audio/src/CMakeLists.txt b/paddlespeech/audio/src/CMakeLists.txt
deleted file mode 100644
index 4c46fbe2483a850c519ab8cdbda90ca4dac31210..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/CMakeLists.txt
+++ /dev/null
@@ -1,201 +0,0 @@
-if (MSVC)
-  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-endif()
-
-################################################################################
-# libpaddleaudio
-################################################################################
-set(
-  LIBPADDLEAUDIO_SOURCES
-  utils.cpp
-  )
-
-set(
-  LIBPADDLEAUDIO_INCLUDE_DIRS
-  ${PROJECT_SOURCE_DIR}
-  )
-
-set(
-  LIBPADDLEAUDIO_LINK_LIBRARIES
-  )
-
-set(
-  LIBPADDLEAUDIO_COMPILE_DEFINITIONS)
-
-#------------------------------------------------------------------------------#
-# START OF CUSTOMIZATION LOGICS
-#------------------------------------------------------------------------------#
-
-if(BUILD_SOX)
-  list(
-    APPEND
-    LIBPADDLEAUDIO_LINK_LIBRARIES
-    libsox
-    )
-  list(
-    APPEND
-    LIBPADDLEAUDIO_SOURCES
-    #sox/io.cpp
-    #sox/utils.cpp
-    #sox/effects.cpp
-    #sox/effects_chain.cpp
-    #sox/types.cpp
-    )
-  list(
-    APPEND
-    LIBPADDLEAUDIO_COMPILE_DEFINITIONS
-    INCLUDE_SOX
-    )
-endif()
-
-
-if(BUILD_KALDI)
-  list(
-    APPEND
-    LIBPADDLEAUDIO_LINK_LIBRARIES
-    libkaldi
-  )
-  list(
-    APPEND
-    LIBPADDLEAUDIO_COMPILE_DEFINITIONS
-    INCLUDE_KALDI
-    COMPILE_WITHOUT_OPENFST
-  )
-endif()
-
-#------------------------------------------------------------------------------#
-# END OF CUSTOMIZATION LOGICS
-#------------------------------------------------------------------------------#
-
-function (define_library name source include_dirs link_libraries compile_defs)
-  add_library(${name} SHARED ${source})
-  target_include_directories(${name} PRIVATE ${include_dirs})
-  target_link_libraries(${name} ${link_libraries})
-  target_compile_definitions(${name} PRIVATE ${compile_defs})
-  set_target_properties(${name} PROPERTIES PREFIX "")
-  if (MSVC)
-    set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
-  endif(MSVC)
-  install(
-    TARGETS ${name}
-    LIBRARY DESTINATION lib
-    RUNTIME DESTINATION lib  # For Windows
-    )
-endfunction()
-
-
-define_library(
-  libpaddleaudio
-  "${LIBPADDLEAUDIO_SOURCES}"
-  "${LIBPADDLEAUDIO_INCLUDE_DIRS}"
-  "${LIBPADDLEAUDIO_LINK_LIBRARIES}"
-  "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}"
-)
-
-if (APPLE)
-  set(TORCHAUDIO_LIBRARY libpaddleaudio CACHE INTERNAL "")
-else()
-  set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libpaddleaudio -Wl,--as-needed CACHE INTERNAL "")
-endif()
-
-  ################################################################################
-# _paddleaudio.so
-################################################################################
-if (BUILD_PADDLEAUDIO_PYTHON_EXTENSION)
-if (WIN32)
-  find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
-  set(ADDITIONAL_ITEMS Python3::Python)
-endif()
-function(define_extension name sources include_dirs libraries definitions)
-  add_library(${name} SHARED ${sources})
-  target_compile_definitions(${name} PRIVATE "${definitions}")
-  target_include_directories(
-    ${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${pybind11_INCLUDE_DIR} ${include_dirs})
-  target_link_libraries(
-    ${name}
-    ${libraries}
-    ${TORCH_PYTHON_LIBRARY}
-    ${ADDITIONAL_ITEMS}
-    )
-  set_target_properties(${name} PROPERTIES PREFIX "")
-  if (MSVC)
-    set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
-  endif(MSVC)
-  if (APPLE)
-    # https://github.com/facebookarchive/caffe2/issues/854#issuecomment-364538485
-    # https://github.com/pytorch/pytorch/commit/73f6715f4725a0723d8171d3131e09ac7abf0666
-    set_target_properties(${name} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-  endif()
-  install(
-    TARGETS ${name}
-    LIBRARY DESTINATION .
-    RUNTIME DESTINATION .  # For Windows
-    )
-endfunction()
-
-set(
-  EXTENSION_SOURCES
-  pybind/pybind.cpp
-  )
-#----------------------------------------------------------------------------#
-# START OF CUSTOMIZATION LOGICS
-#----------------------------------------------------------------------------#
-if(BUILD_SOX)
-  list(
-    APPEND
-    EXTENSION_SOURCES
-    pybind/sox/effects.cpp
-    pybind/sox/effects_chain.cpp
-    pybind/sox/io.cpp
-    pybind/sox/types.cpp
-    pybind/sox/utils.cpp
-    )
-endif()
-
-if(BUILD_KALDI)
-  list(
-    APPEND
-    EXTENSION_SOURCES
-    pybind/kaldi/kaldi_feature_wrapper.cc
-    pybind/kaldi/kaldi_feature.cc
-    )
-endif()
-#----------------------------------------------------------------------------#
-# END OF CUSTOMIZATION LOGICS
-#----------------------------------------------------------------------------#
-define_extension(
-  _paddleaudio
-  "${EXTENSION_SOURCES}"
-  ""
-  libpaddleaudio
-  "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}"
-  )
-# if(BUILD_CTC_DECODER)
-#   set(
-#     DECODER_EXTENSION_SOURCES
-#     decoder/bindings/pybind.cpp
-#     )
-#   define_extension(
-#     _paddleaudio_decoder
-#     "${DECODER_EXTENSION_SOURCES}"
-#     ""
-#     "libpaddleaudio_decoder"
-#     "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
-#     )
-# endif()
-# if(USE_FFMPEG)
-#   set(
-#     FFMPEG_EXTENSION_SOURCES
-#     ffmpeg/pybind/typedefs.cpp
-#     ffmpeg/pybind/pybind.cpp
-#     ffmpeg/pybind/stream_reader.cpp
-#     )
-#   define_extension(
-#     _paddleaudio_ffmpeg
-#     "${FFMPEG_EXTENSION_SOURCES}"
-#     "${FFMPEG_INCLUDE_DIRS}"
-#     "libpaddleaudio_ffmpeg"
-#     "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
-#     )
-# endif()
-endif()
diff --git a/paddlespeech/audio/src/optional/COPYING b/paddlespeech/audio/src/optional/COPYING
deleted file mode 100644
index 0e259d42c996742e9e3cba14c677129b2c1b6311..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/optional/COPYING
+++ /dev/null
@@ -1,121 +0,0 @@
-Creative Commons Legal Code
-
-CC0 1.0 Universal
-
-    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
-    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
-    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
-    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
-    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
-    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
-    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
-    HEREUNDER.
-
-Statement of Purpose
-
-The laws of most jurisdictions throughout the world automatically confer
-exclusive Copyright and Related Rights (defined below) upon the creator
-and subsequent owner(s) (each and all, an "owner") of an original work of
-authorship and/or a database (each, a "Work").
-
-Certain owners wish to permanently relinquish those rights to a Work for
-the purpose of contributing to a commons of creative, cultural and
-scientific works ("Commons") that the public can reliably and without fear
-of later claims of infringement build upon, modify, incorporate in other
-works, reuse and redistribute as freely as possible in any form whatsoever
-and for any purposes, including without limitation commercial purposes.
-These owners may contribute to the Commons to promote the ideal of a free
-culture and the further production of creative, cultural and scientific
-works, or to gain reputation or greater distribution for their Work in
-part through the use and efforts of others.
-
-For these and/or other purposes and motivations, and without any
-expectation of additional consideration or compensation, the person
-associating CC0 with a Work (the "Affirmer"), to the extent that he or she
-is an owner of Copyright and Related Rights in the Work, voluntarily
-elects to apply CC0 to the Work and publicly distribute the Work under its
-terms, with knowledge of his or her Copyright and Related Rights in the
-Work and the meaning and intended legal effect of CC0 on those rights.
-
-1. Copyright and Related Rights. A Work made available under CC0 may be
-protected by copyright and related or neighboring rights ("Copyright and
-Related Rights"). Copyright and Related Rights include, but are not
-limited to, the following:
-
-  i. the right to reproduce, adapt, distribute, perform, display,
-     communicate, and translate a Work;
- ii. moral rights retained by the original author(s) and/or performer(s);
-iii. publicity and privacy rights pertaining to a person's image or
-     likeness depicted in a Work;
- iv. rights protecting against unfair competition in regards to a Work,
-     subject to the limitations in paragraph 4(a), below;
-  v. rights protecting the extraction, dissemination, use and reuse of data
-     in a Work;
- vi. database rights (such as those arising under Directive 96/9/EC of the
-     European Parliament and of the Council of 11 March 1996 on the legal
-     protection of databases, and under any national implementation
-     thereof, including any amended or successor version of such
-     directive); and
-vii. other similar, equivalent or corresponding rights throughout the
-     world based on applicable law or treaty, and any national
-     implementations thereof.
-
-2. Waiver. To the greatest extent permitted by, but not in contravention
-of, applicable law, Affirmer hereby overtly, fully, permanently,
-irrevocably and unconditionally waives, abandons, and surrenders all of
-Affirmer's Copyright and Related Rights and associated claims and causes
-of action, whether now known or unknown (including existing as well as
-future claims and causes of action), in the Work (i) in all territories
-worldwide, (ii) for the maximum duration provided by applicable law or
-treaty (including future time extensions), (iii) in any current or future
-medium and for any number of copies, and (iv) for any purpose whatsoever,
-including without limitation commercial, advertising or promotional
-purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
-member of the public at large and to the detriment of Affirmer's heirs and
-successors, fully intending that such Waiver shall not be subject to
-revocation, rescission, cancellation, termination, or any other legal or
-equitable action to disrupt the quiet enjoyment of the Work by the public
-as contemplated by Affirmer's express Statement of Purpose.
-
-3. Public License Fallback. Should any part of the Waiver for any reason
-be judged legally invalid or ineffective under applicable law, then the
-Waiver shall be preserved to the maximum extent permitted taking into
-account Affirmer's express Statement of Purpose. In addition, to the
-extent the Waiver is so judged Affirmer hereby grants to each affected
-person a royalty-free, non transferable, non sublicensable, non exclusive,
-irrevocable and unconditional license to exercise Affirmer's Copyright and
-Related Rights in the Work (i) in all territories worldwide, (ii) for the
-maximum duration provided by applicable law or treaty (including future
-time extensions), (iii) in any current or future medium and for any number
-of copies, and (iv) for any purpose whatsoever, including without
-limitation commercial, advertising or promotional purposes (the
-"License"). The License shall be deemed effective as of the date CC0 was
-applied by Affirmer to the Work. Should any part of the License for any
-reason be judged legally invalid or ineffective under applicable law, such
-partial invalidity or ineffectiveness shall not invalidate the remainder
-of the License, and in such case Affirmer hereby affirms that he or she
-will not (i) exercise any of his or her remaining Copyright and Related
-Rights in the Work or (ii) assert any associated claims and causes of
-action with respect to the Work, in either case contrary to Affirmer's
-express Statement of Purpose.
-
-4. Limitations and Disclaimers.
-
- a. No trademark or patent rights held by Affirmer are waived, abandoned,
-    surrendered, licensed or otherwise affected by this document.
- b. Affirmer offers the Work as-is and makes no representations or
-    warranties of any kind concerning the Work, express, implied,
-    statutory or otherwise, including without limitation warranties of
-    title, merchantability, fitness for a particular purpose, non
-    infringement, or the absence of latent or other defects, accuracy, or
-    the present or absence of errors, whether or not discoverable, all to
-    the greatest extent permissible under applicable law.
- c. Affirmer disclaims responsibility for clearing rights of other persons
-    that may apply to the Work or any use thereof, including without
-    limitation any person's Copyright and Related Rights in the Work.
-    Further, Affirmer disclaims responsibility for obtaining any necessary
-    consents, permissions or other rights required for any use of the
-    Work.
- d. Affirmer understands and acknowledges that Creative Commons is not a
-    party to this document and has no duty or obligation with respect to
-    this CC0 or use of the Work.
diff --git a/paddlespeech/audio/src/optional/optional.hpp b/paddlespeech/audio/src/optional/optional.hpp
deleted file mode 100644
index bceb41135712a879bf2c205138d54bf06b4f1209..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/optional/optional.hpp
+++ /dev/null
@@ -1,2182 +0,0 @@
-
-///
-// optional - An implementation of std::optional with extensions
-// Written in 2017 by Sy Brand (tartanllama@gmail.com, @TartanLlama)
-//
-// Documentation available at https://tl.tartanllama.xyz/
-//
-// To the extent possible under law, the author(s) have dedicated all
-// copyright and related and neighboring rights to this software to the
-// public domain worldwide. This software is distributed without any warranty.
-//
-// You should have received a copy of the CC0 Public Domain Dedication
-// along with this software. If not, see
-// <http://creativecommons.org/publicdomain/zero/1.0/>.
-// https://github.com/TartanLlama/optional
-///
-
-#ifndef TL_OPTIONAL_HPP
-#define TL_OPTIONAL_HPP
-
-#define TL_OPTIONAL_VERSION_MAJOR 1
-#define TL_OPTIONAL_VERSION_MINOR 0
-#define TL_OPTIONAL_VERSION_PATCH 0
-
-#include <exception>
-#include <functional>
-#include <new>
-#include <type_traits>
-#include <utility>
-
-#if (defined(_MSC_VER) && _MSC_VER == 1900)
-#define TL_OPTIONAL_MSVC2015
-#endif
-
-#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \
-     !defined(__clang__))
-#define TL_OPTIONAL_GCC49
-#endif
-
-#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 4 && \
-     !defined(__clang__))
-#define TL_OPTIONAL_GCC54
-#endif
-
-#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 5 && \
-     !defined(__clang__))
-#define TL_OPTIONAL_GCC55
-#endif
-
-#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \
-     !defined(__clang__))
-// GCC < 5 doesn't support overloading on const&& for member functions
-#define TL_OPTIONAL_NO_CONSTRR
-
-// GCC < 5 doesn't support some standard C++11 type traits
-#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
-    std::has_trivial_copy_constructor<T>::value
-#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
-    std::has_trivial_copy_assign<T>::value
-
-// This one will be different for GCC 5.7 if it's ever supported
-#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \
-    std::is_trivially_destructible<T>::value
-
-// GCC 5 < v < 8 has a bug in is_trivially_copy_constructible which breaks
-// std::vector
-// for non-copyable types
-#elif (defined(__GNUC__) && __GNUC__ < 8 && !defined(__clang__))
-#ifndef TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX
-#define TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX
-namespace tl {
-namespace detail {
-template <class T>
-struct is_trivially_copy_constructible
-    : std::is_trivially_copy_constructible<T> {};
-#ifdef _GLIBCXX_VECTOR
-template <class T, class A>
-struct is_trivially_copy_constructible<std::vector<T, A>>
-    : std::is_trivially_copy_constructible<T> {};
-#endif
-}
-}
-#endif
-
-#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
-    tl::detail::is_trivially_copy_constructible<T>::value
-#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
-    std::is_trivially_copy_assignable<T>::value
-#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \
-    std::is_trivially_destructible<T>::value
-#else
-#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
-    std::is_trivially_copy_constructible<T>::value
-#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
-    std::is_trivially_copy_assignable<T>::value
-#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \
-    std::is_trivially_destructible<T>::value
-#endif
-
-#if __cplusplus > 201103L
-#define TL_OPTIONAL_CXX14
-#endif
-
-// constexpr implies const in C++11, not C++14
-#if (__cplusplus == 201103L || defined(TL_OPTIONAL_MSVC2015) || \
-     defined(TL_OPTIONAL_GCC49))
-#define TL_OPTIONAL_11_CONSTEXPR
-#else
-#define TL_OPTIONAL_11_CONSTEXPR constexpr
-#endif
-
-namespace tl {
-#ifndef TL_MONOSTATE_INPLACE_MUTEX
-#define TL_MONOSTATE_INPLACE_MUTEX
-/// Used to represent an optional with no data; essentially a bool
-class monostate {};
-
-///  A tag type to tell optional to construct its value in-place
-struct in_place_t {
-    explicit in_place_t() = default;
-};
-/// A tag to tell optional to construct its value in-place
-static constexpr in_place_t in_place{};
-#endif
-
-template <class T>
-class optional;
-
-namespace detail {
-#ifndef TL_TRAITS_MUTEX
-#define TL_TRAITS_MUTEX
-// C++14-style aliases for brevity
-template <class T>
-using remove_const_t = typename std::remove_const<T>::type;
-template <class T>
-using remove_reference_t = typename std::remove_reference<T>::type;
-template <class T>
-using decay_t = typename std::decay<T>::type;
-template <bool E, class T = void>
-using enable_if_t = typename std::enable_if<E, T>::type;
-template <bool B, class T, class F>
-using conditional_t = typename std::conditional<B, T, F>::type;
-
-// std::conjunction from C++17
-template <class...>
-struct conjunction : std::true_type {};
-template <class B>
-struct conjunction<B> : B {};
-template <class B, class... Bs>
-struct conjunction<B, Bs...>
-    : std::conditional<bool(B::value), conjunction<Bs...>, B>::type {};
-
-#if defined(_LIBCPP_VERSION) && __cplusplus == 201103L
-#define TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
-#endif
-
-// In C++11 mode, there's an issue in libc++'s std::mem_fn
-// which results in a hard-error when using it in a noexcept expression
-// in some cases. This is a check to workaround the common failing case.
-#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
-template <class T>
-struct is_pointer_to_non_const_member_func : std::false_type {};
-template <class T, class Ret, class... Args>
-struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...)>
-    : std::true_type {};
-template <class T, class Ret, class... Args>
-struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) &>
-    : std::true_type {};
-template <class T, class Ret, class... Args>
-struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) &&>
-    : std::true_type {};
-template <class T, class Ret, class... Args>
-struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile>
-    : std::true_type {};
-template <class T, class Ret, class... Args>
-struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile &>
-    : std::true_type {};
-template <class T, class Ret, class... Args>
-struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile &&>
-    : std::true_type {};
-
-template <class T>
-struct is_const_or_const_ref : std::false_type {};
-template <class T>
-struct is_const_or_const_ref<T const &> : std::true_type {};
-template <class T>
-struct is_const_or_const_ref<T const> : std::true_type {};
-#endif
-
-// std::invoke from C++17
-// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround
-template <
-    typename Fn,
-    typename... Args,
-#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
-    typename = enable_if_t<!(is_pointer_to_non_const_member_func<Fn>::value &&
-                             is_const_or_const_ref<Args...>::value)>,
-#endif
-    typename = enable_if_t<std::is_member_pointer<decay_t<Fn>>::value>,
-    int = 0>
-constexpr auto invoke(Fn &&f, Args &&... args) noexcept(
-    noexcept(std::mem_fn(f)(std::forward<Args>(args)...)))
-    -> decltype(std::mem_fn(f)(std::forward<Args>(args)...)) {
-    return std::mem_fn(f)(std::forward<Args>(args)...);
-}
-
-template <typename Fn,
-          typename... Args,
-          typename = enable_if_t<!std::is_member_pointer<decay_t<Fn>>::value>>
-constexpr auto invoke(Fn &&f, Args &&... args) noexcept(
-    noexcept(std::forward<Fn>(f)(std::forward<Args>(args)...)))
-    -> decltype(std::forward<Fn>(f)(std::forward<Args>(args)...)) {
-    return std::forward<Fn>(f)(std::forward<Args>(args)...);
-}
-
-// std::invoke_result from C++17
-template <class F, class, class... Us>
-struct invoke_result_impl;
-
-template <class F, class... Us>
-struct invoke_result_impl<
-    F,
-    decltype(detail::invoke(std::declval<F>(), std::declval<Us>()...), void()),
-    Us...> {
-    using type =
-        decltype(detail::invoke(std::declval<F>(), std::declval<Us>()...));
-};
-
-template <class F, class... Us>
-using invoke_result = invoke_result_impl<F, void, Us...>;
-
-template <class F, class... Us>
-using invoke_result_t = typename invoke_result<F, Us...>::type;
-
-#if defined(_MSC_VER) && _MSC_VER <= 1900
-// TODO make a version which works with MSVC 2015
-template <class T, class U = T>
-struct is_swappable : std::true_type {};
-
-template <class T, class U = T>
-struct is_nothrow_swappable : std::true_type {};
-#else
-// https://stackoverflow.com/questions/26744589/what-is-a-proper-way-to-implement-is-swappable-to-test-for-the-swappable-concept
-namespace swap_adl_tests {
-// if swap ADL finds this then it would call std::swap otherwise (same
-// signature)
-struct tag {};
-
-template <class T>
-tag swap(T &, T &);
-template <class T, std::size_t N>
-tag swap(T (&a)[N], T (&b)[N]);
-
-// helper functions to test if an unqualified swap is possible, and if it
-// becomes std::swap
-template <class, class>
-std::false_type can_swap(...) noexcept(false);
-template <class T,
-          class U,
-          class = decltype(swap(std::declval<T &>(), std::declval<U &>()))>
-std::true_type can_swap(int) noexcept(noexcept(swap(std::declval<T &>(),
-                                                    std::declval<U &>())));
-
-template <class, class>
-std::false_type uses_std(...);
-template <class T, class U>
-std::is_same<decltype(swap(std::declval<T &>(), std::declval<U &>())), tag>
-uses_std(int);
-
-template <class T>
-struct is_std_swap_noexcept
-    : std::integral_constant<bool,
-                             std::is_nothrow_move_constructible<T>::value &&
-                                 std::is_nothrow_move_assignable<T>::value> {};
-
-template <class T, std::size_t N>
-struct is_std_swap_noexcept<T[N]> : is_std_swap_noexcept<T> {};
-
-template <class T, class U>
-struct is_adl_swap_noexcept
-    : std::integral_constant<bool, noexcept(can_swap<T, U>(0))> {};
-}  // namespace swap_adl_tests
-
-template <class T, class U = T>
-struct is_swappable
-    : std::integral_constant<
-          bool,
-          decltype(detail::swap_adl_tests::can_swap<T, U>(0))::value &&
-              (!decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value ||
-               (std::is_move_assignable<T>::value &&
-                std::is_move_constructible<T>::value))> {};
-
-template <class T, std::size_t N>
-struct is_swappable<T[N], T[N]>
-    : std::integral_constant<
-          bool,
-          decltype(detail::swap_adl_tests::can_swap<T[N], T[N]>(0))::value &&
-              (!decltype(
-                   detail::swap_adl_tests::uses_std<T[N], T[N]>(0))::value ||
-               is_swappable<T, T>::value)> {};
-
-template <class T, class U = T>
-struct is_nothrow_swappable
-    : std::integral_constant<
-          bool,
-          is_swappable<T, U>::value &&
-              ((decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value
-                    &&detail::swap_adl_tests::is_std_swap_noexcept<T>::value) ||
-               (!decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value &&
-                    detail::swap_adl_tests::is_adl_swap_noexcept<T,
-                                                                 U>::value))> {
-};
-#endif
-#endif
-
-// std::void_t from C++17
-template <class...>
-struct voider {
-    using type = void;
-};
-template <class... Ts>
-using void_t = typename voider<Ts...>::type;
-
-// Trait for checking if a type is a tl::optional
-template <class T>
-struct is_optional_impl : std::false_type {};
-template <class T>
-struct is_optional_impl<optional<T>> : std::true_type {};
-template <class T>
-using is_optional = is_optional_impl<decay_t<T>>;
-
-// Change void to tl::monostate
-template <class U>
-using fixup_void = conditional_t<std::is_void<U>::value, monostate, U>;
-
-template <class F, class U, class = invoke_result_t<F, U>>
-using get_map_return = optional<fixup_void<invoke_result_t<F, U>>>;
-
-// Check if invoking F for some Us returns void
-template <class F, class = void, class... U>
-struct returns_void_impl;
-template <class F, class... U>
-struct returns_void_impl<F, void_t<invoke_result_t<F, U...>>, U...>
-    : std::is_void<invoke_result_t<F, U...>> {};
-template <class F, class... U>
-using returns_void = returns_void_impl<F, void, U...>;
-
-template <class T, class... U>
-using enable_if_ret_void = enable_if_t<returns_void<T &&, U...>::value>;
-
-template <class T, class... U>
-using disable_if_ret_void = enable_if_t<!returns_void<T &&, U...>::value>;
-
-template <class T, class U>
-using enable_forward_value =
-    detail::enable_if_t<std::is_constructible<T, U &&>::value &&
-                        !std::is_same<detail::decay_t<U>, in_place_t>::value &&
-                        !std::is_same<optional<T>, detail::decay_t<U>>::value>;
-
-template <class T, class U, class Other>
-using enable_from_other = detail::enable_if_t<
-    std::is_constructible<T, Other>::value &&
-    !std::is_constructible<T, optional<U> &>::value &&
-    !std::is_constructible<T, optional<U> &&>::value &&
-    !std::is_constructible<T, const optional<U> &>::value &&
-    !std::is_constructible<T, const optional<U> &&>::value &&
-    !std::is_convertible<optional<U> &, T>::value &&
-    !std::is_convertible<optional<U> &&, T>::value &&
-    !std::is_convertible<const optional<U> &, T>::value &&
-    !std::is_convertible<const optional<U> &&, T>::value>;
-
-template <class T, class U>
-using enable_assign_forward = detail::enable_if_t<
-    !std::is_same<optional<T>, detail::decay_t<U>>::value &&
-    !detail::conjunction<std::is_scalar<T>,
-                         std::is_same<T, detail::decay_t<U>>>::value &&
-    std::is_constructible<T, U>::value && std::is_assignable<T &, U>::value>;
-
-template <class T, class U, class Other>
-using enable_assign_from_other = detail::enable_if_t<
-    std::is_constructible<T, Other>::value &&
-    std::is_assignable<T &, Other>::value &&
-    !std::is_constructible<T, optional<U> &>::value &&
-    !std::is_constructible<T, optional<U> &&>::value &&
-    !std::is_constructible<T, const optional<U> &>::value &&
-    !std::is_constructible<T, const optional<U> &&>::value &&
-    !std::is_convertible<optional<U> &, T>::value &&
-    !std::is_convertible<optional<U> &&, T>::value &&
-    !std::is_convertible<const optional<U> &, T>::value &&
-    !std::is_convertible<const optional<U> &&, T>::value &&
-    !std::is_assignable<T &, optional<U> &>::value &&
-    !std::is_assignable<T &, optional<U> &&>::value &&
-    !std::is_assignable<T &, const optional<U> &>::value &&
-    !std::is_assignable<T &, const optional<U> &&>::value>;
-
-// The storage base manages the actual storage, and correctly propagates
-// trivial destruction from T. This case is for when T is not trivially
-// destructible.
-template <class T, bool = ::std::is_trivially_destructible<T>::value>
-struct optional_storage_base {
-    TL_OPTIONAL_11_CONSTEXPR optional_storage_base() noexcept
-        : m_dummy(),
-          m_has_value(false) {}
-
-    template <class... U>
-    TL_OPTIONAL_11_CONSTEXPR optional_storage_base(in_place_t, U &&... u)
-        : m_value(std::forward<U>(u)...), m_has_value(true) {}
-
-    ~optional_storage_base() {
-        if (m_has_value) {
-            m_value.~T();
-            m_has_value = false;
-        }
-    }
-
-    struct dummy {};
-    union {
-        dummy m_dummy;
-        T m_value;
-    };
-
-    bool m_has_value;
-};
-
-// This case is for when T is trivially destructible.
-template <class T>
-struct optional_storage_base<T, true> {
-    TL_OPTIONAL_11_CONSTEXPR optional_storage_base() noexcept
-        : m_dummy(),
-          m_has_value(false) {}
-
-    template <class... U>
-    TL_OPTIONAL_11_CONSTEXPR optional_storage_base(in_place_t, U &&... u)
-        : m_value(std::forward<U>(u)...), m_has_value(true) {}
-
-    // No destructor, so this class is trivially destructible
-
-    struct dummy {};
-    union {
-        dummy m_dummy;
-        T m_value;
-    };
-
-    bool m_has_value = false;
-};
-
-// This base class provides some handy member functions which can be used in
-// further derived classes
-template <class T>
-struct optional_operations_base : optional_storage_base<T> {
-    using optional_storage_base<T>::optional_storage_base;
-
-    void hard_reset() noexcept {
-        get().~T();
-        this->m_has_value = false;
-    }
-
-    template <class... Args>
-    void construct(Args &&... args) noexcept {
-        new (std::addressof(this->m_value)) T(std::forward<Args>(args)...);
-        this->m_has_value = true;
-    }
-
-    template <class Opt>
-    void assign(Opt &&rhs) {
-        if (this->has_value()) {
-            if (rhs.has_value()) {
-                this->m_value = std::forward<Opt>(rhs).get();
-            } else {
-                this->m_value.~T();
-                this->m_has_value = false;
-            }
-        }
-
-        else if (rhs.has_value()) {
-            construct(std::forward<Opt>(rhs).get());
-        }
-    }
-
-    bool has_value() const { return this->m_has_value; }
-
-    TL_OPTIONAL_11_CONSTEXPR T &get() & { return this->m_value; }
-    TL_OPTIONAL_11_CONSTEXPR const T &get() const & { return this->m_value; }
-    TL_OPTIONAL_11_CONSTEXPR T &&get() && { return std::move(this->m_value); }
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    constexpr const T &&get() const && { return std::move(this->m_value); }
-#endif
-};
-
-// This class manages conditionally having a trivial copy constructor
-// This specialization is for when T is trivially copy constructible
-template <class T, bool = TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T)>
-struct optional_copy_base : optional_operations_base<T> {
-    using optional_operations_base<T>::optional_operations_base;
-};
-
-// This specialization is for when T is not trivially copy constructible
-template <class T>
-struct optional_copy_base<T, false> : optional_operations_base<T> {
-    using optional_operations_base<T>::optional_operations_base;
-
-    optional_copy_base() = default;
-    optional_copy_base(const optional_copy_base &rhs)
-        : optional_operations_base<T>() {
-        if (rhs.has_value()) {
-            this->construct(rhs.get());
-        } else {
-            this->m_has_value = false;
-        }
-    }
-
-    optional_copy_base(optional_copy_base &&rhs) = default;
-    optional_copy_base &operator=(const optional_copy_base &rhs) = default;
-    optional_copy_base &operator=(optional_copy_base &&rhs) = default;
-};
-
-// This class manages conditionally having a trivial move constructor
-// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it
-// doesn't implement an analogue to std::is_trivially_move_constructible. We
-// have to make do with a non-trivial move constructor even if T is trivially
-// move constructible
-#ifndef TL_OPTIONAL_GCC49
-template <class T, bool = std::is_trivially_move_constructible<T>::value>
-struct optional_move_base : optional_copy_base<T> {
-    using optional_copy_base<T>::optional_copy_base;
-};
-#else
-template <class T, bool = false>
-struct optional_move_base;
-#endif
-template <class T>
-struct optional_move_base<T, false> : optional_copy_base<T> {
-    using optional_copy_base<T>::optional_copy_base;
-
-    optional_move_base() = default;
-    optional_move_base(const optional_move_base &rhs) = default;
-
-    optional_move_base(optional_move_base &&rhs) noexcept(
-        std::is_nothrow_move_constructible<T>::value) {
-        if (rhs.has_value()) {
-            this->construct(std::move(rhs.get()));
-        } else {
-            this->m_has_value = false;
-        }
-    }
-    optional_move_base &operator=(const optional_move_base &rhs) = default;
-    optional_move_base &operator=(optional_move_base &&rhs) = default;
-};
-
-// This class manages conditionally having a trivial copy assignment operator
-template <class T,
-          bool = TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) &&
-                 TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) &&
-                 TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T)>
-struct optional_copy_assign_base : optional_move_base<T> {
-    using optional_move_base<T>::optional_move_base;
-};
-
-template <class T>
-struct optional_copy_assign_base<T, false> : optional_move_base<T> {
-    using optional_move_base<T>::optional_move_base;
-
-    optional_copy_assign_base() = default;
-    optional_copy_assign_base(const optional_copy_assign_base &rhs) = default;
-
-    optional_copy_assign_base(optional_copy_assign_base &&rhs) = default;
-    optional_copy_assign_base &operator=(const optional_copy_assign_base &rhs) {
-        this->assign(rhs);
-        return *this;
-    }
-    optional_copy_assign_base &operator=(optional_copy_assign_base &&rhs) =
-        default;
-};
-
-// This class manages conditionally having a trivial move assignment operator
-// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it
-// doesn't implement an analogue to std::is_trivially_move_assignable. We have
-// to make do with a non-trivial move assignment operator even if T is trivially
-// move assignable
-#ifndef TL_OPTIONAL_GCC49
-template <class T,
-          bool = std::is_trivially_destructible<T>::value
-              &&std::is_trivially_move_constructible<T>::value
-                  &&std::is_trivially_move_assignable<T>::value>
-struct optional_move_assign_base : optional_copy_assign_base<T> {
-    using optional_copy_assign_base<T>::optional_copy_assign_base;
-};
-#else
-template <class T, bool = false>
-struct optional_move_assign_base;
-#endif
-
-template <class T>
-struct optional_move_assign_base<T, false> : optional_copy_assign_base<T> {
-    using optional_copy_assign_base<T>::optional_copy_assign_base;
-
-    optional_move_assign_base() = default;
-    optional_move_assign_base(const optional_move_assign_base &rhs) = default;
-
-    optional_move_assign_base(optional_move_assign_base &&rhs) = default;
-
-    optional_move_assign_base &operator=(const optional_move_assign_base &rhs) =
-        default;
-
-    optional_move_assign_base &
-    operator=(optional_move_assign_base &&rhs) noexcept(
-        std::is_nothrow_move_constructible<T>::value
-            &&std::is_nothrow_move_assignable<T>::value) {
-        this->assign(std::move(rhs));
-        return *this;
-    }
-};
-
-// optional_delete_ctor_base will conditionally delete copy and move
-// constructors depending on whether T is copy/move constructible
-template <class T,
-          bool EnableCopy = std::is_copy_constructible<T>::value,
-          bool EnableMove = std::is_move_constructible<T>::value>
-struct optional_delete_ctor_base {
-    optional_delete_ctor_base() = default;
-    optional_delete_ctor_base(const optional_delete_ctor_base &) = default;
-    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default;
-    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
-        default;
-    optional_delete_ctor_base &operator=(
-        optional_delete_ctor_base &&) noexcept = default;
-};
-
-template <class T>
-struct optional_delete_ctor_base<T, true, false> {
-    optional_delete_ctor_base() = default;
-    optional_delete_ctor_base(const optional_delete_ctor_base &) = default;
-    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete;
-    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
-        default;
-    optional_delete_ctor_base &operator=(
-        optional_delete_ctor_base &&) noexcept = default;
-};
-
-template <class T>
-struct optional_delete_ctor_base<T, false, true> {
-    optional_delete_ctor_base() = default;
-    optional_delete_ctor_base(const optional_delete_ctor_base &) = delete;
-    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default;
-    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
-        default;
-    optional_delete_ctor_base &operator=(
-        optional_delete_ctor_base &&) noexcept = default;
-};
-
-template <class T>
-struct optional_delete_ctor_base<T, false, false> {
-    optional_delete_ctor_base() = default;
-    optional_delete_ctor_base(const optional_delete_ctor_base &) = delete;
-    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete;
-    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
-        default;
-    optional_delete_ctor_base &operator=(
-        optional_delete_ctor_base &&) noexcept = default;
-};
-
-// optional_delete_assign_base will conditionally delete copy and move
-// constructors depending on whether T is copy/move constructible + assignable
-template <class T,
-          bool EnableCopy = (std::is_copy_constructible<T>::value &&
-                             std::is_copy_assignable<T>::value),
-          bool EnableMove = (std::is_move_constructible<T>::value &&
-                             std::is_move_assignable<T>::value)>
-struct optional_delete_assign_base {
-    optional_delete_assign_base() = default;
-    optional_delete_assign_base(const optional_delete_assign_base &) = default;
-    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
-        default;
-    optional_delete_assign_base &operator=(
-        const optional_delete_assign_base &) = default;
-    optional_delete_assign_base &operator=(
-        optional_delete_assign_base &&) noexcept = default;
-};
-
-template <class T>
-struct optional_delete_assign_base<T, true, false> {
-    optional_delete_assign_base() = default;
-    optional_delete_assign_base(const optional_delete_assign_base &) = default;
-    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
-        default;
-    optional_delete_assign_base &operator=(
-        const optional_delete_assign_base &) = default;
-    optional_delete_assign_base &operator=(
-        optional_delete_assign_base &&) noexcept = delete;
-};
-
-template <class T>
-struct optional_delete_assign_base<T, false, true> {
-    optional_delete_assign_base() = default;
-    optional_delete_assign_base(const optional_delete_assign_base &) = default;
-    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
-        default;
-    optional_delete_assign_base &operator=(
-        const optional_delete_assign_base &) = delete;
-    optional_delete_assign_base &operator=(
-        optional_delete_assign_base &&) noexcept = default;
-};
-
-template <class T>
-struct optional_delete_assign_base<T, false, false> {
-    optional_delete_assign_base() = default;
-    optional_delete_assign_base(const optional_delete_assign_base &) = default;
-    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
-        default;
-    optional_delete_assign_base &operator=(
-        const optional_delete_assign_base &) = delete;
-    optional_delete_assign_base &operator=(
-        optional_delete_assign_base &&) noexcept = delete;
-};
-
-}  // namespace detail
-
-/// A tag type to represent an empty optional
-struct nullopt_t {
-    struct do_not_use {};
-    constexpr explicit nullopt_t(do_not_use, do_not_use) noexcept {}
-};
-/// Represents an empty optional
-static constexpr nullopt_t nullopt{nullopt_t::do_not_use{},
-                                   nullopt_t::do_not_use{}};
-
-class bad_optional_access : public std::exception {
-  public:
-    bad_optional_access() = default;
-    const char *what() const noexcept { return "Optional has no value"; }
-};
-
-/// An optional object is an object that contains the storage for another
-/// object and manages the lifetime of this contained object, if any. The
-/// contained object may be initialized after the optional object has been
-/// initialized, and may be destroyed before the optional object has been
-/// destroyed. The initialization state of the contained object is tracked by
-/// the optional object.
-template <class T>
-class optional : private detail::optional_move_assign_base<T>,
-                 private detail::optional_delete_ctor_base<T>,
-                 private detail::optional_delete_assign_base<T> {
-    using base = detail::optional_move_assign_base<T>;
-
-    static_assert(!std::is_same<T, in_place_t>::value,
-                  "instantiation of optional with in_place_t is ill-formed");
-    static_assert(!std::is_same<detail::decay_t<T>, nullopt_t>::value,
-                  "instantiation of optional with nullopt_t is ill-formed");
-
-  public:
-// The different versions for C++14 and 11 are needed because deduced return
-// types are not SFINAE-safe. This provides better support for things like
-// generic lambdas. C.f.
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0826r0.html
-#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
-    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
-    /// Carries out some operation which returns an optional on the stored
-    /// object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) & {
-        using result = detail::invoke_result_t<F, T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) && {
-        using result = detail::invoke_result_t<F, T &&>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : result(nullopt);
-    }
-
-    template <class F>
-    constexpr auto and_then(F &&f) const & {
-        using result = detail::invoke_result_t<F, const T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr auto and_then(F &&f) const && {
-        using result = detail::invoke_result_t<F, const T &&>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : result(nullopt);
-    }
-#endif
-#else
-    /// Carries out some operation which returns an optional on the stored
-    /// object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &> and_then(F &&f) & {
-        using result = detail::invoke_result_t<F, T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &&> and_then(
-        F &&f) && {
-        using result = detail::invoke_result_t<F, T &&>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : result(nullopt);
-    }
-
-    template <class F>
-    constexpr detail::invoke_result_t<F, const T &> and_then(F &&f) const & {
-        using result = detail::invoke_result_t<F, const T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr detail::invoke_result_t<F, const T &&> and_then(F &&f) const && {
-        using result = detail::invoke_result_t<F, const T &&>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : result(nullopt);
-    }
-#endif
-#endif
-
-#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
-    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto map(F &&f) const & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto map(F &&f) const && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#else
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(
-        optional_map_impl(std::declval<optional &>(), std::declval<F &&>()))
-    map(F &&f) & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(
-        optional_map_impl(std::declval<optional &&>(), std::declval<F &&>()))
-    map(F &&f) && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr decltype(optional_map_impl(std::declval<const optional &>(),
-                                         std::declval<F &&>()))
-    map(F &&f) const & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr decltype(optional_map_impl(std::declval<const optional &&>(),
-                                         std::declval<F &&>()))
-    map(F &&f) const && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#endif
-#endif
-
-#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
-    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto transform(F &&f) const & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto transform(F &&f) const && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#else
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(
-        optional_map_impl(std::declval<optional &>(), std::declval<F &&>()))
-    transform(F &&f) & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(
-        optional_map_impl(std::declval<optional &&>(), std::declval<F &&>()))
-    transform(F &&f) && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr decltype(optional_map_impl(std::declval<const optional &>(),
-                                         std::declval<F &&>()))
-    transform(F &&f) const & {
-        return optional_map_impl(*this, std::forward<F>(f));
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr decltype(optional_map_impl(std::declval<const optional &&>(),
-                                         std::declval<F &&>()))
-    transform(F &&f) const && {
-        return optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#endif
-#endif
-
-    /// Calls `f` if the optional is empty
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
-        if (has_value()) return *this;
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
-        return has_value() ? *this : std::forward<F>(f)();
-    }
-
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) && {
-        if (has_value()) return std::move(*this);
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) && {
-        return has_value() ? std::move(*this) : std::forward<F>(f)();
-    }
-
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) const & {
-        if (has_value()) return *this;
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) const & {
-        return has_value() ? *this : std::forward<F>(f)();
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) const && {
-        if (has_value()) return std::move(*this);
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) const && {
-        return has_value() ? std::move(*this) : std::forward<F>(f)();
-    }
-#endif
-
-    /// Maps the stored value with `f` if there is one, otherwise returns `u`.
-    template <class F, class U>
-    U map_or(F &&f, U &&u) & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u);
-    }
-
-    template <class F, class U>
-    U map_or(F &&f, U &&u) && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u);
-    }
-
-    template <class F, class U>
-    U map_or(F &&f, U &&u) const & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F, class U>
-    U map_or(F &&f, U &&u) const && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u);
-    }
-#endif
-
-    /// Maps the stored value with `f` if there is one, otherwise calls
-    /// `u` and returns the result.
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u)();
-    }
-
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u)();
-    }
-
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u)();
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u)();
-    }
-#endif
-
-    /// Returns `u` if `*this` has a value, otherwise an empty optional.
-    template <class U>
-    constexpr optional<typename std::decay<U>::type> conjunction(U &&u) const {
-        using result = optional<detail::decay_t<U>>;
-        return has_value() ? result{u} : result{nullopt};
-    }
-
-    /// Returns `rhs` if `*this` is empty, otherwise the current value.
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) & {
-        return has_value() ? *this : rhs;
-    }
-
-    constexpr optional disjunction(const optional &rhs) const & {
-        return has_value() ? *this : rhs;
-    }
-
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) && {
-        return has_value() ? std::move(*this) : rhs;
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    constexpr optional disjunction(const optional &rhs) const && {
-        return has_value() ? std::move(*this) : rhs;
-    }
-#endif
-
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) & {
-        return has_value() ? *this : std::move(rhs);
-    }
-
-    constexpr optional disjunction(optional &&rhs) const & {
-        return has_value() ? *this : std::move(rhs);
-    }
-
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) && {
-        return has_value() ? std::move(*this) : std::move(rhs);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    constexpr optional disjunction(optional &&rhs) const && {
-        return has_value() ? std::move(*this) : std::move(rhs);
-    }
-#endif
-
-    /// Takes the value out of the optional, leaving it empty
-    optional take() {
-        optional ret = std::move(*this);
-        reset();
-        return ret;
-    }
-
-    using value_type = T;
-
-    /// Constructs an optional that does not contain a value.
-    constexpr optional() noexcept = default;
-
-    constexpr optional(nullopt_t) noexcept {}
-
-    /// Copy constructor
-    ///
-    /// If `rhs` contains a value, the stored value is direct-initialized with
-    /// it. Otherwise, the constructed optional is empty.
-    TL_OPTIONAL_11_CONSTEXPR optional(const optional &rhs) = default;
-
-    /// Move constructor
-    ///
-    /// If `rhs` contains a value, the stored value is direct-initialized with
-    /// it. Otherwise, the constructed optional is empty.
-    TL_OPTIONAL_11_CONSTEXPR optional(optional &&rhs) = default;
-
-    /// Constructs the stored value in-place using the given arguments.
-    template <class... Args>
-    constexpr explicit optional(
-        detail::enable_if_t<std::is_constructible<T, Args...>::value,
-                            in_place_t>,
-        Args &&... args)
-        : base(in_place, std::forward<Args>(args)...) {}
-
-    template <class U, class... Args>
-    TL_OPTIONAL_11_CONSTEXPR explicit optional(
-        detail::enable_if_t<std::is_constructible<T,
-                                                  std::initializer_list<U> &,
-                                                  Args &&...>::value,
-                            in_place_t>,
-        std::initializer_list<U> il,
-        Args &&... args) {
-        this->construct(il, std::forward<Args>(args)...);
-    }
-
-    /// Constructs the stored value with `u`.
-    template <
-        class U = T,
-        detail::enable_if_t<std::is_convertible<U &&, T>::value> * = nullptr,
-        detail::enable_forward_value<T, U> * = nullptr>
-    constexpr optional(U &&u) : base(in_place, std::forward<U>(u)) {}
-
-    template <
-        class U = T,
-        detail::enable_if_t<!std::is_convertible<U &&, T>::value> * = nullptr,
-        detail::enable_forward_value<T, U> * = nullptr>
-    constexpr explicit optional(U &&u) : base(in_place, std::forward<U>(u)) {}
-
-    /// Converting copy constructor.
-    template <class U,
-              detail::enable_from_other<T, U, const U &> * = nullptr,
-              detail::enable_if_t<std::is_convertible<const U &, T>::value> * =
-                  nullptr>
-    optional(const optional<U> &rhs) {
-        if (rhs.has_value()) {
-            this->construct(*rhs);
-        }
-    }
-
-    template <class U,
-              detail::enable_from_other<T, U, const U &> * = nullptr,
-              detail::enable_if_t<!std::is_convertible<const U &, T>::value> * =
-                  nullptr>
-    explicit optional(const optional<U> &rhs) {
-        if (rhs.has_value()) {
-            this->construct(*rhs);
-        }
-    }
-
-    /// Converting move constructor.
-    template <
-        class U,
-        detail::enable_from_other<T, U, U &&> * = nullptr,
-        detail::enable_if_t<std::is_convertible<U &&, T>::value> * = nullptr>
-    optional(optional<U> &&rhs) {
-        if (rhs.has_value()) {
-            this->construct(std::move(*rhs));
-        }
-    }
-
-    template <
-        class U,
-        detail::enable_from_other<T, U, U &&> * = nullptr,
-        detail::enable_if_t<!std::is_convertible<U &&, T>::value> * = nullptr>
-    explicit optional(optional<U> &&rhs) {
-        if (rhs.has_value()) {
-            this->construct(std::move(*rhs));
-        }
-    }
-
-    /// Destroys the stored value if there is one.
-    ~optional() = default;
-
-    /// Assignment to empty.
-    ///
-    /// Destroys the current value if there is one.
-    optional &operator=(nullopt_t) noexcept {
-        if (has_value()) {
-            this->m_value.~T();
-            this->m_has_value = false;
-        }
-
-        return *this;
-    }
-
-    /// Copy assignment.
-    ///
-    /// Copies the value from `rhs` if there is one. Otherwise resets the stored
-    /// value in `*this`.
-    optional &operator=(const optional &rhs) = default;
-
-    /// Move assignment.
-    ///
-    /// Moves the value from `rhs` if there is one. Otherwise resets the stored
-    /// value in `*this`.
-    optional &operator=(optional &&rhs) = default;
-
-    /// Assigns the stored value from `u`, destroying the old value if there was
-    /// one.
-    template <class U = T, detail::enable_assign_forward<T, U> * = nullptr>
-    optional &operator=(U &&u) {
-        if (has_value()) {
-            this->m_value = std::forward<U>(u);
-        } else {
-            this->construct(std::forward<U>(u));
-        }
-
-        return *this;
-    }
-
-    /// Converting copy assignment operator.
-    ///
-    /// Copies the value from `rhs` if there is one. Otherwise resets the stored
-    /// value in `*this`.
-    template <class U,
-              detail::enable_assign_from_other<T, U, const U &> * = nullptr>
-    optional &operator=(const optional<U> &rhs) {
-        if (has_value()) {
-            if (rhs.has_value()) {
-                this->m_value = *rhs;
-            } else {
-                this->hard_reset();
-            }
-        }
-
-        if (rhs.has_value()) {
-            this->construct(*rhs);
-        }
-
-        return *this;
-    }
-
-    // TODO check exception guarantee
-    /// Converting move assignment operator.
-    ///
-    /// Moves the value from `rhs` if there is one. Otherwise resets the stored
-    /// value in `*this`.
-    template <class U, detail::enable_assign_from_other<T, U, U> * = nullptr>
-    optional &operator=(optional<U> &&rhs) {
-        if (has_value()) {
-            if (rhs.has_value()) {
-                this->m_value = std::move(*rhs);
-            } else {
-                this->hard_reset();
-            }
-        }
-
-        if (rhs.has_value()) {
-            this->construct(std::move(*rhs));
-        }
-
-        return *this;
-    }
-
-    /// Constructs the value in-place, destroying the current one if there is
-    /// one.
-    template <class... Args>
-    T &emplace(Args &&... args) {
-        static_assert(std::is_constructible<T, Args &&...>::value,
-                      "T must be constructible with Args");
-
-        *this = nullopt;
-        this->construct(std::forward<Args>(args)...);
-        return value();
-    }
-
-    template <class U, class... Args>
-    detail::enable_if_t<
-        std::is_constructible<T, std::initializer_list<U> &, Args &&...>::value,
-        T &>
-    emplace(std::initializer_list<U> il, Args &&... args) {
-        *this = nullopt;
-        this->construct(il, std::forward<Args>(args)...);
-        return value();
-    }
-
-    /// Swaps this optional with the other.
-    ///
-    /// If neither optionals have a value, nothing happens.
-    /// If both have a value, the values are swapped.
-    /// If one has a value, it is moved to the other and the movee is left
-    /// valueless.
-    void swap(optional &rhs) noexcept(
-        std::is_nothrow_move_constructible<T>::value
-            &&detail::is_nothrow_swappable<T>::value) {
-        using std::swap;
-        if (has_value()) {
-            if (rhs.has_value()) {
-                swap(**this, *rhs);
-            } else {
-                new (std::addressof(rhs.m_value)) T(std::move(this->m_value));
-                this->m_value.T::~T();
-            }
-        } else if (rhs.has_value()) {
-            new (std::addressof(this->m_value)) T(std::move(rhs.m_value));
-            rhs.m_value.T::~T();
-        }
-        swap(this->m_has_value, rhs.m_has_value);
-    }
-
-    /// Returns a pointer to the stored value
-    constexpr const T *operator->() const {
-        return std::addressof(this->m_value);
-    }
-
-    TL_OPTIONAL_11_CONSTEXPR T *operator->() {
-        return std::addressof(this->m_value);
-    }
-
-    /// Returns the stored value
-    TL_OPTIONAL_11_CONSTEXPR T &operator*() & { return this->m_value; }
-
-    constexpr const T &operator*() const & { return this->m_value; }
-
-    TL_OPTIONAL_11_CONSTEXPR T &&operator*() && {
-        return std::move(this->m_value);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    constexpr const T &&operator*() const && {
-        return std::move(this->m_value);
-    }
-#endif
-
-    /// Returns whether or not the optional has a value
-    constexpr bool has_value() const noexcept { return this->m_has_value; }
-
-    constexpr explicit operator bool() const noexcept {
-        return this->m_has_value;
-    }
-
-    /// Returns the contained value if there is one, otherwise throws
-    /// bad_optional_access
-    TL_OPTIONAL_11_CONSTEXPR T &value() & {
-        if (has_value()) return this->m_value;
-        throw bad_optional_access();
-    }
-    TL_OPTIONAL_11_CONSTEXPR const T &value() const & {
-        if (has_value()) return this->m_value;
-        throw bad_optional_access();
-    }
-    TL_OPTIONAL_11_CONSTEXPR T &&value() && {
-        if (has_value()) return std::move(this->m_value);
-        throw bad_optional_access();
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    TL_OPTIONAL_11_CONSTEXPR const T &&value() const && {
-        if (has_value()) return std::move(this->m_value);
-        throw bad_optional_access();
-    }
-#endif
-
-    /// Returns the stored value if there is one, otherwise returns `u`
-    template <class U>
-    constexpr T value_or(U &&u) const & {
-        static_assert(std::is_copy_constructible<T>::value &&
-                          std::is_convertible<U &&, T>::value,
-                      "T must be copy constructible and convertible from U");
-        return has_value() ? **this : static_cast<T>(std::forward<U>(u));
-    }
-
-    template <class U>
-    TL_OPTIONAL_11_CONSTEXPR T value_or(U &&u) && {
-        static_assert(std::is_move_constructible<T>::value &&
-                          std::is_convertible<U &&, T>::value,
-                      "T must be move constructible and convertible from U");
-        return has_value() ? **this : static_cast<T>(std::forward<U>(u));
-    }
-
-    /// Destroys the stored value if one exists, making the optional empty
-    void reset() noexcept {
-        if (has_value()) {
-            this->m_value.~T();
-            this->m_has_value = false;
-        }
-    }
-};  // namespace tl
-
-/// Compares two optional objects
-template <class T, class U>
-inline constexpr bool operator==(const optional<T> &lhs,
-                                 const optional<U> &rhs) {
-    return lhs.has_value() == rhs.has_value() &&
-           (!lhs.has_value() || *lhs == *rhs);
-}
-template <class T, class U>
-inline constexpr bool operator!=(const optional<T> &lhs,
-                                 const optional<U> &rhs) {
-    return lhs.has_value() != rhs.has_value() ||
-           (lhs.has_value() && *lhs != *rhs);
-}
-template <class T, class U>
-inline constexpr bool operator<(const optional<T> &lhs,
-                                const optional<U> &rhs) {
-    return rhs.has_value() && (!lhs.has_value() || *lhs < *rhs);
-}
-template <class T, class U>
-inline constexpr bool operator>(const optional<T> &lhs,
-                                const optional<U> &rhs) {
-    return lhs.has_value() && (!rhs.has_value() || *lhs > *rhs);
-}
-template <class T, class U>
-inline constexpr bool operator<=(const optional<T> &lhs,
-                                 const optional<U> &rhs) {
-    return !lhs.has_value() || (rhs.has_value() && *lhs <= *rhs);
-}
-template <class T, class U>
-inline constexpr bool operator>=(const optional<T> &lhs,
-                                 const optional<U> &rhs) {
-    return !rhs.has_value() || (lhs.has_value() && *lhs >= *rhs);
-}
-
-/// Compares an optional to a `nullopt`
-template <class T>
-inline constexpr bool operator==(const optional<T> &lhs, nullopt_t) noexcept {
-    return !lhs.has_value();
-}
-template <class T>
-inline constexpr bool operator==(nullopt_t, const optional<T> &rhs) noexcept {
-    return !rhs.has_value();
-}
-template <class T>
-inline constexpr bool operator!=(const optional<T> &lhs, nullopt_t) noexcept {
-    return lhs.has_value();
-}
-template <class T>
-inline constexpr bool operator!=(nullopt_t, const optional<T> &rhs) noexcept {
-    return rhs.has_value();
-}
-template <class T>
-inline constexpr bool operator<(const optional<T> &, nullopt_t) noexcept {
-    return false;
-}
-template <class T>
-inline constexpr bool operator<(nullopt_t, const optional<T> &rhs) noexcept {
-    return rhs.has_value();
-}
-template <class T>
-inline constexpr bool operator<=(const optional<T> &lhs, nullopt_t) noexcept {
-    return !lhs.has_value();
-}
-template <class T>
-inline constexpr bool operator<=(nullopt_t, const optional<T> &) noexcept {
-    return true;
-}
-template <class T>
-inline constexpr bool operator>(const optional<T> &lhs, nullopt_t) noexcept {
-    return lhs.has_value();
-}
-template <class T>
-inline constexpr bool operator>(nullopt_t, const optional<T> &) noexcept {
-    return false;
-}
-template <class T>
-inline constexpr bool operator>=(const optional<T> &, nullopt_t) noexcept {
-    return true;
-}
-template <class T>
-inline constexpr bool operator>=(nullopt_t, const optional<T> &rhs) noexcept {
-    return !rhs.has_value();
-}
-
-/// Compares the optional with a value.
-template <class T, class U>
-inline constexpr bool operator==(const optional<T> &lhs, const U &rhs) {
-    return lhs.has_value() ? *lhs == rhs : false;
-}
-template <class T, class U>
-inline constexpr bool operator==(const U &lhs, const optional<T> &rhs) {
-    return rhs.has_value() ? lhs == *rhs : false;
-}
-template <class T, class U>
-inline constexpr bool operator!=(const optional<T> &lhs, const U &rhs) {
-    return lhs.has_value() ? *lhs != rhs : true;
-}
-template <class T, class U>
-inline constexpr bool operator!=(const U &lhs, const optional<T> &rhs) {
-    return rhs.has_value() ? lhs != *rhs : true;
-}
-template <class T, class U>
-inline constexpr bool operator<(const optional<T> &lhs, const U &rhs) {
-    return lhs.has_value() ? *lhs < rhs : true;
-}
-template <class T, class U>
-inline constexpr bool operator<(const U &lhs, const optional<T> &rhs) {
-    return rhs.has_value() ? lhs < *rhs : false;
-}
-template <class T, class U>
-inline constexpr bool operator<=(const optional<T> &lhs, const U &rhs) {
-    return lhs.has_value() ? *lhs <= rhs : true;
-}
-template <class T, class U>
-inline constexpr bool operator<=(const U &lhs, const optional<T> &rhs) {
-    return rhs.has_value() ? lhs <= *rhs : false;
-}
-template <class T, class U>
-inline constexpr bool operator>(const optional<T> &lhs, const U &rhs) {
-    return lhs.has_value() ? *lhs > rhs : false;
-}
-template <class T, class U>
-inline constexpr bool operator>(const U &lhs, const optional<T> &rhs) {
-    return rhs.has_value() ? lhs > *rhs : true;
-}
-template <class T, class U>
-inline constexpr bool operator>=(const optional<T> &lhs, const U &rhs) {
-    return lhs.has_value() ? *lhs >= rhs : false;
-}
-template <class T, class U>
-inline constexpr bool operator>=(const U &lhs, const optional<T> &rhs) {
-    return rhs.has_value() ? lhs >= *rhs : true;
-}
-
-template <class T,
-          detail::enable_if_t<std::is_move_constructible<T>::value> * = nullptr,
-          detail::enable_if_t<detail::is_swappable<T>::value> * = nullptr>
-void swap(optional<T> &lhs,
-          optional<T> &rhs) noexcept(noexcept(lhs.swap(rhs))) {
-    return lhs.swap(rhs);
-}
-
-namespace detail {
-struct i_am_secret {};
-}  // namespace detail
-
-template <class T = detail::i_am_secret,
-          class U,
-          class Ret =
-              detail::conditional_t<std::is_same<T, detail::i_am_secret>::value,
-                                    detail::decay_t<U>,
-                                    T>>
-inline constexpr optional<Ret> make_optional(U &&v) {
-    return optional<Ret>(std::forward<U>(v));
-}
-
-template <class T, class... Args>
-inline constexpr optional<T> make_optional(Args &&... args) {
-    return optional<T>(in_place, std::forward<Args>(args)...);
-}
-template <class T, class U, class... Args>
-inline constexpr optional<T> make_optional(std::initializer_list<U> il,
-                                           Args &&... args) {
-    return optional<T>(in_place, il, std::forward<Args>(args)...);
-}
-
-#if __cplusplus >= 201703L
-template <class T>
-optional(T)->optional<T>;
-#endif
-
-/// \exclude
-namespace detail {
-#ifdef TL_OPTIONAL_CXX14
-template <class Opt,
-          class F,
-          class Ret = decltype(detail::invoke(std::declval<F>(),
-                                              *std::declval<Opt>())),
-          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
-constexpr auto optional_map_impl(Opt &&opt, F &&f) {
-    return opt.has_value()
-               ? detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt))
-               : optional<Ret>(nullopt);
-}
-
-template <class Opt,
-          class F,
-          class Ret = decltype(detail::invoke(std::declval<F>(),
-                                              *std::declval<Opt>())),
-          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
-auto optional_map_impl(Opt &&opt, F &&f) {
-    if (opt.has_value()) {
-        detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt));
-        return make_optional(monostate{});
-    }
-
-    return optional<monostate>(nullopt);
-}
-#else
-template <class Opt,
-          class F,
-          class Ret = decltype(detail::invoke(std::declval<F>(),
-                                              *std::declval<Opt>())),
-          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
-
-constexpr auto optional_map_impl(Opt &&opt, F &&f) -> optional<Ret> {
-    return opt.has_value()
-               ? detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt))
-               : optional<Ret>(nullopt);
-}
-
-template <class Opt,
-          class F,
-          class Ret = decltype(detail::invoke(std::declval<F>(),
-                                              *std::declval<Opt>())),
-          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
-
-auto optional_map_impl(Opt &&opt, F &&f) -> optional<monostate> {
-    if (opt.has_value()) {
-        detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt));
-        return monostate{};
-    }
-
-    return nullopt;
-}
-#endif
-}  // namespace detail
-
-/// Specialization for when `T` is a reference. `optional<T&>` acts similarly
-/// to a `T*`, but provides more operations and shows intent more clearly.
-template <class T>
-class optional<T &> {
-  public:
-// The different versions for C++14 and 11 are needed because deduced return
-// types are not SFINAE-safe. This provides better support for things like
-// generic lambdas. C.f.
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0826r0.html
-#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
-    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
-
-    /// Carries out some operation which returns an optional on the stored
-    /// object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) & {
-        using result = detail::invoke_result_t<F, T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) && {
-        using result = detail::invoke_result_t<F, T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-    template <class F>
-    constexpr auto and_then(F &&f) const & {
-        using result = detail::invoke_result_t<F, const T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr auto and_then(F &&f) const && {
-        using result = detail::invoke_result_t<F, const T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-#endif
-#else
-    /// Carries out some operation which returns an optional on the stored
-    /// object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &> and_then(F &&f) & {
-        using result = detail::invoke_result_t<F, T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &> and_then(
-        F &&f) && {
-        using result = detail::invoke_result_t<F, T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-    template <class F>
-    constexpr detail::invoke_result_t<F, const T &> and_then(F &&f) const & {
-        using result = detail::invoke_result_t<F, const T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr detail::invoke_result_t<F, const T &> and_then(F &&f) const && {
-        using result = detail::invoke_result_t<F, const T &>;
-        static_assert(detail::is_optional<result>::value,
-                      "F must return an optional");
-
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : result(nullopt);
-    }
-#endif
-#endif
-
-#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
-    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto map(F &&f) const & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto map(F &&f) const && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#else
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
-        std::declval<optional &>(), std::declval<F &&>()))
-    map(F &&f) & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
-        std::declval<optional &&>(), std::declval<F &&>()))
-    map(F &&f) && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr decltype(detail::optional_map_impl(
-        std::declval<const optional &>(), std::declval<F &&>()))
-    map(F &&f) const & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr decltype(detail::optional_map_impl(
-        std::declval<const optional &&>(), std::declval<F &&>()))
-    map(F &&f) const && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#endif
-#endif
-
-#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
-    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto transform(F &&f) const & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr auto transform(F &&f) const && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#else
-    /// Carries out some operation on the stored object if there is one.
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
-        std::declval<optional &>(), std::declval<F &&>()))
-    transform(F &&f) & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-    /// \group map
-    /// \synopsis template <class F> auto transform(F &&f) &&;
-    template <class F>
-    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
-        std::declval<optional &&>(), std::declval<F &&>()))
-    transform(F &&f) && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-
-    template <class F>
-    constexpr decltype(detail::optional_map_impl(
-        std::declval<const optional &>(), std::declval<F &&>()))
-    transform(F &&f) const & {
-        return detail::optional_map_impl(*this, std::forward<F>(f));
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F>
-    constexpr decltype(detail::optional_map_impl(
-        std::declval<const optional &&>(), std::declval<F &&>()))
-    transform(F &&f) const && {
-        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
-    }
-#endif
-#endif
-
-    /// Calls `f` if the optional is empty
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
-        if (has_value()) return *this;
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
-        return has_value() ? *this : std::forward<F>(f)();
-    }
-
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) && {
-        if (has_value()) return std::move(*this);
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) && {
-        return has_value() ? std::move(*this) : std::forward<F>(f)();
-    }
-
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) const & {
-        if (has_value()) return *this;
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) const & {
-        return has_value() ? *this : std::forward<F>(f)();
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F, detail::enable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) const && {
-        if (has_value()) return std::move(*this);
-
-        std::forward<F>(f)();
-        return nullopt;
-    }
-
-    template <class F, detail::disable_if_ret_void<F> * = nullptr>
-    optional<T> or_else(F &&f) const && {
-        return has_value() ? std::move(*this) : std::forward<F>(f)();
-    }
-#endif
-
-    /// Maps the stored value with `f` if there is one, otherwise returns `u`
-    template <class F, class U>
-    U map_or(F &&f, U &&u) & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u);
-    }
-
-    template <class F, class U>
-    U map_or(F &&f, U &&u) && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u);
-    }
-
-    template <class F, class U>
-    U map_or(F &&f, U &&u) const & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F, class U>
-    U map_or(F &&f, U &&u) const && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u);
-    }
-#endif
-
-    /// Maps the stored value with `f` if there is one, otherwise calls
-    /// `u` and returns the result.
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u)();
-    }
-
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u)();
-    }
-
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const & {
-        return has_value() ? detail::invoke(std::forward<F>(f), **this)
-                           : std::forward<U>(u)();
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    template <class F, class U>
-    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const && {
-        return has_value()
-                   ? detail::invoke(std::forward<F>(f), std::move(**this))
-                   : std::forward<U>(u)();
-    }
-#endif
-
-    /// Returns `u` if `*this` has a value, otherwise an empty optional.
-    template <class U>
-    constexpr optional<typename std::decay<U>::type> conjunction(U &&u) const {
-        using result = optional<detail::decay_t<U>>;
-        return has_value() ? result{u} : result{nullopt};
-    }
-
-    /// Returns `rhs` if `*this` is empty, otherwise the current value.
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) & {
-        return has_value() ? *this : rhs;
-    }
-
-    constexpr optional disjunction(const optional &rhs) const & {
-        return has_value() ? *this : rhs;
-    }
-
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) && {
-        return has_value() ? std::move(*this) : rhs;
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    constexpr optional disjunction(const optional &rhs) const && {
-        return has_value() ? std::move(*this) : rhs;
-    }
-#endif
-
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) & {
-        return has_value() ? *this : std::move(rhs);
-    }
-
-    constexpr optional disjunction(optional &&rhs) const & {
-        return has_value() ? *this : std::move(rhs);
-    }
-
-    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) && {
-        return has_value() ? std::move(*this) : std::move(rhs);
-    }
-
-#ifndef TL_OPTIONAL_NO_CONSTRR
-    constexpr optional disjunction(optional &&rhs) const && {
-        return has_value() ? std::move(*this) : std::move(rhs);
-    }
-#endif
-
-    /// Takes the value out of the optional, leaving it empty
-    optional take() {
-        optional ret = std::move(*this);
-        reset();
-        return ret;
-    }
-
-    using value_type = T &;
-
-    /// Constructs an optional that does not contain a value.
-    constexpr optional() noexcept : m_value(nullptr) {}
-
-    constexpr optional(nullopt_t) noexcept : m_value(nullptr) {}
-
-    /// Copy constructor
-    ///
-    /// If `rhs` contains a value, the stored value is direct-initialized with
-    /// it. Otherwise, the constructed optional is empty.
-    TL_OPTIONAL_11_CONSTEXPR optional(const optional &rhs) noexcept = default;
-
-    /// Move constructor
-    ///
-    /// If `rhs` contains a value, the stored value is direct-initialized with
-    /// it. Otherwise, the constructed optional is empty.
-    TL_OPTIONAL_11_CONSTEXPR optional(optional &&rhs) = default;
-
-    /// Constructs the stored value with `u`.
-    template <class U = T,
-              detail::enable_if_t<
-                  !detail::is_optional<detail::decay_t<U>>::value> * = nullptr>
-    constexpr optional(U &&u) noexcept : m_value(std::addressof(u)) {
-        static_assert(std::is_lvalue_reference<U>::value,
-                      "U must be an lvalue");
-    }
-
-    template <class U>
-    constexpr explicit optional(const optional<U> &rhs) noexcept
-        : optional(*rhs) {}
-
-    /// No-op
-    ~optional() = default;
-
-    /// Assignment to empty.
-    ///
-    /// Destroys the current value if there is one.
-    optional &operator=(nullopt_t) noexcept {
-        m_value = nullptr;
-        return *this;
-    }
-
-    /// Copy assignment.
-    ///
-    /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise
-    /// resets the stored value in `*this`.
-    optional &operator=(const optional &rhs) = default;
-
-    /// Rebinds this optional to `u`.
-    template <class U = T,
-              detail::enable_if_t<
-                  !detail::is_optional<detail::decay_t<U>>::value> * = nullptr>
-    optional &operator=(U &&u) {
-        static_assert(std::is_lvalue_reference<U>::value,
-                      "U must be an lvalue");
-        m_value = std::addressof(u);
-        return *this;
-    }
-
-    /// Converting copy assignment operator.
-    ///
-    /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise
-    /// resets the stored value in `*this`.
-    template <class U>
-    optional &operator=(const optional<U> &rhs) noexcept {
-        m_value = std::addressof(rhs.value());
-        return *this;
-    }
-
-    /// Rebinds this optional to `u`.
-    template <class U = T,
-              detail::enable_if_t<
-                  !detail::is_optional<detail::decay_t<U>>::value> * = nullptr>
-    optional &emplace(U &&u) noexcept {
-        return *this = std::forward<U>(u);
-    }
-
-    void swap(optional &rhs) noexcept { std::swap(m_value, rhs.m_value); }
-
-    /// Returns a pointer to the stored value
-    constexpr const T *operator->() const noexcept { return m_value; }
-
-    TL_OPTIONAL_11_CONSTEXPR T *operator->() noexcept { return m_value; }
-
-    /// Returns the stored value
-    TL_OPTIONAL_11_CONSTEXPR T &operator*() noexcept { return *m_value; }
-
-    constexpr const T &operator*() const noexcept { return *m_value; }
-
-    constexpr bool has_value() const noexcept { return m_value != nullptr; }
-
-    constexpr explicit operator bool() const noexcept {
-        return m_value != nullptr;
-    }
-
-    /// Returns the contained value if there is one, otherwise throws
-    /// bad_optional_access
-    TL_OPTIONAL_11_CONSTEXPR T &value() {
-        if (has_value()) return *m_value;
-        throw bad_optional_access();
-    }
-    TL_OPTIONAL_11_CONSTEXPR const T &value() const {
-        if (has_value()) return *m_value;
-        throw bad_optional_access();
-    }
-
-    /// Returns the stored value if there is one, otherwise returns `u`
-    template <class U>
-    constexpr T value_or(U &&u) const &noexcept {
-        static_assert(std::is_copy_constructible<T>::value &&
-                          std::is_convertible<U &&, T>::value,
-                      "T must be copy constructible and convertible from U");
-        return has_value() ? **this : static_cast<T>(std::forward<U>(u));
-    }
-
-    /// \group value_or
-    template <class U>
-        TL_OPTIONAL_11_CONSTEXPR T value_or(U &&u) && noexcept {
-        static_assert(std::is_move_constructible<T>::value &&
-                          std::is_convertible<U &&, T>::value,
-                      "T must be move constructible and convertible from U");
-        return has_value() ? **this : static_cast<T>(std::forward<U>(u));
-    }
-
-    /// Destroys the stored value if one exists, making the optional empty
-    void reset() noexcept { m_value = nullptr; }
-
-  private:
-    T *m_value;
-};  // namespace tl
-
-
-}  // namespace tl
-
-namespace std {
-// TODO SFINAE
-template <class T>
-struct hash<tl::optional<T>> {
-    ::std::size_t operator()(const tl::optional<T> &o) const {
-        if (!o.has_value()) return 0;
-
-        return std::hash<tl::detail::remove_const_t<T>>()(*o);
-    }
-};
-}  // namespace std
-
-#endif
diff --git a/paddlespeech/audio/src/pybind/kaldi/feature_common.h b/paddlespeech/audio/src/pybind/kaldi/feature_common.h
deleted file mode 100644
index 05522bb7e8a82f932a0004d7f985b88ec517f3db..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/kaldi/feature_common.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "pybind11/pybind11.h"
-#include "pybind11/numpy.h"
-#include "feat/feature-window.h"
-
-namespace paddleaudio {
-namespace kaldi {
-
-namespace py = pybind11;
-
-template <class F>
-class StreamingFeatureTpl {
-  public:
-    typedef typename F::Options Options;
-    StreamingFeatureTpl(const Options& opts);
-    bool ComputeFeature(const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
-                        ::kaldi::Vector<::kaldi::BaseFloat>* feats);
-    void Reset() { remained_wav_.Resize(0); }
-
-    int Dim() { return computer_.Dim(); }
-
-  private:
-    bool Compute(const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
-                 ::kaldi::Vector<::kaldi::BaseFloat>* feats);
-    Options opts_;
-    ::kaldi::FeatureWindowFunction window_function_;
-    ::kaldi::Vector<::kaldi::BaseFloat> remained_wav_;
-    F computer_;
-};
-
-}  // namespace kaldi
-}  // namespace ppspeech
-
-#include "feature_common_inl.h"
diff --git a/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h b/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h
deleted file mode 100644
index c894b97755845a46837e68a98cbaa54567a5a9dd..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "base/kaldi-common.h"
-
-namespace paddleaudio {
-namespace kaldi {
-
-template <class F>
-StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
-    : opts_(opts), computer_(opts), window_function_(opts.frame_opts) {
-    // window_function_(computer_.GetFrameOptions()) { the opt set to zero
-}
-
-template <class F>
-bool StreamingFeatureTpl<F>::ComputeFeature(
-    const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
-    ::kaldi::Vector<::kaldi::BaseFloat>* feats) {
-    // append remaned waves
-    ::kaldi::int32 wav_len = wav.Dim();
-    if (wav_len == 0) return false;
-    ::kaldi::int32 left_len = remained_wav_.Dim();
-    ::kaldi::Vector<::kaldi::BaseFloat> waves(left_len + wav_len);
-    waves.Range(0, left_len).CopyFromVec(remained_wav_);
-    waves.Range(left_len, wav_len).CopyFromVec(wav);
-
-    // cache remaned waves
-    ::kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
-    ::kaldi::int32 num_frames = ::kaldi::NumFrames(waves.Dim(), frame_opts);
-    ::kaldi::int32 frame_shift = frame_opts.WindowShift();
-    ::kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames;
-    remained_wav_.Resize(left_samples);
-    remained_wav_.CopyFromVec(
-        waves.Range(frame_shift * num_frames, left_samples));
-
-    // compute speech feature
-    Compute(waves, feats);
-    return true;
-}
-
-// Compute feat
-template <class F>
-bool StreamingFeatureTpl<F>::Compute(
-    const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
-    ::kaldi::Vector<::kaldi::BaseFloat>* feats) {
-    ::kaldi::BaseFloat vtln_warp = 1.0;
-    const ::kaldi::FrameExtractionOptions& frame_opts =
-        computer_.GetFrameOptions();
-    ::kaldi::int32 num_samples = waves.Dim();
-    ::kaldi::int32 frame_length = frame_opts.WindowSize();
-    ::kaldi::int32 sample_rate = frame_opts.samp_freq;
-    if (num_samples < frame_length) {
-        return false;
-    }
-
-    ::kaldi::int32 num_frames = ::kaldi::NumFrames(num_samples, frame_opts);
-    feats->Resize(num_frames * Dim());
-
-    ::kaldi::Vector<::kaldi::BaseFloat> window;
-    bool need_raw_log_energy = computer_.NeedRawLogEnergy();
-    for (::kaldi::int32 frame = 0; frame < num_frames; frame++) {
-        ::kaldi::BaseFloat raw_log_energy = 0.0;
-        ::kaldi::ExtractWindow(0,
-                               waves,
-                               frame,
-                               frame_opts,
-                               window_function_,
-                               &window,
-                               need_raw_log_energy ? &raw_log_energy : NULL);
-
-        ::kaldi::Vector<::kaldi::BaseFloat> this_feature(computer_.Dim(),
-                                                         ::kaldi::kUndefined);
-        computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature);
-        ::kaldi::SubVector<::kaldi::BaseFloat> output_row(
-            feats->Data() + frame * Dim(), Dim());
-        output_row.CopyFromVec(this_feature);
-    }
-    return true;
-}
-
-}  // namespace kaldi
-}  // namespace paddleaudio
diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc
deleted file mode 100644
index 9fd8e93f9e29b0351e490622d06aaf37d7c8ce88..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
-#include "feat/pitch-functions.h"
-
-namespace paddleaudio {
-namespace kaldi {
-
-bool InitFbank(
-    ::kaldi::FrameExtractionOptions frame_opts,
-    ::kaldi::MelBanksOptions mel_opts,
-    FbankOptions fbank_opts) {
-    ::kaldi::FbankOptions opts;
-    opts.frame_opts = frame_opts;
-    opts.mel_opts = mel_opts;
-    opts.use_energy = fbank_opts.use_energy;
-    opts.energy_floor = fbank_opts.energy_floor;
-    opts.raw_energy = fbank_opts.raw_energy;
-    opts.htk_compat = fbank_opts.htk_compat;
-    opts.use_log_fbank = fbank_opts.use_log_fbank;
-    opts.use_power = fbank_opts.use_power;
-    paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->InitFbank(opts);
-    return true;
-}
-
-py::array_t<float> ComputeFbankStreaming(const py::array_t<float>& wav) {
-    return paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ComputeFbank(
-        wav);
-}
-
-py::array_t<float> ComputeFbank(
-    ::kaldi::FrameExtractionOptions frame_opts,
-    ::kaldi::MelBanksOptions mel_opts,
-    FbankOptions fbank_opts,
-    const py::array_t<float>& wav) {
-    InitFbank(frame_opts, mel_opts, fbank_opts);
-    py::array_t<float> result = ComputeFbankStreaming(wav);
-    paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
-    return result;
-}
-
-void ResetFbank() {
-    paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
-}
-
-py::array_t<float> ComputeKaldiPitch(
-  const ::kaldi::PitchExtractionOptions& opts,
-  const py::array_t<float>& wav) {
-    py::buffer_info info = wav.request();
-    ::kaldi::SubVector<::kaldi::BaseFloat> input_wav((float*)info.ptr, info.size);
-   
-    ::kaldi::Matrix<::kaldi::BaseFloat> features;
-    ::kaldi::ComputeKaldiPitch(opts, input_wav, &features);
-    auto result = py::array_t<float>({features.NumRows(), features.NumCols()});
-    for (int row_idx = 0; row_idx < features.NumRows(); ++row_idx) {
-        std::memcpy(result.mutable_data(row_idx), features.Row(row_idx).Data(),
-                    sizeof(float)*features.NumCols());
-    }
-   return result;
-}
-
-}  // namespace kaldi
-}  // namespace paddleaudio
diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h
deleted file mode 100644
index bbc88825cd03788fe3aa9f086b4787c68eee8910..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <pybind11/numpy.h>
-#include <pybind11/pybind11.h>
-#include <string>
-
-#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h"
-#include "feat/pitch-functions.h"
-
-namespace py = pybind11;
-
-namespace paddleaudio {
-namespace kaldi {
-
-struct FbankOptions{
-  bool use_energy;  // append an extra dimension with energy to the filter banks
-  float energy_floor;
-  bool raw_energy;  // If true, compute energy before preemphasis and windowing
-  bool htk_compat;  // If true, put energy last (if using energy)
-  bool use_log_fbank;  // if true (default), produce log-filterbank, else linear
-  bool use_power; 
-  FbankOptions(): use_energy(false),
-                 energy_floor(0.0),
-                 raw_energy(true),
-                 htk_compat(false),
-                 use_log_fbank(true),
-                 use_power(true) {}
-};
-
-bool InitFbank(
-    ::kaldi::FrameExtractionOptions frame_opts,
-    ::kaldi::MelBanksOptions mel_opts,
-    FbankOptions fbank_opts);
-
-py::array_t<float> ComputeFbank(
-    ::kaldi::FrameExtractionOptions frame_opts,
-    ::kaldi::MelBanksOptions mel_opts,
-    FbankOptions fbank_opts,
-    const py::array_t<float>& wav);
-
-py::array_t<float> ComputeFbankStreaming(const py::array_t<float>& wav);
-
-void ResetFbank();
-
-py::array_t<float> ComputeKaldiPitch(
-    const ::kaldi::PitchExtractionOptions& opts,
-    const py::array_t<float>& wav);
-
-}  // namespace kaldi
-}  // namespace paddleaudio
diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc
deleted file mode 100644
index 186cd92a0f6d35b6837c6811468d031609674178..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h"
-
-namespace paddleaudio {
-namespace kaldi {
-
-KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
-    static KaldiFeatureWrapper instance;
-    return &instance;
-}
-
-bool KaldiFeatureWrapper::InitFbank(::kaldi::FbankOptions opts) {
-    fbank_.reset(new Fbank(opts));
-    return true;
-}
-
-py::array_t<float> KaldiFeatureWrapper::ComputeFbank(
-    const py::array_t<float> wav) {
-    py::buffer_info info = wav.request();
-    ::kaldi::SubVector<::kaldi::BaseFloat> input_wav((float*)info.ptr, info.size);
-
-    ::kaldi::Vector<::kaldi::BaseFloat> feats;
-    bool flag = fbank_->ComputeFeature(input_wav, &feats);
-    if (flag == false || feats.Dim() == 0) return py::array_t<float>();
-    auto result = py::array_t<float>(feats.Dim());
-    py::buffer_info xs = result.request();
-    std::cout << std::endl;
-    float* res_ptr = (float*)xs.ptr;
-    for (int idx = 0; idx < feats.Dim(); ++idx) {
-        *res_ptr = feats(idx);
-        res_ptr++;
-    }
-
-    return result.reshape({feats.Dim() / Dim(), Dim()});
-}
-
-}  // namesapce kaldi
-}  // namespace paddleaudio
diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h
deleted file mode 100644
index 48b12bb80b8b38ed0506e1d7c387ea2dbe265128..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "base/kaldi-common.h"
-#include "feat/feature-fbank.h"
-
-#include "paddlespeech/audio/src/pybind/kaldi/feature_common.h"
-
-namespace paddleaudio {
-namespace kaldi {
-
-typedef StreamingFeatureTpl<::kaldi::FbankComputer> Fbank;
-
-class KaldiFeatureWrapper {
-  public:
-    static KaldiFeatureWrapper* GetInstance();
-    bool InitFbank(::kaldi::FbankOptions opts);
-    py::array_t<float> ComputeFbank(const py::array_t<float> wav);
-    int Dim() { return fbank_->Dim(); }
-    void ResetFbank() { fbank_->Reset(); }
-
-  private:
-    std::unique_ptr<paddleaudio::kaldi::Fbank> fbank_;
-};
-
-}  // namespace kaldi
-}  // namespace paddleaudio
diff --git a/paddlespeech/audio/src/pybind/pybind.cpp b/paddlespeech/audio/src/pybind/pybind.cpp
deleted file mode 100644
index b265a2ab1b52f2f6e913b6e6f5bdbeb24f7215c3..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/pybind.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), All rights reserved.
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
-#include "paddlespeech/audio/src/pybind/sox/io.h"
-#include "paddlespeech/audio/src/pybind/sox/effects.h"
-#include "paddlespeech/audio/third_party/kaldi/feat/feature-fbank.h"
-
-#include <pybind11/stl.h>
-#include <pybind11/pybind11.h>
-
-// `tl::optional` 
-namespace pybind11 { namespace detail {
-   template <typename T>
-   struct type_caster<tl::optional<T>> : optional_caster<tl::optional<T>> {};
-}}
-
-PYBIND11_MODULE(_paddleaudio, m) {
-#ifdef INCLUDE_SOX
-    m.def("get_info_file",
-          &paddleaudio::sox_io::get_info_file,
-          "Get metadata of audio file.");
-    // support obj later
-    m.def("get_info_fileobj",
-          &paddleaudio::sox_io::get_info_fileobj,
-          "Get metadata of audio in file object.");
-    m.def("load_audio_fileobj",
-          &paddleaudio::sox_io::load_audio_fileobj,
-          "Load audio from file object.");
-    m.def("save_audio_fileobj",
-          &paddleaudio::sox_io::save_audio_fileobj,
-          "Save audio to file obj.");
-          
-    // sox io
-     m.def("sox_io_get_info", &paddleaudio::sox_io::get_info_file);
-     m.def(
-         "sox_io_load_audio_file",
-         &paddleaudio::sox_io::load_audio_file);
-     m.def(
-         "sox_io_save_audio_file",
-         &paddleaudio::sox_io::save_audio_file);
-    
-     // sox utils
-     m.def("sox_utils_set_seed", &paddleaudio::sox_utils::set_seed);
-     m.def(
-         "sox_utils_set_verbosity",
-         &paddleaudio::sox_utils::set_verbosity);
-     m.def(
-         "sox_utils_set_use_threads",
-         &paddleaudio::sox_utils::set_use_threads);
-     m.def(
-         "sox_utils_set_buffer_size",
-         &paddleaudio::sox_utils::set_buffer_size);
-     m.def(
-         "sox_utils_list_effects",
-         &paddleaudio::sox_utils::list_effects);
-     m.def(
-         "sox_utils_list_read_formats",
-         &paddleaudio::sox_utils::list_read_formats);
-     m.def(
-         "sox_utils_list_write_formats",
-         &paddleaudio::sox_utils::list_write_formats);
-     m.def(
-         "sox_utils_get_buffer_size",
-         &paddleaudio::sox_utils::get_buffer_size);
-
-     // effect
-     m.def("apply_effects_fileobj",
-           &paddleaudio::sox_effects::apply_effects_fileobj,
-           "Decode audio data from file-like obj and apply effects.");
-     m.def("sox_effects_initialize_sox_effects",
-       &paddleaudio::sox_effects::initialize_sox_effects);
-     m.def(
-         "sox_effects_shutdown_sox_effects",
-         &paddleaudio::sox_effects::shutdown_sox_effects);
-     m.def(
-         "sox_effects_apply_effects_tensor",
-         &paddleaudio::sox_effects::apply_effects_tensor);
-     m.def(
-         "sox_effects_apply_effects_file",
-         &paddleaudio::sox_effects::apply_effects_file);
-#endif
-
-#ifdef INCLUDE_KALDI
-    m.def("ComputeFbank", &paddleaudio::kaldi::ComputeFbank, "compute fbank");
-    py::class_<kaldi::PitchExtractionOptions>(m, "PitchExtractionOptions")
-        .def(py::init<>())
-        .def_readwrite("samp_freq", &kaldi::PitchExtractionOptions::samp_freq)
-        .def_readwrite("frame_shift_ms", &kaldi::PitchExtractionOptions::frame_shift_ms)
-        .def_readwrite("frame_length_ms", &kaldi::PitchExtractionOptions::frame_length_ms)
-        .def_readwrite("preemph_coeff", &kaldi::PitchExtractionOptions::preemph_coeff)
-        .def_readwrite("min_f0", &kaldi::PitchExtractionOptions::min_f0)
-        .def_readwrite("max_f0", &kaldi::PitchExtractionOptions::max_f0)
-        .def_readwrite("soft_min_f0", &kaldi::PitchExtractionOptions::soft_min_f0)
-        .def_readwrite("penalty_factor", &kaldi::PitchExtractionOptions::penalty_factor)
-        .def_readwrite("lowpass_cutoff", &kaldi::PitchExtractionOptions::lowpass_cutoff)
-        .def_readwrite("resample_freq", &kaldi::PitchExtractionOptions::resample_freq)
-        .def_readwrite("delta_pitch", &kaldi::PitchExtractionOptions::delta_pitch)
-        .def_readwrite("nccf_ballast", &kaldi::PitchExtractionOptions::nccf_ballast)
-        .def_readwrite("lowpass_filter_width", &kaldi::PitchExtractionOptions::lowpass_filter_width)
-        .def_readwrite("upsample_filter_width", &kaldi::PitchExtractionOptions::upsample_filter_width)
-        .def_readwrite("max_frames_latency", &kaldi::PitchExtractionOptions::max_frames_latency)
-        .def_readwrite("frames_per_chunk", &kaldi::PitchExtractionOptions::frames_per_chunk)
-        .def_readwrite("simulate_first_pass_online", &kaldi::PitchExtractionOptions::simulate_first_pass_online)
-        .def_readwrite("recompute_frame", &kaldi::PitchExtractionOptions::recompute_frame)
-        .def_readwrite("nccf_ballast_online", &kaldi::PitchExtractionOptions::nccf_ballast_online)
-        .def_readwrite("snip_edges", &kaldi::PitchExtractionOptions::snip_edges);
-    m.def("ComputeKaldiPitch", &paddleaudio::kaldi::ComputeKaldiPitch, "compute kaldi pitch");
-    py::class_<kaldi::FrameExtractionOptions>(m, "FrameExtractionOptions")
-        .def(py::init<>())            
-        .def_readwrite("samp_freq", &kaldi::FrameExtractionOptions::samp_freq)
-        .def_readwrite("frame_shift_ms", &kaldi::FrameExtractionOptions::frame_shift_ms)            
-        .def_readwrite("frame_length_ms", &kaldi::FrameExtractionOptions::frame_length_ms)
-        .def_readwrite("dither", &kaldi::FrameExtractionOptions::dither)            
-        .def_readwrite("preemph_coeff", &kaldi::FrameExtractionOptions::preemph_coeff)            
-        .def_readwrite("remove_dc_offset", &kaldi::FrameExtractionOptions::remove_dc_offset)            
-        .def_readwrite("window_type", &kaldi::FrameExtractionOptions::window_type)
-        .def_readwrite("round_to_power_of_two", &kaldi::FrameExtractionOptions::round_to_power_of_two)           
-        .def_readwrite("blackman_coeff", &kaldi::FrameExtractionOptions::blackman_coeff)          
-        .def_readwrite("snip_edges", &kaldi::FrameExtractionOptions::snip_edges)
-        .def_readwrite("allow_downsample", &kaldi::FrameExtractionOptions::allow_downsample)
-        .def_readwrite("allow_upsample", &kaldi::FrameExtractionOptions::allow_upsample)
-        .def_readwrite("max_feature_vectors", &kaldi::FrameExtractionOptions::max_feature_vectors);
-    py::class_<kaldi::MelBanksOptions>(m, "MelBanksOptions")
-        .def(py::init<>())
-        .def_readwrite("num_bins", &kaldi::MelBanksOptions::num_bins)
-        .def_readwrite("low_freq", &kaldi::MelBanksOptions::low_freq)
-        .def_readwrite("high_freq", &kaldi::MelBanksOptions::high_freq)
-        .def_readwrite("vtln_low", &kaldi::MelBanksOptions::vtln_low)
-        .def_readwrite("vtln_high", &kaldi::MelBanksOptions::vtln_high)
-        .def_readwrite("debug_mel", &kaldi::MelBanksOptions::debug_mel)
-        .def_readwrite("htk_mode", &kaldi::MelBanksOptions::htk_mode);
-
-    py::class_<paddleaudio::kaldi::FbankOptions>(m, "FbankOptions")
-        .def(py::init<>())
-        .def_readwrite("use_energy", &paddleaudio::kaldi::FbankOptions::use_energy)
-        .def_readwrite("energy_floor", &paddleaudio::kaldi::FbankOptions::energy_floor)
-        .def_readwrite("raw_energy", &paddleaudio::kaldi::FbankOptions::raw_energy)
-        .def_readwrite("htk_compat", &paddleaudio::kaldi::FbankOptions::htk_compat)
-        .def_readwrite("use_log_fbank", &paddleaudio::kaldi::FbankOptions::use_log_fbank)
-        .def_readwrite("use_power", &paddleaudio::kaldi::FbankOptions::use_power);
-#endif
-
-}
diff --git a/paddlespeech/audio/src/pybind/sox/effects.cpp b/paddlespeech/audio/src/pybind/sox/effects.cpp
deleted file mode 100644
index b69c5358a4cf0c2d000970ce44563edb0eecc447..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/effects.cpp
+++ /dev/null
@@ -1,257 +0,0 @@
-#include <mutex>
-#include <sox.h>
-
-#include "paddlespeech/audio/src/pybind/sox/effects.h"
-#include "paddlespeech/audio/src/pybind/sox/effects_chain.h"
-#include "paddlespeech/audio/src/pybind/sox/utils.h"
-
-using namespace paddleaudio::sox_utils;
-
-namespace paddleaudio::sox_effects {
-
-// Streaming decoding over file-like object is tricky because libsox operates on
-// FILE pointer. The folloing is what `sox` and `play` commands do
-//  - file input -> FILE pointer
-//  - URL input -> call wget in suprocess and pipe the data -> FILE pointer
-//  - stdin -> FILE pointer
-//
-// We want to, instead, fetch byte strings chunk by chunk, consume them, and
-// discard.
-//
-// Here is the approach
-// 1. Initialize sox_format_t using sox_open_mem_read, providing the initial
-// chunk of byte string
-//    This will perform header-based format detection, if necessary, then fill
-//    the metadata of sox_format_t. Internally, sox_open_mem_read uses fmemopen,
-//    which returns FILE* which points the buffer of the provided byte string.
-// 2. Each time sox reads a chunk from the FILE*, we update the underlying
-// buffer in a way that it
-//    starts with unseen data, and append the new data read from the given
-//    fileobj. This will trick libsox as if it keeps reading from the FILE*
-//    continuously.
-// For Step 2. see `fileobj_input_drain` function in effects_chain.cpp
-auto apply_effects_fileobj(
-    py::object fileobj,
-    const std::vector<std::vector<std::string>>& effects,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    tl::optional<std::string> format)
-    -> tl::optional<std::tuple<py::array, int64_t>> {
-  // Prepare the buffer used throughout the lifecycle of SoxEffectChain.
-  //
-  // For certain format (such as FLAC), libsox keeps reading the content at
-  // the initialization unless it reaches EOF even when the header is properly
-  // parsed. (Making buffer size 8192, which is way bigger than the header,
-  // resulted in libsox consuming all the buffer content at the time it opens
-  // the file.) Therefore buffer has to always contain valid data, except after
-  // EOF. We default to `sox_get_globals()->bufsiz`* for buffer size and we
-  // first check if there is enough data to fill the buffer. `read_fileobj`
-  // repeatedly calls `read`  method until it receives the requested length of
-  // bytes or it reaches EOF. If we get bytes shorter than requested, that means
-  // the whole audio data are fetched.
-  //
-  // * This can be changed with `paddleaudio.utils.sox_utils.set_buffer_size`.
-  const auto capacity = [&]() {
-    // NOTE:
-    // Use the abstraction provided by `libpaddleaudio` to access the global
-    // config defined by libsox. Directly using `sox_get_globals` function will
-    // end up retrieving the static variable defined in `_paddleaudio`, which is
-    // not correct.
-    const auto bufsiz = get_buffer_size();
-    const int64_t kDefaultCapacityInBytes = 256;
-    return (bufsiz > kDefaultCapacityInBytes) ? bufsiz
-                                              : kDefaultCapacityInBytes;
-  }();
-  std::string buffer(capacity, '\0');
-  auto* in_buf = const_cast<char*>(buffer.data());
-  auto num_read = read_fileobj(&fileobj, capacity, in_buf);
-  // If the file is shorter than 256, then libsox cannot read the header.
-  auto in_buffer_size = (num_read > 256) ? num_read : 256;
-
-  // Open file (this starts reading the header)
-  // When opening a file there are two functions that can touches FILE*.
-  // * `auto_detect_format`
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43
-  // * `startread` handler of detected format.
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574
-  // To see the handler of a particular format, go to
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/<FORMAT>.c
-  // For example, voribs can be found
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158
-  SoxFormat sf(sox_open_mem_read(
-      in_buf,
-      in_buffer_size,
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  // In case of streamed data, length can be 0
-  if (static_cast<sox_format_t*>(sf) == nullptr ||
-      sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    return {};
-  }
-
-  // Prepare output buffer
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(sf->signal.length);
-
-  // Create and run SoxEffectsChain
-  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
-  paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
-      /*input_encoding=*/sf->encoding,
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-  chain.addInputFileObj(sf, in_buf, in_buffer_size, &fileobj);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  bool channels_first_ = channels_first.value_or(true);
-  auto tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      normalize.value_or(true),
-      channels_first_);
-
-  return std::forward_as_tuple(
-      tensor, static_cast<int64_t>(chain.getOutputSampleRate()));
-}
-
-namespace {
-
-enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };
-SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;
-std::mutex SOX_RESOUCE_STATE_MUTEX;
-
-} // namespace
-
-void initialize_sox_effects() {
-  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
-
-  switch (SOX_RESOURCE_STATE) {
-    case NotInitialized:
-      if (sox_init() != SOX_SUCCESS) {
-        throw std::runtime_error("Failed to initialize sox effects.");
-      };
-      SOX_RESOURCE_STATE = Initialized;
-      break;
-    case Initialized:
-      break;
-    case ShutDown:
-      throw std::runtime_error(
-          "SoX Effects has been shut down. Cannot initialize again.");
-  }
-};
-
-void shutdown_sox_effects() {
-  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
-
-  switch (SOX_RESOURCE_STATE) {
-    case NotInitialized:
-      throw std::runtime_error(
-          "SoX Effects is not initialized. Cannot shutdown.");
-    case Initialized:
-      if (sox_quit() != SOX_SUCCESS) {
-        throw std::runtime_error("Failed to initialize sox effects.");
-      };
-      SOX_RESOURCE_STATE = ShutDown;
-      break;
-    case ShutDown:
-      break;
-  }
-}
-
-auto apply_effects_tensor(
-    py::array waveform,
-    int64_t sample_rate,
-    const std::vector<std::vector<std::string>>& effects,
-    bool channels_first) -> std::tuple<py::array, int64_t> {
-  validate_input_tensor(waveform);
-
-  // Create SoxEffectsChain
-  const auto dtype = waveform.dtype();
-  paddleaudio::sox_effects_chain::SoxEffectsChain chain(
-      /*input_encoding=*/get_tensor_encodinginfo(dtype),
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-
-  // Prepare output buffer
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(waveform.size());
-
-  // Build and run effects chain
-  chain.addInputTensor(&waveform, sample_rate, channels_first);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  auto out_tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      /*normalize=*/false,
-      channels_first);
-
-  return std::tuple<py::array, int64_t>(
-      out_tensor, chain.getOutputSampleRate());
-}
-
-auto apply_effects_file(
-    const std::string& path,
-    const std::vector<std::vector<std::string>>& effects,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    const tl::optional<std::string>& format)
-    -> tl::optional<std::tuple<py::array, int64_t>> {
-  // Open input file
-  SoxFormat sf(sox_open_read(
-      path.c_str(),
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  if (static_cast<sox_format_t*>(sf) == nullptr ||
-      sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    return {};
-  }
-
-  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
-
-  // Prepare output
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(sf->signal.length);
-
-  // Create and run SoxEffectsChain
-  paddleaudio::sox_effects_chain::SoxEffectsChain chain(
-      /*input_encoding=*/sf->encoding,
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-
-  chain.addInputFile(sf);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  bool channels_first_ = channels_first.value_or(true);
-  auto tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      normalize.value_or(true),
-      channels_first_);
-
-  return std::tuple<py::array, int64_t>(
-      tensor, chain.getOutputSampleRate());
-}
-
-} // namespace paddleaudio::sox_effects
diff --git a/paddlespeech/audio/src/pybind/sox/effects.h b/paddlespeech/audio/src/pybind/sox/effects.h
deleted file mode 100644
index 6ba53d008715ed6dd937d50475bf2a978b734d89..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/effects.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <pybind11/numpy.h>
-
-#include "paddlespeech/audio/src/optional/optional.hpp"
-
-namespace py = pybind11;
-
-namespace paddleaudio::sox_effects {
-
-auto apply_effects_fileobj(
-    py::object fileobj,
-    const std::vector<std::vector<std::string>>& effects,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    tl::optional<std::string> format)
-    -> tl::optional<std::tuple<py::array, int64_t>>;
-
-void initialize_sox_effects();
-
-void shutdown_sox_effects();
-
-auto apply_effects_tensor(
-    py::array waveform,
-    int64_t sample_rate,
-    const std::vector<std::vector<std::string>>& effects,
-    bool channels_first) -> std::tuple<py::array, int64_t>;
-
-auto apply_effects_file(
-    const std::string& path,
-    const std::vector<std::vector<std::string>>& effects,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    const tl::optional<std::string>& format)
-    -> tl::optional<std::tuple<py::array, int64_t>>;
-
-} // namespace paddleaudio::sox_effects
diff --git a/paddlespeech/audio/src/pybind/sox/effects_chain.cpp b/paddlespeech/audio/src/pybind/sox/effects_chain.cpp
deleted file mode 100644
index 5e8f6ee71fac3daf10871dfdbdf6b2920e33cff7..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/effects_chain.cpp
+++ /dev/null
@@ -1,595 +0,0 @@
-#include <sox.h>
-#include <iostream>
-#include <vector>
-#include "paddlespeech/audio/src/pybind/sox/effects_chain.h"
-#include "paddlespeech/audio/src/pybind/sox/utils.h"
-
-using namespace paddleaudio::sox_utils;
-
-namespace paddleaudio::sox_effects_chain {
-
-namespace {
-
-/// helper classes for passing the location of input tensor and output buffer
-///
-/// drain/flow callback functions require plaing C style function signature and
-/// the way to pass extra data is to attach data to sox_effect_t::priv pointer.
-/// The following structs will be assigned to sox_effect_t::priv pointer which
-/// gives sox_effect_t an access to input Tensor and output buffer object.
-struct TensorInputPriv {
-  size_t index;
-  py::array* waveform;
-  int64_t sample_rate;
-  bool channels_first;
-};
-
-struct TensorOutputPriv {
-  std::vector<sox_sample_t>* buffer;
-};
-struct FileOutputPriv {
-  sox_format_t* sf;
-};
-
-/// Callback function to feed Tensor data to SoxEffectChain.
-int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
-  // Retrieve the input Tensor and current index
-  auto priv = static_cast<TensorInputPriv*>(effp->priv);
-  auto index = priv->index;
-  auto tensor = *(priv->waveform);
-  auto num_channels = effp->out_signal.channels;
-
-  // Adjust the number of samples to read
-  const size_t num_samples = tensor.size();
-  if (index + *osamp > num_samples) {
-    *osamp = num_samples - index;
-  }
-
-  // Ensure that it's a multiple of the number of channels
-  *osamp -= *osamp % num_channels;
-
-  // Slice the input Tensor
-  // refacor this module, chunk
-  auto i_frame = index / num_channels;
-  auto num_frames = *osamp / num_channels;
-
-  std::vector<int> chunk(num_frames*num_channels);
-  py::buffer_info ori_info = tensor.request();
-  void* ptr = ori_info.ptr;
-  // Convert to sox_sample_t (int32_t)
-  switch (tensor.dtype().num()) {
-    //case c10::ScalarType::Float: {
-    case 11: {
-      // Need to convert to 64-bit precision so that
-      // values around INT32_MIN/MAX are handled correctly.
-      for (int idx = 0; idx < chunk.size(); ++idx) {
-        int frame_idx = (idx + index) / num_channels;
-        int channels_idx = (idx + index) % num_channels;
-        double elem = 0; 
-        if (priv->channels_first) {
-          elem = *(float*)tensor.data(channels_idx, frame_idx);
-        } else {
-          elem = *(float*)tensor.data(frame_idx, channels_idx);
-        } 
-        elem = elem * 2147483648.;
-        // *new_ptr = std::clamp(elem, INT32_MIN, INT32_MAX);
-        if (elem > INT32_MAX) { 
-          chunk[idx] = INT32_MAX; 
-        } else if (elem < INT32_MIN) {
-          chunk[idx] = INT32_MIN; 
-        } else { 
-          chunk[idx] = elem;
-        }
-      }
-      break;
-    }
-    //case c10::ScalarType::Int: {
-    case 5: {
-      for (int idx = 0; idx < chunk.size(); ++idx) {
-        int frame_idx = (idx + index) / num_channels;
-        int channels_idx = (idx + index) % num_channels;
-        int elem = 0;
-        if (priv->channels_first) {
-          elem = *(int*)tensor.data(channels_idx, frame_idx);
-        } else {
-          elem = *(int*)tensor.data(frame_idx, channels_idx);
-        }
-        chunk[idx] = elem;
-      }
-      break;
-    }
-    // case short
-    case 3: {
-      for (int idx = 0; idx < chunk.size(); ++idx) {
-        int frame_idx = (idx + index) / num_channels;
-        int channels_idx = (idx + index) % num_channels;
-        int16_t elem = 0;
-        if (priv->channels_first) {
-          elem = *(int16_t*)tensor.data(channels_idx, frame_idx);
-        } else {
-          elem = *(int16_t*)tensor.data(frame_idx, channels_idx);
-        }
-        chunk[idx] = elem * 65536;
-      }
-      break;
-    }
-    // case byte
-    case 1: {
-      for (int idx = 0; idx < chunk.size(); ++idx) {
-        int frame_idx = (idx + index) / num_channels;
-        int channels_idx = (idx + index) % num_channels;
-        int8_t elem = 0;
-        if (priv->channels_first) {
-          elem = *(int8_t*)tensor.data(channels_idx, frame_idx);
-        } else {
-          elem = *(int8_t*)tensor.data(frame_idx, channels_idx);
-        }
-        chunk[idx] = (elem - 128) * 16777216; 
-      }
-      break;
-    }
-    default:
-      throw std::runtime_error("Unexpected dtype.");
-  }
-  // Write to buffer
-  memcpy(obuf, chunk.data(), *osamp * 4);
-  priv->index += *osamp;
-  return (priv->index == num_samples) ? SOX_EOF : SOX_SUCCESS;
-}
-
-/// Callback function to fetch data from SoxEffectChain.
-int tensor_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) {
-  *osamp = 0;
-  // Get output buffer
-  auto out_buffer = static_cast<TensorOutputPriv*>(effp->priv)->buffer;
-  // Append at the end
-  out_buffer->insert(out_buffer->end(), ibuf, ibuf + *isamp);
-  return SOX_SUCCESS;
-}
-
-int file_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) {
-  *osamp = 0;
-  if (*isamp) {
-    auto sf = static_cast<FileOutputPriv*>(effp->priv)->sf;
-    if (sox_write(sf, ibuf, *isamp) != *isamp) {
-      if (sf->sox_errno) {
-        std::ostringstream stream;
-        stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
-               << sf->filename;
-        throw std::runtime_error(stream.str());
-      }
-      return SOX_EOF;
-    }
-  }
-  return SOX_SUCCESS;
-}
-
-sox_effect_handler_t* get_tensor_input_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"input_tensor",
-      /*usage=*/NULL,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/NULL,
-      /*start=*/NULL,
-      /*flow=*/NULL,
-      /*drain=*/tensor_input_drain,
-      /*stop=*/NULL,
-      /*kill=*/NULL,
-      /*priv_size=*/sizeof(TensorInputPriv)};
-  return &handler;
-}
-
-sox_effect_handler_t* get_tensor_output_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_tensor",
-      /*usage=*/NULL,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/NULL,
-      /*start=*/NULL,
-      /*flow=*/tensor_output_flow,
-      /*drain=*/NULL,
-      /*stop=*/NULL,
-      /*kill=*/NULL,
-      /*priv_size=*/sizeof(TensorOutputPriv)};
-  return &handler;
-}
-
-sox_effect_handler_t* get_file_output_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_file",
-      /*usage=*/NULL,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/NULL,
-      /*start=*/NULL,
-      /*flow=*/file_output_flow,
-      /*drain=*/NULL,
-      /*stop=*/NULL,
-      /*kill=*/NULL,
-      /*priv_size=*/sizeof(FileOutputPriv)};
-  return &handler;
-}
-
-} // namespace
-
-SoxEffect::SoxEffect(sox_effect_t* se) noexcept : se_(se) {}
-
-SoxEffect::~SoxEffect() {
-  if (se_ != nullptr) {
-    free(se_);
-  }
-}
-
-SoxEffect::operator sox_effect_t*() const {
-  return se_;
-}
-
-auto SoxEffect::operator->() noexcept -> sox_effect_t* {
-  return se_;
-}
-
-SoxEffectsChain::SoxEffectsChain(
-    sox_encodinginfo_t input_encoding,
-    sox_encodinginfo_t output_encoding)
-    : in_enc_(input_encoding),
-      out_enc_(output_encoding),
-      in_sig_(),
-      interm_sig_(),
-      out_sig_(),
-      sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
-  if (!sec_) {
-    throw std::runtime_error("Failed to create effect chain.");
-  }
-}
-
-SoxEffectsChain::~SoxEffectsChain() {
-  if (sec_ != nullptr) {
-    sox_delete_effects_chain(sec_);
-  }
-}
-
-void SoxEffectsChain::run() {
-  sox_flow_effects(sec_, NULL, NULL);
-}
-
-void SoxEffectsChain::addInputTensor(
-    py::array* waveform,
-    int64_t sample_rate,
-    bool channels_first) {
-  in_sig_ = get_signalinfo(waveform, sample_rate, "wav", channels_first);
-  interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(get_tensor_input_handler()));
-  auto priv = static_cast<TensorInputPriv*>(e->priv);
-  priv->index = 0;
-  priv->waveform = waveform;
-  priv->sample_rate = sample_rate;
-  priv->channels_first = channels_first;
-  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
-    throw std::runtime_error(
-        "Internal Error: Failed to add effect: input_tensor");
-  }
-}
-
-void SoxEffectsChain::addOutputBuffer(
-    std::vector<sox_sample_t>* output_buffer) {
-  SoxEffect e(sox_create_effect(get_tensor_output_handler()));
-  static_cast<TensorOutputPriv*>(e->priv)->buffer = output_buffer;
-  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
-    throw std::runtime_error(
-        "Internal Error: Failed to add effect: output_tensor");
-  }
-}
-
-void SoxEffectsChain::addInputFile(sox_format_t* sf) {
-  in_sig_ = sf->signal;
-  interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(sox_find_effect("input")));
-  char* opts[] = {(char*)sf};
-  sox_effect_options(e, 1, opts);
-  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
-    std::ostringstream stream;
-    stream << "Internal Error: Failed to add effect: input " << sf->filename;
-    throw std::runtime_error(stream.str());
-  }
-}
-
-void SoxEffectsChain::addOutputFile(sox_format_t* sf) {
-  out_sig_ = sf->signal;
-  SoxEffect e(sox_create_effect(get_file_output_handler()));
-  static_cast<FileOutputPriv*>(e->priv)->sf = sf;
-  if (sox_add_effect(sec_, e, &interm_sig_, &out_sig_) != SOX_SUCCESS) {
-    std::ostringstream stream;
-    stream << "Internal Error: Failed to add effect: output " << sf->filename;
-    throw std::runtime_error(stream.str());
-  }
-}
-
-void SoxEffectsChain::addEffect(const std::vector<std::string> effect) {
-  const auto num_args = effect.size();
-  if (num_args == 0) {
-    throw std::runtime_error("Invalid argument: empty effect.");
-  }
-  const auto name = effect[0];
-  if (UNSUPPORTED_EFFECTS.find(name) != UNSUPPORTED_EFFECTS.end()) {
-    std::ostringstream stream;
-    stream << "Unsupported effect: " << name;
-    throw std::runtime_error(stream.str());
-  }
-
-  auto returned_effect = sox_find_effect(name.c_str());
-  if (!returned_effect) {
-    std::ostringstream stream;
-    stream << "Unsupported effect: " << name;
-    throw std::runtime_error(stream.str());
-  }
-  SoxEffect e(sox_create_effect(returned_effect));
-  const auto num_options = num_args - 1;
-
-  std::vector<char*> opts;
-  for (size_t i = 1; i < num_args; ++i) {
-    opts.push_back((char*)effect[i].c_str());
-  }
-  if (sox_effect_options(e, num_options, num_options ? opts.data() : nullptr) !=
-      SOX_SUCCESS) {
-    std::ostringstream stream;
-    stream << "Invalid effect option:";
-    for (const auto& v : effect) {
-      stream << " " << v;
-    }
-    throw std::runtime_error(stream.str());
-  }
-
-  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
-    std::ostringstream stream;
-    stream << "Internal Error: Failed to add effect: \"" << name;
-    for (size_t i = 1; i < num_args; ++i) {
-      stream << " " << effect[i];
-    }
-    stream << "\"";
-    throw std::runtime_error(stream.str());
-  }
-}
-
-int64_t SoxEffectsChain::getOutputNumChannels() {
-  return interm_sig_.channels;
-}
-
-int64_t SoxEffectsChain::getOutputSampleRate() {
-  return interm_sig_.rate;
-}
-
-namespace {
-
-/// helper classes for passing file-like object to SoxEffectChain
-struct FileObjInputPriv {
-  sox_format_t* sf;
-  py::object* fileobj;
-  bool eof_reached;
-  char* buffer;
-  uint64_t buffer_size;
-};
-
-struct FileObjOutputPriv {
-  sox_format_t* sf;
-  py::object* fileobj;
-  char** buffer;
-  size_t* buffer_size;
-};
-
-/// Callback function to feed byte string
-/// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/sox.h#L1268-L1278
-auto fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp)
-    -> int {
-  auto priv = static_cast<FileObjInputPriv*>(effp->priv);
-  auto sf = priv->sf;
-  auto buffer = priv->buffer;
-
-  // 1. Refresh the buffer
-  //
-  // NOTE:
-  //   Since the underlying FILE* was opened with `fmemopen`, the only way
-  //   libsox detect EOF is reaching the end of the buffer. (null byte won't
-  //   help) Therefore we need to align the content at the end of buffer,
-  //   otherwise, libsox will keep reading the content beyond intended length.
-  //
-  // Before:
-  //
-  //     |<-------consumed------>|<---remaining--->|
-  //     |***********************|-----------------|
-  //                             ^ ftell
-  //
-  // After:
-  //
-  //     |<-offset->|<---remaining--->|<-new data->|
-  //     |**********|-----------------|++++++++++++|
-  //                ^ ftell
-
-  // NOTE:
-  //   Do not use `sf->tell_off` here. Presumably, `tell_off` and `fseek` are
-  //   supposed to be in sync, but there are cases (Vorbis) they are not
-  //   in sync and `tell_off` has seemingly uninitialized value, which
-  //   leads num_remain to be negative and cause segmentation fault
-  //   in `memmove`.
-  const auto tell = ftell((FILE*)sf->fp);
-  if (tell < 0) {
-    throw std::runtime_error("Internal Error: ftell failed.");
-  }
-  const auto num_consumed = static_cast<size_t>(tell);
-  if (num_consumed > priv->buffer_size) {
-    throw std::runtime_error("Internal Error: buffer overrun.");
-  }
-
-  const auto num_remain = priv->buffer_size - num_consumed;
-
-  // 1.1. Fetch the data to see if there is data to fill the buffer
-  size_t num_refill = 0;
-  std::string chunk(num_consumed, '\0');
-  if (num_consumed && !priv->eof_reached) {
-    num_refill = read_fileobj(
-        priv->fileobj, num_consumed, const_cast<char*>(chunk.data()));
-    if (num_refill < num_consumed) {
-      priv->eof_reached = true;
-    }
-  }
-  const auto offset = num_consumed - num_refill;
-
-  // 1.2. Move the unconsumed data towards the beginning of buffer.
-  if (num_remain) {
-    auto src = static_cast<void*>(buffer + num_consumed);
-    auto dst = static_cast<void*>(buffer + offset);
-    memmove(dst, src, num_remain);
-  }
-
-  // 1.3. Refill the remaining buffer.
-  if (num_refill) {
-    auto src = static_cast<void*>(const_cast<char*>(chunk.c_str()));
-    auto dst = buffer + offset + num_remain;
-    memcpy(dst, src, num_refill);
-  }
-
-  // 1.4. Set the file pointer to the new offset
-  sf->tell_off = offset;
-  fseek((FILE*)sf->fp, offset, SEEK_SET);
-
-  // 2. Perform decoding operation
-  // The following part is practically same as "input" effect
-  // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/input.c#L30-L48
-
-  // At this point, osamp represents the buffer size in bytes,
-  // but sox_read expects the maximum number of samples ready to read.
-  // Normally, this is fine, but in case when the samples are not 4-byte
-  // aligned, (e.g. sample is 24bits), the resulting signal is not correct.
-  // https://github.com/pytorch/audio/issues/2083
-  if (sf->encoding.bits_per_sample > 0)
-    *osamp /= (sf->encoding.bits_per_sample / 8);
-
-  // Ensure that it's a multiple of the number of channels
-  *osamp -= *osamp % effp->out_signal.channels;
-
-  // Read up to *osamp samples into obuf;
-  // store the actual number read back to *osamp
-  *osamp = sox_read(sf, obuf, *osamp);
-
-  // Decoding is finished when fileobject is exhausted and sox can no longer
-  // decode a sample.
-  return (priv->eof_reached && !*osamp) ? SOX_EOF : SOX_SUCCESS;
-}
-
-auto fileobj_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) -> int {
-  *osamp = 0;
-  if (*isamp) {
-    auto priv = static_cast<FileObjOutputPriv*>(effp->priv);
-    auto sf = priv->sf;
-    auto fp = static_cast<FILE*>(sf->fp);
-    auto fileobj = priv->fileobj;
-    auto buffer = priv->buffer;
-
-    // Encode chunk
-    auto num_samples_written = sox_write(sf, ibuf, *isamp);
-    fflush(fp);
-
-    // Copy the encoded chunk to python object.
-    fileobj->attr("write")(py::bytes(*buffer, ftell(fp)));
-
-    // Reset FILE*
-    sf->tell_off = 0;
-    fseek(fp, 0, SEEK_SET);
-
-    if (num_samples_written != *isamp) {
-      if (sf->sox_errno) {
-        std::ostringstream stream;
-        stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
-               << sf->filename;
-        throw std::runtime_error(stream.str());
-      }
-      return SOX_EOF;
-    }
-  }
-  return SOX_SUCCESS;
-}
-
-auto get_fileobj_input_handler() -> sox_effect_handler_t* {
-  static sox_effect_handler_t handler{
-      /*name=*/"input_fileobj_object",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/nullptr,
-      /*drain=*/fileobj_input_drain,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(FileObjInputPriv)};
-  return &handler;
-}
-
-auto get_fileobj_output_handler() -> sox_effect_handler_t* {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_fileobj_object",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/fileobj_output_flow,
-      /*drain=*/nullptr,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(FileObjOutputPriv)};
-  return &handler;
-}
-
-} // namespace
-
-void SoxEffectsChainPyBind::addInputFileObj(
-    sox_format_t* sf,
-    char* buffer,
-    uint64_t buffer_size,
-    py::object* fileobj) {
-  in_sig_ = sf->signal;
-  interm_sig_ = in_sig_;
-
-  SoxEffect e(sox_create_effect(get_fileobj_input_handler()));
-  auto priv = static_cast<FileObjInputPriv*>(e->priv);
-  priv->sf = sf;
-  priv->fileobj = fileobj;
-  priv->eof_reached = false;
-  priv->buffer = buffer;
-  priv->buffer_size = buffer_size;
-  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
-    throw std::runtime_error(
-        "Internal Error: Failed to add effect: input fileobj");
-  }
-}
-
-void SoxEffectsChainPyBind::addOutputFileObj(
-    sox_format_t* sf,
-    char** buffer,
-    size_t* buffer_size,
-    py::object* fileobj) {
-  out_sig_ = sf->signal;
-  SoxEffect e(sox_create_effect(get_fileobj_output_handler()));
-  auto priv = static_cast<FileObjOutputPriv*>(e->priv);
-  priv->sf = sf;
-  priv->fileobj = fileobj;
-  priv->buffer = buffer;
-  priv->buffer_size = buffer_size;
-  if (sox_add_effect(sec_, e, &interm_sig_, &out_sig_) != SOX_SUCCESS) {
-    throw std::runtime_error(
-        "Internal Error: Failed to add effect: output fileobj");
-  }
-}
-
-} // namespace paddleaudio::sox_effects_chain
diff --git a/paddlespeech/audio/src/pybind/sox/effects_chain.h b/paddlespeech/audio/src/pybind/sox/effects_chain.h
deleted file mode 100644
index 6fb994b5af6a9f519fa9e10811ba3fb6a2fac47b..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/effects_chain.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#pragma once
-
-#include <sox.h>
-#include "paddlespeech/audio/src/pybind/sox/utils.h"
-
-namespace paddleaudio::sox_effects_chain {
-
-// Helper struct to safely close sox_effect_t* pointer returned by
-// sox_create_effect
-
-struct SoxEffect {
-  explicit SoxEffect(sox_effect_t* se) noexcept;
-  SoxEffect(const SoxEffect& other) = delete;
-  SoxEffect(const SoxEffect&& other) = delete;
-  auto operator=(const SoxEffect& other) -> SoxEffect& = delete;
-  auto operator=(SoxEffect&& other) -> SoxEffect& = delete;
-  ~SoxEffect();
-  operator sox_effect_t*() const;
-  auto operator->() noexcept -> sox_effect_t*;
-
- private:
-  sox_effect_t* se_;
-};
-
-// Helper struct to safely close sox_effects_chain_t with handy methods
-class SoxEffectsChain {
-  const sox_encodinginfo_t in_enc_;
-  const sox_encodinginfo_t out_enc_;
-
- protected:
-  sox_signalinfo_t in_sig_;
-  sox_signalinfo_t interm_sig_;
-  sox_signalinfo_t out_sig_;
-  sox_effects_chain_t* sec_;
-
- public:
-  explicit SoxEffectsChain(
-      sox_encodinginfo_t input_encoding,
-      sox_encodinginfo_t output_encoding);
-  SoxEffectsChain(const SoxEffectsChain& other) = delete;
-  SoxEffectsChain(const SoxEffectsChain&& other) = delete;
-  SoxEffectsChain& operator=(const SoxEffectsChain& other) = delete;
-  SoxEffectsChain& operator=(SoxEffectsChain&& other) = delete;
-  ~SoxEffectsChain();
-  void run();
-  void addInputTensor(
-      py::array* waveform,
-      int64_t sample_rate,
-      bool channels_first);
-  void addInputFile(sox_format_t* sf);
-  void addOutputBuffer(std::vector<sox_sample_t>* output_buffer);
-  void addOutputFile(sox_format_t* sf);
-  void addEffect(const std::vector<std::string> effect);
-  int64_t getOutputNumChannels();
-  int64_t getOutputSampleRate();
-};
-
-class SoxEffectsChainPyBind : public SoxEffectsChain {
-  using SoxEffectsChain::SoxEffectsChain;
-
- public:
-  void addInputFileObj(
-      sox_format_t* sf,
-      char* buffer,
-      uint64_t buffer_size,
-      py::object* fileobj);
-
-  void addOutputFileObj(
-      sox_format_t* sf,
-      char** buffer,
-      size_t* buffer_size,
-      py::object* fileobj);
-};
-
-} // namespace paddleaudio::sox_effects_chain
-
diff --git a/paddlespeech/audio/src/pybind/sox/io.cpp b/paddlespeech/audio/src/pybind/sox/io.cpp
deleted file mode 100644
index 60f9222abc666f17b291a1fa1ada4507ac5d484e..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/io.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-// All rights reserved.
-
-#include "paddlespeech/audio/src/pybind/sox/io.h"
-#include "paddlespeech/audio/src/pybind/sox/effects.h"
-#include "paddlespeech/audio/src/pybind/sox/types.h"
-#include "paddlespeech/audio/src/pybind/sox/effects_chain.h"
-#include "paddlespeech/audio/src/pybind/sox/utils.h"
-#include "paddlespeech/audio/src/optional/optional.hpp"
-
-using namespace paddleaudio::sox_utils;
-
-namespace paddleaudio {
-namespace sox_io {
-
-auto get_info_file(const std::string &path, 
-                   const tl::optional<std::string> &format)
-    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
-    SoxFormat sf(
-        sox_open_read(path.data(),
-                      /*signal=*/nullptr,
-                      /*encoding=*/nullptr,
-                      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-
-    validate_input_file(sf, path);
-
-    return std::make_tuple(
-        static_cast<int64_t>(sf->signal.rate),
-        static_cast<int64_t>(sf->signal.length / sf->signal.channels),
-        static_cast<int64_t>(sf->signal.channels),
-        static_cast<int64_t>(sf->encoding.bits_per_sample),
-        get_encoding(sf->encoding.encoding));
-}
-
-std::vector<std::vector<std::string>> get_effects(
-    const tl::optional<int64_t>& frame_offset,
-    const tl::optional<int64_t>& num_frames) {
-  const auto offset = frame_offset.value_or(0);
-  if (offset < 0) {
-    throw std::runtime_error(
-        "Invalid argument: frame_offset must be non-negative.");
-  }
-  const auto frames = num_frames.value_or(-1);
-  if (frames == 0 || frames < -1) {
-    throw std::runtime_error(
-        "Invalid argument: num_frames must be -1 or greater than 0.");
-  }
-
-  std::vector<std::vector<std::string>> effects;
-  if (frames != -1) {
-    std::ostringstream os_offset, os_frames;
-    os_offset << offset << "s";
-    os_frames << "+" << frames << "s";
-    effects.emplace_back(
-        std::vector<std::string>{"trim", os_offset.str(), os_frames.str()});
-  } else if (offset != 0) {
-    std::ostringstream os_offset;
-    os_offset << offset << "s";
-    effects.emplace_back(std::vector<std::string>{"trim", os_offset.str()});
-  }
-  return effects;
-}
-
-auto get_info_fileobj(py::object fileobj, 
-                      const tl::optional<std::string> &format)
-    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
-    const auto capacity = [&]() {
-        const auto bufsiz = get_buffer_size();
-        const int64_t kDefaultCapacityInBytes = 4096;
-        return (bufsiz > kDefaultCapacityInBytes) ? bufsiz
-                                                  : kDefaultCapacityInBytes;
-    }();
-    std::string buffer(capacity, '\0');
-    auto *buf = const_cast<char *>(buffer.data());
-    auto num_read = read_fileobj(&fileobj, capacity, buf);
-    // If the file is shorter than 256, then libsox cannot read the header.
-    auto buf_size = (num_read > 256) ? num_read : 256;
-
-    SoxFormat sf(sox_open_mem_read(
-        buf,
-        buf_size,
-        /*signal=*/nullptr,
-        /*encoding=*/nullptr,
-        /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-    // In case of streamed data, length can be 0
-    validate_input_memfile(sf);
-
-    return std::make_tuple(
-        static_cast<int64_t>(sf->signal.rate),
-        static_cast<int64_t>(sf->signal.length / sf->signal.channels),
-        static_cast<int64_t>(sf->signal.channels),
-        static_cast<int64_t>(sf->encoding.bits_per_sample),
-        get_encoding(sf->encoding.encoding));
-}
-
-tl::optional<std::tuple<py::array, int64_t>> load_audio_fileobj(
-    py::object fileobj,
-    const tl::optional<int64_t>& frame_offset,
-    const tl::optional<int64_t>& num_frames,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    const tl::optional<std::string>& format) {
-  auto effects = get_effects(frame_offset, num_frames);
-  return paddleaudio::sox_effects::apply_effects_fileobj(
-      std::move(fileobj), effects, normalize, channels_first, std::move(format));
-}
-
-tl::optional<std::tuple<py::array, int64_t>> load_audio_file(
-    const std::string& path,
-    const tl::optional<int64_t>& frame_offset,
-    const tl::optional<int64_t>& num_frames,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    const tl::optional<std::string>& format) {
-    auto effects = get_effects(frame_offset, num_frames);
-    return paddleaudio::sox_effects::apply_effects_file(
-        path, effects, normalize, channels_first, format);
-}
-
-void save_audio_file(const std::string& path,
-                     py::array tensor,
-                     int64_t sample_rate,
-                     bool channels_first,
-                     tl::optional<double> compression,
-                     tl::optional<std::string> format,
-                     tl::optional<std::string> encoding,
-                     tl::optional<int64_t> bits_per_sample) {
-    validate_input_tensor(tensor);
-
-    const auto filetype = [&]() {
-        if (format.has_value()) return format.value();
-        return get_filetype(path);
-    }();
-
-    if (filetype == "amr-nb") {
-        const auto num_channels = tensor.shape(channels_first ? 0 : 1);
-        //TORCH_CHECK(num_channels == 1,
-        //            "amr-nb format only supports single channel audio.");
-        assert(num_channels == 1);
-    } else if (filetype == "htk") {
-        const auto num_channels = tensor.shape(channels_first ? 0 : 1);
-       // TORCH_CHECK(num_channels == 1,
-        //            "htk format only supports single channel audio.");
-        assert(num_channels == 1);
-    } else if (filetype == "gsm") {
-        const auto num_channels = tensor.shape(channels_first ? 0 : 1);
-        assert(num_channels == 1);
-        assert(sample_rate == 8000);
-        //TORCH_CHECK(num_channels == 1,
-        //            "gsm format only supports single channel audio.");
-        //TORCH_CHECK(sample_rate == 8000,
-        //            "gsm format only supports a sampling rate of 8kHz.");
-    }
-    const auto signal_info =
-        get_signalinfo(&tensor, sample_rate, filetype, channels_first);
-    const auto encoding_info = get_encodinginfo_for_save(
-        filetype, tensor.dtype(), compression, encoding, bits_per_sample);
-
-    SoxFormat sf(sox_open_write(path.c_str(),
-                                &signal_info,
-                                &encoding_info,
-                                /*filetype=*/filetype.c_str(),
-                                /*oob=*/nullptr,
-                                /*overwrite_permitted=*/nullptr));
-
-    if (static_cast<sox_format_t*>(sf) == nullptr) {
-        throw std::runtime_error(
-            "Error saving audio file: failed to open file " + path);
-    }
-
-    paddleaudio::sox_effects_chain::SoxEffectsChain chain(
-        /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
-        /*output_encoding=*/sf->encoding);
-    chain.addInputTensor(&tensor, sample_rate, channels_first);
-    chain.addOutputFile(sf);
-    chain.run();
-}
-
-namespace {
-// helper class to automatically release buffer, to be used by
-// save_audio_fileobj
-struct AutoReleaseBuffer {
-  char* ptr;
-  size_t size;
-
-  AutoReleaseBuffer() : ptr(nullptr), size(0) {}
-  AutoReleaseBuffer(const AutoReleaseBuffer& other) = delete;
-  AutoReleaseBuffer(AutoReleaseBuffer&& other) = delete;
-  auto operator=(const AutoReleaseBuffer& other) -> AutoReleaseBuffer& = delete;
-  auto operator=(AutoReleaseBuffer&& other) -> AutoReleaseBuffer& = delete;
-  ~AutoReleaseBuffer() {
-    if (ptr) {
-      free(ptr);
-    }
-  }
-};
-
-} // namespace
-
-void save_audio_fileobj(
-    py::object fileobj,
-    py::array tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    tl::optional<double> compression,
-    tl::optional<std::string> format,
-    tl::optional<std::string> encoding,
-    tl::optional<int64_t> bits_per_sample) {
-
-  if (!format.has_value()) {
-    throw std::runtime_error(
-        "`format` is required when saving to file object.");
-  }
-  const auto filetype = format.value();
-
-  if (filetype == "amr-nb") {
-    const auto num_channels = tensor.shape(channels_first ? 0 : 1);
-    if (num_channels != 1) {
-      throw std::runtime_error(
-          "amr-nb format only supports single channel audio.");
-    }
-  } else if (filetype == "htk") {
-    const auto num_channels = tensor.shape(channels_first ? 0 : 1);
-    if (num_channels != 1) {
-      throw std::runtime_error(
-          "htk format only supports single channel audio.");
-    }
-  } else if (filetype == "gsm") {
-    const auto num_channels = tensor.shape(channels_first ? 0 : 1);
-    if (num_channels != 1) {
-      throw std::runtime_error(
-          "gsm format only supports single channel audio.");
-    }
-    if (sample_rate != 8000) {
-      throw std::runtime_error(
-          "gsm format only supports a sampling rate of 8kHz.");
-    }
-  }
-
-  const auto signal_info =
-      get_signalinfo(&tensor, sample_rate, filetype, channels_first);
-  const auto encoding_info = get_encodinginfo_for_save(
-      filetype,
-      tensor.dtype(),
-      compression,
-      std::move(encoding),
-      bits_per_sample);
-
-  AutoReleaseBuffer buffer;
-
-  SoxFormat sf(sox_open_memstream_write(
-      &buffer.ptr,
-      &buffer.size,
-      &signal_info,
-      &encoding_info,
-      filetype.c_str(),
-      /*oob=*/nullptr));
-
-  if (static_cast<sox_format_t*>(sf) == nullptr) {
-    throw std::runtime_error(
-        "Error saving audio file: failed to open memory stream.");
-  }
-
-  paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
-      /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
-      /*output_encoding=*/sf->encoding);
-  chain.addInputTensor(&tensor, sample_rate, channels_first);
-  chain.addOutputFileObj(sf, &buffer.ptr, &buffer.size, &fileobj);
-  chain.run();
-
-  // Closing the sox_format_t is necessary for flushing the last chunk to the
-  // buffer
-  sf.close();
-  fileobj.attr("write")(py::bytes(buffer.ptr, buffer.size));
-}
-
-}  // namespace paddleaudio
-}  // namespace sox_io
diff --git a/paddlespeech/audio/src/pybind/sox/io.h b/paddlespeech/audio/src/pybind/sox/io.h
deleted file mode 100644
index 3734bcb34d34f2b23e26a374aef646ecb715276f..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/io.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-// All rights reserved.
-
-#pragma once
-
-#include "paddlespeech/audio/src/pybind/sox/utils.h"
-
-namespace py = pybind11;
-
-namespace paddleaudio {
-namespace sox_io {
-
-auto get_info_file(const std::string &path, 
-                   const tl::optional<std::string> &format)
-    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
-
-auto get_info_fileobj(py::object fileobj,
-                   const tl::optional<std::string> &format)
-    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
-
-tl::optional<std::tuple<py::array, int64_t>> load_audio_fileobj(
-    py::object fileobj,
-    const tl::optional<int64_t>& frame_offset,
-    const tl::optional<int64_t>& num_frames,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    const tl::optional<std::string>& format);
-
-void save_audio_fileobj(
-    py::object fileobj,
-    py::array tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    tl::optional<double> compression,
-    tl::optional<std::string> format,
-    tl::optional<std::string> encoding,
-    tl::optional<int64_t> bits_per_sample);
-
-auto get_effects(const tl::optional<int64_t>& frame_offset,
-                 const tl::optional<int64_t>& num_frames)
-    -> std::vector<std::vector<std::string>>;
-
-
-tl::optional<std::tuple<py::array, int64_t>> load_audio_file(
-    const std::string& path,
-    const tl::optional<int64_t>& frame_offset,
-    const tl::optional<int64_t>& num_frames,
-    tl::optional<bool> normalize,
-    tl::optional<bool> channels_first,
-    const tl::optional<std::string>& format);
-
-void save_audio_file(const std::string& path,
-                     py::array tensor,
-                     int64_t sample_rate,
-                     bool channels_first,
-                     tl::optional<double> compression,
-                     tl::optional<std::string> format,
-                     tl::optional<std::string> encoding,
-                     tl::optional<int64_t> bits_per_sample);    
-
-
-}  // namespace paddleaudio
-}  // namespace sox_io
diff --git a/paddlespeech/audio/src/pybind/sox/types.cpp b/paddlespeech/audio/src/pybind/sox/types.cpp
deleted file mode 100644
index 8e3e61373e0cc0238c7b3a722ce995daea7cfd25..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/types.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/types.cpp
-
-#include "paddlespeech/audio/src/pybind/sox/types.h"
-#include <ostream>
-#include <sstream>
-
-namespace paddleaudio {
-namespace sox_utils {
-
-Format get_format_from_string(const std::string& format) {
-  if (format == "wav")
-    return Format::WAV;
-  if (format == "mp3")
-    return Format::MP3;
-  if (format == "flac")
-    return Format::FLAC;
-  if (format == "ogg" || format == "vorbis")
-    return Format::VORBIS;
-  if (format == "amr-nb")
-    return Format::AMR_NB;
-  if (format == "amr-wb")
-    return Format::AMR_WB;
-  if (format == "amb")
-    return Format::AMB;
-  if (format == "sph")
-    return Format::SPHERE;
-  if (format == "htk")
-    return Format::HTK;
-  if (format == "gsm")
-    return Format::GSM;
-  std::ostringstream stream;
-  stream << "Internal Error: unexpected format value: " << format;
-  throw std::runtime_error(stream.str());
-}
-
-std::string to_string(Encoding v) {
-  switch (v) {
-    case Encoding::UNKNOWN:
-      return "UNKNOWN";
-    case Encoding::PCM_SIGNED:
-      return "PCM_S";
-    case Encoding::PCM_UNSIGNED:
-      return "PCM_U";
-    case Encoding::PCM_FLOAT:
-      return "PCM_F";
-    case Encoding::FLAC:
-      return "FLAC";
-    case Encoding::ULAW:
-      return "ULAW";
-    case Encoding::ALAW:
-      return "ALAW";
-    case Encoding::MP3:
-      return "MP3";
-    case Encoding::VORBIS:
-      return "VORBIS";
-    case Encoding::AMR_WB:
-      return "AMR_WB";
-    case Encoding::AMR_NB:
-      return "AMR_NB";
-    case Encoding::OPUS:
-      return "OPUS";
-    default:
-      throw std::runtime_error("Internal Error: unexpected encoding.");
-  }
-}
-
-Encoding get_encoding_from_option(const tl::optional<std::string> encoding) {
-  if (!encoding.has_value())
-    return Encoding::NOT_PROVIDED;
-  std::string v = encoding.value();
-  if (v == "PCM_S")
-    return Encoding::PCM_SIGNED;
-  if (v == "PCM_U")
-    return Encoding::PCM_UNSIGNED;
-  if (v == "PCM_F")
-    return Encoding::PCM_FLOAT;
-  if (v == "ULAW")
-    return Encoding::ULAW;
-  if (v == "ALAW")
-    return Encoding::ALAW;
-  std::ostringstream stream;
-  stream << "Internal Error: unexpected encoding value: " << v;
-  throw std::runtime_error(stream.str());
-}
-
-BitDepth get_bit_depth_from_option(const tl::optional<int64_t> bit_depth) {
-  if (!bit_depth.has_value())
-    return BitDepth::NOT_PROVIDED;
-  int64_t v = bit_depth.value();
-  switch (v) {
-    case 8:
-      return BitDepth::B8;
-    case 16:
-      return BitDepth::B16;
-    case 24:
-      return BitDepth::B24;
-    case 32:
-      return BitDepth::B32;
-    case 64:
-      return BitDepth::B64;
-    default: {
-      std::ostringstream s;
-      s << "Internal Error: unexpected bit depth value: " << v;
-      throw std::runtime_error(s.str());
-    }
-  }
-}
-
-std::string get_encoding(sox_encoding_t encoding) {
-  switch (encoding) {
-    case SOX_ENCODING_UNKNOWN:
-      return "UNKNOWN";
-    case SOX_ENCODING_SIGN2:
-      return "PCM_S";
-    case SOX_ENCODING_UNSIGNED:
-      return "PCM_U";
-    case SOX_ENCODING_FLOAT:
-      return "PCM_F";
-    case SOX_ENCODING_FLAC:
-      return "FLAC";
-    case SOX_ENCODING_ULAW:
-      return "ULAW";
-    case SOX_ENCODING_ALAW:
-      return "ALAW";
-    case SOX_ENCODING_MP3:
-      return "MP3";
-    case SOX_ENCODING_VORBIS:
-      return "VORBIS";
-    case SOX_ENCODING_AMR_WB:
-      return "AMR_WB";
-    case SOX_ENCODING_AMR_NB:
-      return "AMR_NB";
-    case SOX_ENCODING_OPUS:
-      return "OPUS";
-    case SOX_ENCODING_GSM:
-      return "GSM";
-    default:
-      return "UNKNOWN";
-  }
-}
-
-} // namespace sox_utils
-} // namespace paddleaudio
diff --git a/paddlespeech/audio/src/pybind/sox/types.h b/paddlespeech/audio/src/pybind/sox/types.h
deleted file mode 100644
index 780840161366b7be4384fc86ac0ed6064557ced8..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/types.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/types.h
-#pragma once
-
-#include <sox.h>
-#include "paddlespeech/audio/src/optional/optional.hpp"
-
-namespace paddleaudio {
-namespace sox_utils {
-
-enum class Format {
-  WAV,
-  MP3,
-  FLAC,
-  VORBIS,
-  AMR_NB,
-  AMR_WB,
-  AMB,
-  SPHERE,
-  GSM,
-  HTK,
-};
-
-Format get_format_from_string(const std::string& format);
-
-enum class Encoding {
-  NOT_PROVIDED,
-  UNKNOWN,
-  PCM_SIGNED,
-  PCM_UNSIGNED,
-  PCM_FLOAT,
-  FLAC,
-  ULAW,
-  ALAW,
-  MP3,
-  VORBIS,
-  AMR_WB,
-  AMR_NB,
-  OPUS,
-};
-
-std::string to_string(Encoding v);
-Encoding get_encoding_from_option(const tl::optional<std::string> encoding);
-
-enum class BitDepth : unsigned {
-  NOT_PROVIDED = 0,
-  B8 = 8,
-  B16 = 16,
-  B24 = 24,
-  B32 = 32,
-  B64 = 64,
-};
-
-BitDepth get_bit_depth_from_option(const tl::optional<int64_t> bit_depth);
-
-std::string get_encoding(sox_encoding_t encoding);
-
-} // namespace sox_utils
-} // namespace paddleaudio
\ No newline at end of file
diff --git a/paddlespeech/audio/src/pybind/sox/utils.cpp b/paddlespeech/audio/src/pybind/sox/utils.cpp
deleted file mode 100644
index 5c78bc11614c2646b20fdbf9717d2d79f15ec89d..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/utils.cpp
+++ /dev/null
@@ -1,642 +0,0 @@
-// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-// All rights reserved.
-#include <sox.h>
-
-#include "paddlespeech/audio/src/pybind/sox/utils.h"
-#include "paddlespeech/audio/src/pybind/sox/types.h"
-
-#include <sstream>
-
-namespace paddleaudio {
-namespace sox_utils {
-
-auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
-    -> uint64_t {
-    uint64_t num_read = 0;
-    while (num_read < size) {
-        auto request = size - num_read;
-        auto chunk = static_cast<std::string>(
-            static_cast<py::bytes>(fileobj->attr("read")(request)));
-        auto chunk_len = chunk.length();
-        if (chunk_len == 0) {
-            break;
-        }
-        if (chunk_len > request) {
-            std::ostringstream message;
-            message
-                << "Requested up to " << request << " bytes but, "
-                << "received " << chunk_len << " bytes. "
-                << "The given object does not confirm to read protocol of file "
-                   "object.";
-            throw std::runtime_error(message.str());
-        }
-        memcpy(buffer, chunk.data(), chunk_len);
-        buffer += chunk_len;
-        num_read += chunk_len;
-    }
-    return num_read;
-}
-
-
-void set_seed(const int64_t seed) {
-  sox_get_globals()->ranqd1 = static_cast<sox_int32_t>(seed);
-}
-
-void set_verbosity(const int64_t verbosity) {
-  sox_get_globals()->verbosity = static_cast<unsigned>(verbosity);
-}
-
-void set_use_threads(const bool use_threads) {
-  sox_get_globals()->use_threads = static_cast<sox_bool>(use_threads);
-}
-
-void set_buffer_size(const int64_t buffer_size) {
-  sox_get_globals()->bufsiz = static_cast<size_t>(buffer_size);
-}
-
-int64_t get_buffer_size() {
-  return sox_get_globals()->bufsiz;
-}
-
-std::vector<std::vector<std::string>> list_effects() {
-  std::vector<std::vector<std::string>> effects;
-  for (const sox_effect_fn_t* fns = sox_get_effect_fns(); *fns; ++fns) {
-    const sox_effect_handler_t* handler = (*fns)();
-    if (handler && handler->name) {
-      if (UNSUPPORTED_EFFECTS.find(handler->name) ==
-          UNSUPPORTED_EFFECTS.end()) {
-        effects.emplace_back(std::vector<std::string>{
-            handler->name,
-            handler->usage ? std::string(handler->usage) : std::string("")});
-      }
-    }
-  }
-  return effects;
-}
-
-std::vector<std::string> list_write_formats() {
-  std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
-    const sox_format_handler_t* handler = fns->fn();
-    for (const char* const* names = handler->names; *names; ++names) {
-      if (!strchr(*names, '/') && handler->write)
-        formats.emplace_back(*names);
-    }
-  }
-  return formats;
-}
-
-std::vector<std::string> list_read_formats() {
-  std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
-    const sox_format_handler_t* handler = fns->fn();
-    for (const char* const* names = handler->names; *names; ++names) {
-      if (!strchr(*names, '/') && handler->read)
-        formats.emplace_back(*names);
-    }
-  }
-  return formats;
-}
-
-SoxFormat::SoxFormat(sox_format_t* fd) noexcept : fd_(fd) {}
-SoxFormat::~SoxFormat() {
-  close();
-}
-
-sox_format_t* SoxFormat::operator->() const noexcept {
-  return fd_;
-}
-SoxFormat::operator sox_format_t*() const noexcept {
-  return fd_;
-}
-
-void SoxFormat::close() {
-  if (fd_ != nullptr) {
-    sox_close(fd_);
-    fd_ = nullptr;
-  }
-}
-
-void validate_input_file(const SoxFormat& sf, const std::string& path) {
-  if (static_cast<sox_format_t*>(sf) == nullptr) {
-    throw std::runtime_error(
-        "Error loading audio file: failed to open file " + path);
-  }
-  if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    throw std::runtime_error("Error loading audio file: unknown encoding.");
-  }
-}
-
-void validate_input_memfile(const SoxFormat &sf) {
-    return validate_input_file(sf, "<in memory buffer>");
-}
-
-void validate_input_tensor(const py::array tensor) {
-  if (tensor.ndim() != 2) {
-    throw std::runtime_error("Input tensor has to be 2D.");
-  }
-
-  char dtype = tensor.dtype().char_();
-  bool flag = (dtype == 'f') || (dtype == 'd') || (dtype == 'l') || (dtype == 'i');
-  if (flag == false) {
-      throw std::runtime_error(
-          "Input tensor has to be one of float32, int32, int16 or uint8 type.");
-  }
-}
-
-py::dtype get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision) {
-    switch (encoding) {
-      case SOX_ENCODING_UNSIGNED: // 8-bit PCM WAV
-        return py::dtype('u1');
-      case SOX_ENCODING_SIGN2: // 16-bit, 24-bit, or 32-bit PCM WAV
-        switch (precision) {
-          case 16:
-            return py::dtype("i2");
-          case 24: // Cast 24-bit to 32-bit.
-          case 32:
-            return py::dtype('i');
-          default:
-            throw std::runtime_error(
-                "Only 16, 24, and 32 bits are supported for signed PCM.");
-        }
-      default:
-        // default to float32 for the other formats, including
-        // 32-bit flaoting-point WAV,
-        // MP3,
-        // FLAC,
-        // VORBIS etc...
-        return py::dtype("f");
-    }
-}
-
-py::array convert_to_tensor(
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const py::dtype dtype,
-    const bool normalize,
-    const bool channels_first) {
-  // todo refector later(SGoat)
-  py::array t;
-  uint64_t dummy = 0;
-  SOX_SAMPLE_LOCALS;
-  int32_t num_rows = num_samples / num_channels;
-  if (normalize || dtype.char_() == 'f') {
-    t = py::array(dtype, {num_rows, num_channels});
-    auto ptr = (float*)t.mutable_data(0, 0);
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_FLOAT_32BIT(buffer[i], dummy);
-    }
-    if (channels_first) {
-    py::array t2 = py::array(dtype, {num_channels, num_rows});
-    for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) {
-      for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx)
-       *(float*)t2.mutable_data(row_idx, col_idx) = *(float*)t.data(col_idx, row_idx);
-    }
-    return t2;
-  }
-  } else if (dtype.char_() == 'i') {
-    t = py::array(dtype, {num_rows, num_channels});
-    auto ptr = (int*)t.mutable_data(0, 0);
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = buffer[i];
-    }
-    if (channels_first) {
-      py::array t2 = py::array(dtype, {num_channels, num_rows});
-      for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) {
-        for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx)
-          *(int*)t2.mutable_data(row_idx, col_idx) = *(int*)t.data(col_idx, row_idx);
-      }
-      return t2;
-    }
-  } else if (dtype.char_() == 'h') { // int16
-    t = py::array(dtype, {num_rows, num_channels});
-    auto ptr = (int16_t*)t.mutable_data(0, 0);
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_SIGNED_16BIT(buffer[i], dummy);
-    }
-    if (channels_first) {
-      py::array t2 = py::array(dtype, {num_channels, num_rows});
-      for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) {
-        for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx)
-          *(int16_t*)t2.mutable_data(row_idx, col_idx) = *(int16_t*)t.data(col_idx, row_idx);
-      }
-      return t2;
-    }
-  } else if (dtype.char_() == 'b') {
-    //t = torch::empty({num_samples / num_channels, num_channels}, torch::kUInt8);
-    t = py::array(dtype, {num_rows, num_channels});
-    auto ptr = (uint8_t*)t.mutable_data(0,0);
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_UNSIGNED_8BIT(buffer[i], dummy);
-    }
-    if (channels_first) {
-      py::array t2 = py::array(dtype, {num_channels, num_rows});
-      for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) {
-        for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx)
-        *(uint8_t*)t2.mutable_data(row_idx, col_idx) = *(uint8_t*)t.data(col_idx, row_idx);
-      }
-      return t2;
-    }
-  } else {
-    throw std::runtime_error("Unsupported dtype.");
-  }
-  return t;
-}
-
-const std::string get_filetype(const std::string path) {
-  std::string ext = path.substr(path.find_last_of(".") + 1);
-  std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
-  return ext;
-}
-
-namespace {
-
-std::tuple<sox_encoding_t, unsigned> get_save_encoding_for_wav(
-    const std::string format,
-    py::dtype dtype,
-    const Encoding& encoding,
-    const BitDepth& bits_per_sample) {
-  switch (encoding) {
-    case Encoding::NOT_PROVIDED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-          switch (dtype.num()) {
-            case 11: // float32 numpy dtype num 
-              return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
-            case 5: // int numpy dtype num
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
-            case 3: // int16 numpy
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-            case 1: // byte numpy
-              return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-            default:
-              throw std::runtime_error("Internal Error: Unexpected dtype.");
-          }
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
-      }
-    case Encoding::PCM_SIGNED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-          return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
-        case BitDepth::B8:
-          throw std::runtime_error(
-              format + " does not support 8-bit signed PCM encoding.");
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
-      }
-    case Encoding::PCM_UNSIGNED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-        default:
-          throw std::runtime_error(
-              format + " only supports 8-bit for unsigned PCM encoding.");
-      }
-    case Encoding::PCM_FLOAT:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B32:
-          return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
-        case BitDepth::B64:
-          return std::make_tuple<>(SOX_ENCODING_FLOAT, 64);
-        default:
-          throw std::runtime_error(
-              format +
-              " only supports 32-bit or 64-bit for floating-point PCM encoding.");
-      }
-    case Encoding::ULAW:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
-        default:
-          throw std::runtime_error(
-              format + " only supports 8-bit for mu-law encoding.");
-      }
-    case Encoding::ALAW:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
-        default:
-          throw std::runtime_error(
-              format + " only supports 8-bit for a-law encoding.");
-      }
-    default:
-      throw std::runtime_error(
-          format + " does not support encoding: " + to_string(encoding));
-  }
-}
-
-std::tuple<sox_encoding_t, unsigned> get_save_encoding(
-    const std::string& format,
-    const py::dtype dtype,
-    const tl::optional<std::string> encoding,
-    const tl::optional<int64_t> bits_per_sample) {
-  const Format fmt = get_format_from_string(format);
-  const Encoding enc = get_encoding_from_option(encoding);
-  const BitDepth bps = get_bit_depth_from_option(bits_per_sample);
-
-  switch (fmt) {
-    case Format::WAV:
-    case Format::AMB:
-      return get_save_encoding_for_wav(format, dtype, enc, bps);
-    case Format::MP3:
-      if (enc != Encoding::NOT_PROVIDED)
-        throw std::runtime_error("mp3 does not support `encoding` option.");
-      if (bps != BitDepth::NOT_PROVIDED)
-        throw std::runtime_error(
-            "mp3 does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_MP3, 16);
-    case Format::HTK:
-      if (enc != Encoding::NOT_PROVIDED)
-        throw std::runtime_error("htk does not support `encoding` option.");
-      if (bps != BitDepth::NOT_PROVIDED)
-        throw std::runtime_error(
-            "htk does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-    case Format::VORBIS:
-      if (enc != Encoding::NOT_PROVIDED)
-        throw std::runtime_error("vorbis does not support `encoding` option.");
-      if (bps != BitDepth::NOT_PROVIDED)
-        throw std::runtime_error(
-            "vorbis does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_VORBIS, 16);
-    case Format::AMR_NB:
-      if (enc != Encoding::NOT_PROVIDED)
-        throw std::runtime_error("amr-nb does not support `encoding` option.");
-      if (bps != BitDepth::NOT_PROVIDED)
-        throw std::runtime_error(
-            "amr-nb does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_AMR_NB, 16);
-    case Format::FLAC:
-      if (enc != Encoding::NOT_PROVIDED)
-        throw std::runtime_error("flac does not support `encoding` option.");
-      switch (bps) {
-        case BitDepth::B32:
-        case BitDepth::B64:
-          throw std::runtime_error(
-              "flac does not support `bits_per_sample` larger than 24.");
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_FLAC, static_cast<unsigned>(bps));
-      }
-    case Format::SPHERE:
-      switch (enc) {
-        case Encoding::NOT_PROVIDED:
-        case Encoding::PCM_SIGNED:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
-            default:
-              return std::make_tuple<>(
-                  SOX_ENCODING_SIGN2, static_cast<unsigned>(bps));
-          }
-        case Encoding::PCM_UNSIGNED:
-          throw std::runtime_error(
-              "sph does not support unsigned integer PCM.");
-        case Encoding::PCM_FLOAT:
-          throw std::runtime_error("sph does not support floating point PCM.");
-        case Encoding::ULAW:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-            case BitDepth::B8:
-              return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
-            default:
-              throw std::runtime_error(
-                  "sph only supports 8-bit for mu-law encoding.");
-          }
-        case Encoding::ALAW:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-            case BitDepth::B8:
-              return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
-            default:
-              return std::make_tuple<>(
-                  SOX_ENCODING_ALAW, static_cast<unsigned>(bps));
-          }
-        default:
-          throw std::runtime_error(
-              "sph does not support encoding: " + encoding.value());
-      }
-    case Format::GSM:
-      if (enc != Encoding::NOT_PROVIDED)
-        throw std::runtime_error("gsm does not support `encoding` option.");
-      if (bps != BitDepth::NOT_PROVIDED)
-        throw std::runtime_error(
-            "gsm does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_GSM, 16);
-
-    default:
-      throw std::runtime_error("Unsupported format: " + format);
-  }
-}
-
-unsigned get_precision(const std::string filetype, py::dtype dtype) {
-  if (filetype == "mp3")
-    return SOX_UNSPEC;
-  if (filetype == "flac")
-    return 24;
-  if (filetype == "ogg" || filetype == "vorbis")
-    return SOX_UNSPEC;
-  if (filetype == "wav" || filetype == "amb") {
-    switch (dtype.num()) {
-      case 1: // byte in numpy dype num
-        return 8;
-      case 3: // short, in numpy dtype num
-        return 16;
-      case 5: // int, numpy dtype 
-        return 32;
-      case 11: // float, numpy dtype
-        return 32;
-      default:
-        throw std::runtime_error("Unsupported dtype.");
-    }
-  }
-  if (filetype == "sph")
-    return 32;
-  if (filetype == "amr-nb") {
-    return 16;
-  }
-  if (filetype == "gsm") {
-    return 16;
-  }
-  if (filetype == "htk") {
-    return 16;
-  }
-  throw std::runtime_error("Unsupported file type: " + filetype);
-}
-
-} // namespace
-
-sox_signalinfo_t get_signalinfo(
-    const py::array* waveform,
-    const int64_t sample_rate,
-    const std::string filetype,
-    const bool channels_first) {
-  return sox_signalinfo_t{
-      /*rate=*/static_cast<sox_rate_t>(sample_rate),
-      /*channels=*/
-      static_cast<unsigned>(waveform->shape(channels_first ? 0 : 1)),
-      /*precision=*/get_precision(filetype, waveform->dtype()),
-      /*length=*/static_cast<uint64_t>(waveform->size())};
-}
-
-sox_encodinginfo_t get_tensor_encodinginfo(py::dtype dtype) {
-  sox_encoding_t encoding = [&]() {
-    switch (dtype.num()) {
-      case 1: // byte
-        return SOX_ENCODING_UNSIGNED;
-      case 3: // short
-        return SOX_ENCODING_SIGN2;
-      case 5: // int32
-        return SOX_ENCODING_SIGN2;
-      case 11: // float
-        return SOX_ENCODING_FLOAT;
-      default:
-        throw std::runtime_error("Unsupported dtype.");
-    }
-  }();
-  unsigned bits_per_sample = [&]() {
-    switch (dtype.num()) {
-      case 1: // byte
-        return 8;
-      case 3: //short
-        return 16;
-      case 5: // int32
-        return 32;
-      case 11: // float
-        return 32;
-      default:
-        throw std::runtime_error("Unsupported dtype.");
-    }
-  }();
-  return sox_encodinginfo_t{
-      /*encoding=*/encoding,
-      /*bits_per_sample=*/bits_per_sample,
-      /*compression=*/HUGE_VAL,
-      /*reverse_bytes=*/sox_option_default,
-      /*reverse_nibbles=*/sox_option_default,
-      /*reverse_bits=*/sox_option_default,
-      /*opposite_endian=*/sox_false};
-}
-
-sox_encodinginfo_t get_encodinginfo_for_save(
-    const std::string& format,
-    const py::dtype dtype,
-    const tl::optional<double> compression,
-    const tl::optional<std::string> encoding,
-    const tl::optional<int64_t> bits_per_sample) {
-  auto enc = get_save_encoding(format, dtype, encoding, bits_per_sample);
-  return sox_encodinginfo_t{
-      /*encoding=*/std::get<0>(enc),
-      /*bits_per_sample=*/std::get<1>(enc),
-      /*compression=*/compression.value_or(HUGE_VAL),
-      /*reverse_bytes=*/sox_option_default,
-      /*reverse_nibbles=*/sox_option_default,
-      /*reverse_bits=*/sox_option_default,
-      /*opposite_endian=*/sox_false};
-}
-
-
-/*
-SoxFormat::SoxFormat(sox_format_t *fd) noexcept : fd_(fd) {}
-SoxFormat::~SoxFormat() { close(); }
-
-sox_format_t *SoxFormat::operator->() const noexcept { return fd_; }
-SoxFormat::operator sox_format_t *() const noexcept { return fd_; }
-
-void SoxFormat::close() {
-    if (fd_ != nullptr) {
-        sox_close(fd_);
-        fd_ = nullptr;
-    }
-}
-
-auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
-    -> uint64_t {
-    uint64_t num_read = 0;
-    while (num_read < size) {
-        auto request = size - num_read;
-        auto chunk = static_cast<std::string>(
-            static_cast<py::bytes>(fileobj->attr("read")(request)));
-        auto chunk_len = chunk.length();
-        if (chunk_len == 0) {
-            break;
-        }
-        if (chunk_len > request) {
-            std::ostringstream message;
-            message
-                << "Requested up to " << request << " bytes but, "
-                << "received " << chunk_len << " bytes. "
-                << "The given object does not confirm to read protocol of file "
-                   "object.";
-            throw std::runtime_error(message.str());
-        }
-        memcpy(buffer, chunk.data(), chunk_len);
-        buffer += chunk_len;
-        num_read += chunk_len;
-    }
-    return num_read;
-}
-
-int64_t get_buffer_size() { return sox_get_globals()->bufsiz; }
-
-void validate_input_file(const SoxFormat &sf, const std::string &path) {
-    if (static_cast<sox_format_t *>(sf) == nullptr) {
-        throw std::runtime_error(
-            "Error loading audio file: failed to open file " + path);
-    }
-    if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-        throw std::runtime_error("Error loading audio file: unknown encoding.");
-    }
-}
-
-void validate_input_memfile(const SoxFormat &sf) {
-    return validate_input_file(sf, "<in memory buffer>");
-}
-
-std::string get_encoding(sox_encoding_t encoding) {
-    switch (encoding) {
-        case SOX_ENCODING_UNKNOWN:
-            return "UNKNOWN";
-        case SOX_ENCODING_SIGN2:
-            return "PCM_S";
-        case SOX_ENCODING_UNSIGNED:
-            return "PCM_U";
-        case SOX_ENCODING_FLOAT:
-            return "PCM_F";
-        case SOX_ENCODING_FLAC:
-            return "FLAC";
-        case SOX_ENCODING_ULAW:
-            return "ULAW";
-        case SOX_ENCODING_ALAW:
-            return "ALAW";
-        case SOX_ENCODING_MP3:
-            return "MP3";
-        case SOX_ENCODING_VORBIS:
-            return "VORBIS";
-        case SOX_ENCODING_AMR_WB:
-            return "AMR_WB";
-        case SOX_ENCODING_AMR_NB:
-            return "AMR_NB";
-        case SOX_ENCODING_OPUS:
-            return "OPUS";
-        case SOX_ENCODING_GSM:
-            return "GSM";
-        default:
-            return "UNKNOWN";
-    }
-}
-*/
-}  // namespace paddleaudio
-}  // namespace sox_utils
diff --git a/paddlespeech/audio/src/pybind/sox/utils.h b/paddlespeech/audio/src/pybind/sox/utils.h
deleted file mode 100644
index 65223bc0c5c78085abf1ddc9ca4e10f5b1801718..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/pybind/sox/utils.h
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-// All rights reserved.
-
-#pragma once
-
-#include <pybind11/pybind11.h>
-#include <pybind11/numpy.h>
-#include <sox.h>
-#include "paddlespeech/audio/src/optional/optional.hpp"
-
-namespace py = pybind11;
-
-namespace paddleaudio {
-namespace sox_utils {
-
-auto read_fileobj(py::object *fileobj, uint64_t size, char *buffer) -> uint64_t;
-
-void set_seed(const int64_t seed);
-
-void set_verbosity(const int64_t verbosity);
-
-void set_use_threads(const bool use_threads);
-
-void set_buffer_size(const int64_t buffer_size);
-
-int64_t get_buffer_size();
-
-std::vector<std::vector<std::string>> list_effects();
-
-std::vector<std::string> list_read_formats();
-
-std::vector<std::string> list_write_formats();
-
-////////////////////////////////////////////////////////////////////////////////
-// Utilities for sox_io / sox_effects implementations
-////////////////////////////////////////////////////////////////////////////////
-
-const std::unordered_set<std::string> UNSUPPORTED_EFFECTS =
-    {"input", "output", "spectrogram", "noiseprof", "noisered", "splice"};
-
-/// helper class to automatically close sox_format_t*
-struct SoxFormat {
-  explicit SoxFormat(sox_format_t* fd) noexcept;
-  SoxFormat(const SoxFormat& other) = delete;
-  SoxFormat(SoxFormat&& other) = delete;
-  SoxFormat& operator=(const SoxFormat& other) = delete;
-  SoxFormat& operator=(SoxFormat&& other) = delete;
-  ~SoxFormat();
-  sox_format_t* operator->() const noexcept;
-  operator sox_format_t*() const noexcept;
-
-  void close();
-
- private:
-  sox_format_t* fd_;
-};
-
-///
-/// Verify that input Tensor is 2D, CPU and either uin8, int16, int32 or float32
-void validate_input_tensor(const py::array);
-
-void validate_input_file(const SoxFormat& sf, const std::string& path);
-
-void validate_input_memfile(const SoxFormat &sf);
-///
-/// Get target dtype for the given encoding and precision.
-py::dtype get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision);
-
-///
-/// Convert sox_sample_t buffer to uint8/int16/int32/float32 Tensor
-/// NOTE: This function might modify the values in the input buffer to
-/// reduce the number of memory copy.
-/// @param buffer Pointer to buffer that contains audio data.
-/// @param num_samples The number of samples to read.
-/// @param num_channels The number of channels. Used to reshape the resulting
-/// Tensor.
-/// @param dtype Target dtype. Determines the output dtype and value range in
-/// conjunction with normalization.
-/// @param noramlize Perform normalization. Only effective when dtype is not
-/// kFloat32. When effective, the output tensor is kFloat32 type and value range
-/// is [-1.0, 1.0]
-/// @param channels_first When True, output Tensor has shape of [num_channels,
-/// num_frames].
-py::array convert_to_tensor(
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const py::dtype dtype,
-    const bool normalize,
-    const bool channels_first);
-
-/// Extract extension from file path
-const std::string get_filetype(const std::string path);
-
-/// Get sox_signalinfo_t for passing a py::array object.
-sox_signalinfo_t get_signalinfo(
-    const py::array* waveform,
-    const int64_t sample_rate,
-    const std::string filetype,
-    const bool channels_first);
-
-/// Get sox_encodinginfo_t for Tensor I/O
-sox_encodinginfo_t get_tensor_encodinginfo(const py::dtype dtype);
-
-/// Get sox_encodinginfo_t for saving to file/file object
-sox_encodinginfo_t get_encodinginfo_for_save(
-    const std::string& format,
-    const py::dtype dtype,
-    const tl::optional<double> compression,
-    const tl::optional<std::string> encoding,
-    const tl::optional<int64_t> bits_per_sample);
-
-}  // namespace paddleaudio
-}  // namespace sox_utils
diff --git a/paddlespeech/audio/src/utils.cpp b/paddlespeech/audio/src/utils.cpp
deleted file mode 100644
index d9d3575f201018aa88dc3ec146b19b3f4eeaa993..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/src/utils.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-namespace paddleaudio {
-
-namespace {
-
-bool is_sox_available() {
-#ifdef INCLUDE_SOX
-    return true;
-#else
-    return false;
-#endif
-}
-
-bool is_kaldi_available() {
-#ifdef INCLUDE_KALDI
-    return true;
-#else
-    return false;
-#endif
-}
-
-// It tells whether paddleaudio was compiled with ffmpeg
-// not the runtime availability.
-bool is_ffmpeg_available() {
-#ifdef USE_FFMPEG
-    return true;
-#else
-    return false;
-#endif
-}
-
-}  // namespace
-
-}  // namespace paddleaudio
\ No newline at end of file
diff --git a/paddlespeech/audio/streamdata/autodecode.py b/paddlespeech/audio/streamdata/autodecode.py
index d7f7937bde3fc8f37af615612832eff223ca944d..ce5181d836ccbd0eaf11505c9cb15c088c54f638 100644
--- a/paddlespeech/audio/streamdata/autodecode.py
+++ b/paddlespeech/audio/streamdata/autodecode.py
@@ -295,7 +295,7 @@ def torch_video(key, data):
 
 
 def paddle_audio(key, data):
-    """Decode audio using the paddlespeech.audio library.
+    """Decode audio using the paddleaudio library.
 
     :param key: file name extension
     :param data: data to be decoded
@@ -304,13 +304,13 @@ def paddle_audio(key, data):
     if extension not in ["flac", "mp3", "sox", "wav", "m4a", "ogg", "wma"]:
         return None
 
-    import paddlespeech.audio
+    import paddleaudio
 
     with tempfile.TemporaryDirectory() as dirname:
         fname = os.path.join(dirname, f"file.{extension}")
         with open(fname, "wb") as stream:
             stream.write(data)
-        return paddlespeech.audio.load(fname)
+        return paddleaudio.backends.soundfile_load(fname)
 
 
 ################################################################
diff --git a/paddlespeech/audio/streamdata/filters.py b/paddlespeech/audio/streamdata/filters.py
index 68d6830bb83c8c6944923963ef1ec69a79f8780e..c4f590fc850d38c29dc649bc142fea34db6dac4a 100644
--- a/paddlespeech/audio/streamdata/filters.py
+++ b/paddlespeech/audio/streamdata/filters.py
@@ -25,8 +25,10 @@ import paddle
 
 from . import autodecode
 from . import utils
-from .. import backends
-from ..compliance import kaldi
+
+from paddleaudio import backends
+from paddleaudio.compliance import kaldi
+
 from ..transform.cmvn import GlobalCMVN
 from ..transform.spec_augment import freq_mask
 from ..transform.spec_augment import time_mask
diff --git a/paddlespeech/audio/streamdata/tariterators.py b/paddlespeech/audio/streamdata/tariterators.py
index 79b81c0ce67623241c42818508b7a106195996cc..3adf4892a7b4293f76235e6e553338f633346a2e 100644
--- a/paddlespeech/audio/streamdata/tariterators.py
+++ b/paddlespeech/audio/streamdata/tariterators.py
@@ -20,7 +20,7 @@ trace = False
 meta_prefix = "__"
 meta_suffix = "__"
 
-import paddlespeech
+import paddleaudio
 import paddle
 import numpy as np
 
@@ -111,7 +111,7 @@ def tar_file_iterator(fileobj,
             assert pos > 0
             prefix, postfix = name[:pos], name[pos + 1:]
             if postfix == 'wav':
-                waveform, sample_rate = paddlespeech.audio.load(
+                waveform, sample_rate = paddleaudio.backends.soundfile_load(
                     stream.extractfile(tarinfo), normal=False)
                 result = dict(
                     fname=prefix, wav=waveform, sample_rate=sample_rate)
@@ -163,7 +163,7 @@ def tar_file_and_group_iterator(fileobj,
                 if postfix == 'txt':
                     example['txt'] = file_obj.read().decode('utf8').strip()
                 elif postfix in AUDIO_FORMAT_SETS:
-                    waveform, sample_rate = paddlespeech.audio.load(
+                    waveform, sample_rate = paddleaudio.backends.soundfile_load(
                         file_obj, normal=False)
                     waveform = paddle.to_tensor(
                         np.expand_dims(np.array(waveform), 0),
diff --git a/paddlespeech/audio/third_party/.gitignore b/paddlespeech/audio/third_party/.gitignore
deleted file mode 100644
index 2d788f6b4b9d67cdde429f552d0514dbec8c9d87..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-archives/
-install/
diff --git a/paddlespeech/audio/third_party/CMakeLists.txt b/paddlespeech/audio/third_party/CMakeLists.txt
deleted file mode 100644
index 43288f39b3517b3b4f8e1f60064720900c0123fa..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
-
-################################################################################
-# sox
-################################################################################
-if (BUILD_SOX)
-  add_subdirectory(sox)
-endif()
-
-################################################################################
-# kaldi
-################################################################################
-if (BUILD_KALDI)
-  add_subdirectory(kaldi)
-endif()
\ No newline at end of file
diff --git a/paddlespeech/audio/third_party/kaldi/CMakeLists.txt b/paddlespeech/audio/third_party/kaldi/CMakeLists.txt
deleted file mode 100644
index d25a22124876a83bc6114d66ca9c05d6480fe160..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/kaldi/CMakeLists.txt
+++ /dev/null
@@ -1,117 +0,0 @@
-# checkout the thirdparty/kaldi/base/kaldi-types.h
-# compile kaldi without openfst
-add_definitions("-DCOMPILE_WITHOUT_OPENFST")
-
-# function (define_library name source include_dirs link_libraries compile_defs)
-#   add_library(${name} INTERFACE ${source})
-#   target_include_directories(${name} INTERFACE ${include_dirs})
-#   target_link_libraries(${name} INTERFACE ${link_libraries})
-#   target_compile_definitions(${name} INTERFACE ${compile_defs})
-#   set_target_properties(${name} PROPERTIES PREFIX "")
-#   if (MSVC)
-#     set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
-#   endif(MSVC)
-#   install(
-#     TARGETS ${name}
-#     LIBRARY DESTINATION lib
-#     RUNTIME DESTINATION lib  # For Windows
-#     )
-# endfunction()
-
-# kaldi-base
-add_library(kaldi-base STATIC
-  base/io-funcs.cc
-  base/kaldi-error.cc
-  base/kaldi-math.cc
-  base/kaldi-utils.cc
-  base/timer.cc
-)
-target_include_directories(kaldi-base PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-
-
-# kaldi-matrix
-add_library(kaldi-matrix STATIC
-  matrix/compressed-matrix.cc
-  matrix/matrix-functions.cc
-  matrix/kaldi-matrix.cc
-  matrix/kaldi-vector.cc
-  matrix/optimization.cc
-  matrix/packed-matrix.cc
-  matrix/qr.cc
-  matrix/sparse-matrix.cc
-  matrix/sp-matrix.cc
-  matrix/srfft.cc
-  matrix/tp-matrix.cc
-)
-target_include_directories(kaldi-matrix PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base libopenblas)
-
-
-# kaldi-util
-add_library(kaldi-util STATIC
-  util/kaldi-holder.cc
-  util/kaldi-io.cc
-  util/kaldi-semaphore.cc
-  util/kaldi-table.cc
-  util/kaldi-thread.cc
-  util/parse-options.cc
-  util/simple-io-funcs.cc
-  util/simple-options.cc
-  util/text-utils.cc
-)
-target_include_directories(kaldi-util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix)
-
-
-# kaldi-feat-common
-add_library(kaldi-feat-common STATIC
-  feat/cmvn.cc
-  feat/feature-functions.cc
-  feat/feature-window.cc
-  feat/mel-computations.cc
-  feat/pitch-functions.cc
-  feat/resample.cc
-  feat/signal.cc
-  feat/wave-reader.cc
-)
-target_include_directories(kaldi-feat-common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)
-
-
-# kaldi-mfcc
-add_library(kaldi-mfcc STATIC
-  feat/feature-mfcc.cc
-)
-target_include_directories(kaldi-mfcc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)
-
-
-# kaldi-fbank
-add_library(kaldi-fbank STATIC
-  feat/feature-fbank.cc
-)
-target_include_directories(kaldi-fbank PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)
-
-
-set(KALDI_LIBRARIES
-  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-base.a
-  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-matrix.a
-  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-util.a
-  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-feat-common.a
-  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-mfcc.a
-  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-fbank.a
-)
-
-add_library(libkaldi INTERFACE)
-add_dependencies(libkaldi kaldi-base kaldi-matrix kaldi-util kaldi-feat-common kaldi-mfcc kaldi-fbank)
-target_include_directories(libkaldi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(libkaldi INTERFACE   
-  # --whole-archive  for undefined symbol when link static lib into shared lib
-  -Wl,--start-group -Wl,--whole-archive 
-  ${KALDI_LIBRARIES}
-  libopenblas
-  gfortran
-  -Wl,--no-whole-archive -Wl,--end-group
-)
-target_compile_definitions(libkaldi INTERFACE "-DCOMPILE_WITHOUT_OPENFST")
diff --git a/paddlespeech/audio/third_party/kaldi/base b/paddlespeech/audio/third_party/kaldi/base
deleted file mode 120000
index cf286c165bb237d500322b9457be19950246faf2..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/kaldi/base
+++ /dev/null
@@ -1 +0,0 @@
-../../../../speechx/speechx/kaldi/base
\ No newline at end of file
diff --git a/paddlespeech/audio/third_party/kaldi/feat b/paddlespeech/audio/third_party/kaldi/feat
deleted file mode 120000
index 796991243a5969ec1d5fd5f7ec300087cfec503d..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/kaldi/feat
+++ /dev/null
@@ -1 +0,0 @@
-../../../../speechx/speechx/kaldi/feat
\ No newline at end of file
diff --git a/paddlespeech/audio/third_party/kaldi/matrix b/paddlespeech/audio/third_party/kaldi/matrix
deleted file mode 120000
index 184fa3233c882d57759966470b38ca34a92e18f2..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/kaldi/matrix
+++ /dev/null
@@ -1 +0,0 @@
-../../../../speechx/speechx/kaldi/matrix
\ No newline at end of file
diff --git a/paddlespeech/audio/third_party/kaldi/util b/paddlespeech/audio/third_party/kaldi/util
deleted file mode 120000
index f3017b6022df93925e304fb9eb5b0ac49af14f77..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/kaldi/util
+++ /dev/null
@@ -1 +0,0 @@
-../../../../speechx/speechx/kaldi/util
\ No newline at end of file
diff --git a/paddlespeech/audio/third_party/patches/config.guess b/paddlespeech/audio/third_party/patches/config.guess
deleted file mode 100644
index 7f76b6228f73d674f58cfcc3523f99e253ee5515..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/patches/config.guess
+++ /dev/null
@@ -1,1754 +0,0 @@
-#! /bin/sh
-# Attempt to guess a canonical system name.
-#   Copyright 1992-2022 Free Software Foundation, Inc.
-
-# shellcheck disable=SC2006,SC2268 # see below for rationale
-
-timestamp='2022-01-09'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that
-# program.  This Exception is an additional permission under section 7
-# of the GNU General Public License, version 3 ("GPLv3").
-#
-# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
-#
-# You can get the latest version of this script from:
-# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
-#
-# Please send patches to <config-patches@gnu.org>.
-
-
-# The "shellcheck disable" line above the timestamp inhibits complaints
-# about features and limitations of the classic Bourne shell that were
-# superseded or lifted in POSIX.  However, this script identifies a wide
-# variety of pre-POSIX systems that do not have POSIX shells at all, and
-# even some reasonably current systems (Solaris 10 as case-in-point) still
-# have a pre-POSIX /bin/sh.
-
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION]
-
-Output the configuration name of the system \`$me' is run on.
-
-Options:
-  -h, --help         print this help, then exit
-  -t, --time-stamp   print date of last modification, then exit
-  -v, --version      print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.guess ($timestamp)
-
-Originally written by Per Bothner.
-Copyright 1992-2022 Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions.  There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit ;;
-    --version | -v )
-       echo "$version" ; exit ;;
-    --help | --h* | -h )
-       echo "$usage"; exit ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help" >&2
-       exit 1 ;;
-    * )
-       break ;;
-  esac
-done
-
-if test $# != 0; then
-  echo "$me: too many arguments$help" >&2
-  exit 1
-fi
-
-# Just in case it came from the environment.
-GUESS=
-
-# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
-# compiler to aid in system detection is discouraged as it requires
-# temporary files to be created and, as you can see below, it is a
-# headache to deal with in a portable fashion.
-
-# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
-# use `HOST_CC' if defined, but it is deprecated.
-
-# Portable tmp directory creation inspired by the Autoconf team.
-
-tmp=
-# shellcheck disable=SC2172
-trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15
-
-set_cc_for_build() {
-    # prevent multiple calls if $tmp is already set
-    test "$tmp" && return 0
-    : "${TMPDIR=/tmp}"
-    # shellcheck disable=SC2039,SC3028
-    { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
-	{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
-	{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
-	{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
-    dummy=$tmp/dummy
-    case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
-	,,)    echo "int x;" > "$dummy.c"
-	       for driver in cc gcc c89 c99 ; do
-		   if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
-		       CC_FOR_BUILD=$driver
-		       break
-		   fi
-	       done
-	       if test x"$CC_FOR_BUILD" = x ; then
-		   CC_FOR_BUILD=no_compiler_found
-	       fi
-	       ;;
-	,,*)   CC_FOR_BUILD=$CC ;;
-	,*,*)  CC_FOR_BUILD=$HOST_CC ;;
-    esac
-}
-
-# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
-# (ghazi@noc.rutgers.edu 1994-08-24)
-if test -f /.attbin/uname ; then
-	PATH=$PATH:/.attbin ; export PATH
-fi
-
-UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
-UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
-UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
-UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
-
-case $UNAME_SYSTEM in
-Linux|GNU|GNU/*)
-	LIBC=unknown
-
-	set_cc_for_build
-	cat <<-EOF > "$dummy.c"
-	#include <features.h>
-	#if defined(__UCLIBC__)
-	LIBC=uclibc
-	#elif defined(__dietlibc__)
-	LIBC=dietlibc
-	#elif defined(__GLIBC__)
-	LIBC=gnu
-	#else
-	#include <stdarg.h>
-	/* First heuristic to detect musl libc.  */
-	#ifdef __DEFINED_va_list
-	LIBC=musl
-	#endif
-	#endif
-	EOF
-	cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
-	eval "$cc_set_libc"
-
-	# Second heuristic to detect musl libc.
-	if [ "$LIBC" = unknown ] &&
-	   command -v ldd >/dev/null &&
-	   ldd --version 2>&1 | grep -q ^musl; then
-		LIBC=musl
-	fi
-
-	# If the system lacks a compiler, then just pick glibc.
-	# We could probably try harder.
-	if [ "$LIBC" = unknown ]; then
-		LIBC=gnu
-	fi
-	;;
-esac
-
-# Note: order is significant - the case branches are not exclusive.
-
-case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in
-    *:NetBSD:*:*)
-	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
-	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
-	# switched to ELF, *-*-netbsd* would select the old
-	# object file format.  This provides both forward
-	# compatibility and a consistent mechanism for selecting the
-	# object file format.
-	#
-	# Note: NetBSD doesn't particularly care about the vendor
-	# portion of the name.  We always set it to "unknown".
-	UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
-	    /sbin/sysctl -n hw.machine_arch 2>/dev/null || \
-	    /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \
-	    echo unknown)`
-	case $UNAME_MACHINE_ARCH in
-	    aarch64eb) machine=aarch64_be-unknown ;;
-	    armeb) machine=armeb-unknown ;;
-	    arm*) machine=arm-unknown ;;
-	    sh3el) machine=shl-unknown ;;
-	    sh3eb) machine=sh-unknown ;;
-	    sh5el) machine=sh5le-unknown ;;
-	    earmv*)
-		arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
-		endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'`
-		machine=${arch}${endian}-unknown
-		;;
-	    *) machine=$UNAME_MACHINE_ARCH-unknown ;;
-	esac
-	# The Operating System including object format, if it has switched
-	# to ELF recently (or will in the future) and ABI.
-	case $UNAME_MACHINE_ARCH in
-	    earm*)
-		os=netbsdelf
-		;;
-	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
-		set_cc_for_build
-		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
-			| grep -q __ELF__
-		then
-		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
-		    # Return netbsd for either.  FIX?
-		    os=netbsd
-		else
-		    os=netbsdelf
-		fi
-		;;
-	    *)
-		os=netbsd
-		;;
-	esac
-	# Determine ABI tags.
-	case $UNAME_MACHINE_ARCH in
-	    earm*)
-		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
-		abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"`
-		;;
-	esac
-	# The OS release
-	# Debian GNU/NetBSD machines have a different userland, and
-	# thus, need a distinct triplet. However, they do not need
-	# kernel version information, so it can be replaced with a
-	# suitable tag, in the style of linux-gnu.
-	case $UNAME_VERSION in
-	    Debian*)
-		release='-gnu'
-		;;
-	    *)
-		release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2`
-		;;
-	esac
-	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
-	# contains redundant information, the shorter form:
-	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
-	GUESS=$machine-${os}${release}${abi-}
-	;;
-    *:Bitrig:*:*)
-	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
-	GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE
-	;;
-    *:OpenBSD:*:*)
-	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
-	GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE
-	;;
-    *:SecBSD:*:*)
-	UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'`
-	GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE
-	;;
-    *:LibertyBSD:*:*)
-	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
-	GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE
-	;;
-    *:MidnightBSD:*:*)
-	GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE
-	;;
-    *:ekkoBSD:*:*)
-	GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE
-	;;
-    *:SolidBSD:*:*)
-	GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE
-	;;
-    *:OS108:*:*)
-	GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE
-	;;
-    macppc:MirBSD:*:*)
-	GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE
-	;;
-    *:MirBSD:*:*)
-	GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE
-	;;
-    *:Sortix:*:*)
-	GUESS=$UNAME_MACHINE-unknown-sortix
-	;;
-    *:Twizzler:*:*)
-	GUESS=$UNAME_MACHINE-unknown-twizzler
-	;;
-    *:Redox:*:*)
-	GUESS=$UNAME_MACHINE-unknown-redox
-	;;
-    mips:OSF1:*.*)
-	GUESS=mips-dec-osf1
-	;;
-    alpha:OSF1:*:*)
-	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
-	trap '' 0
-	case $UNAME_RELEASE in
-	*4.0)
-		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
-		;;
-	*5.*)
-		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
-		;;
-	esac
-	# According to Compaq, /usr/sbin/psrinfo has been available on
-	# OSF/1 and Tru64 systems produced since 1995.  I hope that
-	# covers most systems running today.  This code pipes the CPU
-	# types through head -n 1, so we only detect the type of CPU 0.
-	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
-	case $ALPHA_CPU_TYPE in
-	    "EV4 (21064)")
-		UNAME_MACHINE=alpha ;;
-	    "EV4.5 (21064)")
-		UNAME_MACHINE=alpha ;;
-	    "LCA4 (21066/21068)")
-		UNAME_MACHINE=alpha ;;
-	    "EV5 (21164)")
-		UNAME_MACHINE=alphaev5 ;;
-	    "EV5.6 (21164A)")
-		UNAME_MACHINE=alphaev56 ;;
-	    "EV5.6 (21164PC)")
-		UNAME_MACHINE=alphapca56 ;;
-	    "EV5.7 (21164PC)")
-		UNAME_MACHINE=alphapca57 ;;
-	    "EV6 (21264)")
-		UNAME_MACHINE=alphaev6 ;;
-	    "EV6.7 (21264A)")
-		UNAME_MACHINE=alphaev67 ;;
-	    "EV6.8CB (21264C)")
-		UNAME_MACHINE=alphaev68 ;;
-	    "EV6.8AL (21264B)")
-		UNAME_MACHINE=alphaev68 ;;
-	    "EV6.8CX (21264D)")
-		UNAME_MACHINE=alphaev68 ;;
-	    "EV6.9A (21264/EV69A)")
-		UNAME_MACHINE=alphaev69 ;;
-	    "EV7 (21364)")
-		UNAME_MACHINE=alphaev7 ;;
-	    "EV7.9 (21364A)")
-		UNAME_MACHINE=alphaev79 ;;
-	esac
-	# A Pn.n version is a patched version.
-	# A Vn.n version is a released version.
-	# A Tn.n version is a released field test version.
-	# A Xn.n version is an unreleased experimental baselevel.
-	# 1.2 uses "1.2" for uname -r.
-	OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
-	GUESS=$UNAME_MACHINE-dec-osf$OSF_REL
-	;;
-    Amiga*:UNIX_System_V:4.0:*)
-	GUESS=m68k-unknown-sysv4
-	;;
-    *:[Aa]miga[Oo][Ss]:*:*)
-	GUESS=$UNAME_MACHINE-unknown-amigaos
-	;;
-    *:[Mm]orph[Oo][Ss]:*:*)
-	GUESS=$UNAME_MACHINE-unknown-morphos
-	;;
-    *:OS/390:*:*)
-	GUESS=i370-ibm-openedition
-	;;
-    *:z/VM:*:*)
-	GUESS=s390-ibm-zvmoe
-	;;
-    *:OS400:*:*)
-	GUESS=powerpc-ibm-os400
-	;;
-    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
-	GUESS=arm-acorn-riscix$UNAME_RELEASE
-	;;
-    arm*:riscos:*:*|arm*:RISCOS:*:*)
-	GUESS=arm-unknown-riscos
-	;;
-    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
-	GUESS=hppa1.1-hitachi-hiuxmpp
-	;;
-    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
-	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
-	case `(/bin/universe) 2>/dev/null` in
-	    att) GUESS=pyramid-pyramid-sysv3 ;;
-	    *)   GUESS=pyramid-pyramid-bsd   ;;
-	esac
-	;;
-    NILE*:*:*:dcosx)
-	GUESS=pyramid-pyramid-svr4
-	;;
-    DRS?6000:unix:4.0:6*)
-	GUESS=sparc-icl-nx6
-	;;
-    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
-	case `/usr/bin/uname -p` in
-	    sparc) GUESS=sparc-icl-nx7 ;;
-	esac
-	;;
-    s390x:SunOS:*:*)
-	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
-	GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL
-	;;
-    sun4H:SunOS:5.*:*)
-	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
-	GUESS=sparc-hal-solaris2$SUN_REL
-	;;
-    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
-	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
-	GUESS=sparc-sun-solaris2$SUN_REL
-	;;
-    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
-	GUESS=i386-pc-auroraux$UNAME_RELEASE
-	;;
-    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
-	set_cc_for_build
-	SUN_ARCH=i386
-	# If there is a compiler, see if it is configured for 64-bit objects.
-	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
-	# This test works for both compilers.
-	if test "$CC_FOR_BUILD" != no_compiler_found; then
-	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
-		(CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \
-		grep IS_64BIT_ARCH >/dev/null
-	    then
-		SUN_ARCH=x86_64
-	    fi
-	fi
-	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
-	GUESS=$SUN_ARCH-pc-solaris2$SUN_REL
-	;;
-    sun4*:SunOS:6*:*)
-	# According to config.sub, this is the proper way to canonicalize
-	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
-	# it's likely to be more like Solaris than SunOS4.
-	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
-	GUESS=sparc-sun-solaris3$SUN_REL
-	;;
-    sun4*:SunOS:*:*)
-	case `/usr/bin/arch -k` in
-	    Series*|S4*)
-		UNAME_RELEASE=`uname -v`
-		;;
-	esac
-	# Japanese Language versions have a version number like `4.1.3-JL'.
-	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'`
-	GUESS=sparc-sun-sunos$SUN_REL
-	;;
-    sun3*:SunOS:*:*)
-	GUESS=m68k-sun-sunos$UNAME_RELEASE
-	;;
-    sun*:*:4.2BSD:*)
-	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
-	test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
-	case `/bin/arch` in
-	    sun3)
-		GUESS=m68k-sun-sunos$UNAME_RELEASE
-		;;
-	    sun4)
-		GUESS=sparc-sun-sunos$UNAME_RELEASE
-		;;
-	esac
-	;;
-    aushp:SunOS:*:*)
-	GUESS=sparc-auspex-sunos$UNAME_RELEASE
-	;;
-    # The situation for MiNT is a little confusing.  The machine name
-    # can be virtually everything (everything which is not
-    # "atarist" or "atariste" at least should have a processor
-    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
-    # to the lowercase version "mint" (or "freemint").  Finally
-    # the system name "TOS" denotes a system which is actually not
-    # MiNT.  But MiNT is downward compatible to TOS, so this should
-    # be no problem.
-    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-	GUESS=m68k-atari-mint$UNAME_RELEASE
-	;;
-    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
-	GUESS=m68k-atari-mint$UNAME_RELEASE
-	;;
-    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-	GUESS=m68k-atari-mint$UNAME_RELEASE
-	;;
-    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-	GUESS=m68k-milan-mint$UNAME_RELEASE
-	;;
-    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-	GUESS=m68k-hades-mint$UNAME_RELEASE
-	;;
-    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-	GUESS=m68k-unknown-mint$UNAME_RELEASE
-	;;
-    m68k:machten:*:*)
-	GUESS=m68k-apple-machten$UNAME_RELEASE
-	;;
-    powerpc:machten:*:*)
-	GUESS=powerpc-apple-machten$UNAME_RELEASE
-	;;
-    RISC*:Mach:*:*)
-	GUESS=mips-dec-mach_bsd4.3
-	;;
-    RISC*:ULTRIX:*:*)
-	GUESS=mips-dec-ultrix$UNAME_RELEASE
-	;;
-    VAX*:ULTRIX*:*:*)
-	GUESS=vax-dec-ultrix$UNAME_RELEASE
-	;;
-    2020:CLIX:*:* | 2430:CLIX:*:*)
-	GUESS=clipper-intergraph-clix$UNAME_RELEASE
-	;;
-    mips:*:*:UMIPS | mips:*:*:RISCos)
-	set_cc_for_build
-	sed 's/^	//' << EOF > "$dummy.c"
-#ifdef __cplusplus
-#include <stdio.h>  /* for printf() prototype */
-	int main (int argc, char *argv[]) {
-#else
-	int main (argc, argv) int argc; char *argv[]; {
-#endif
-	#if defined (host_mips) && defined (MIPSEB)
-	#if defined (SYSTYPE_SYSV)
-	  printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_SVR4)
-	  printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
-	  printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
-	#endif
-	#endif
-	  exit (-1);
-	}
-EOF
-	$CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
-	  dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` &&
-	  SYSTEM_NAME=`"$dummy" "$dummyarg"` &&
-	    { echo "$SYSTEM_NAME"; exit; }
-	GUESS=mips-mips-riscos$UNAME_RELEASE
-	;;
-    Motorola:PowerMAX_OS:*:*)
-	GUESS=powerpc-motorola-powermax
-	;;
-    Motorola:*:4.3:PL8-*)
-	GUESS=powerpc-harris-powermax
-	;;
-    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
-	GUESS=powerpc-harris-powermax
-	;;
-    Night_Hawk:Power_UNIX:*:*)
-	GUESS=powerpc-harris-powerunix
-	;;
-    m88k:CX/UX:7*:*)
-	GUESS=m88k-harris-cxux7
-	;;
-    m88k:*:4*:R4*)
-	GUESS=m88k-motorola-sysv4
-	;;
-    m88k:*:3*:R3*)
-	GUESS=m88k-motorola-sysv3
-	;;
-    AViiON:dgux:*:*)
-	# DG/UX returns AViiON for all architectures
-	UNAME_PROCESSOR=`/usr/bin/uname -p`
-	if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110
-	then
-	    if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \
-	       test "$TARGET_BINARY_INTERFACE"x = x
-	    then
-		GUESS=m88k-dg-dgux$UNAME_RELEASE
-	    else
-		GUESS=m88k-dg-dguxbcs$UNAME_RELEASE
-	    fi
-	else
-	    GUESS=i586-dg-dgux$UNAME_RELEASE
-	fi
-	;;
-    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
-	GUESS=m88k-dolphin-sysv3
-	;;
-    M88*:*:R3*:*)
-	# Delta 88k system running SVR3
-	GUESS=m88k-motorola-sysv3
-	;;
-    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
-	GUESS=m88k-tektronix-sysv3
-	;;
-    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
-	GUESS=m68k-tektronix-bsd
-	;;
-    *:IRIX*:*:*)
-	IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'`
-	GUESS=mips-sgi-irix$IRIX_REL
-	;;
-    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
-	GUESS=romp-ibm-aix    # uname -m gives an 8 hex-code CPU id
-	;;                    # Note that: echo "'`uname -s`'" gives 'AIX '
-    i*86:AIX:*:*)
-	GUESS=i386-ibm-aix
-	;;
-    ia64:AIX:*:*)
-	if test -x /usr/bin/oslevel ; then
-		IBM_REV=`/usr/bin/oslevel`
-	else
-		IBM_REV=$UNAME_VERSION.$UNAME_RELEASE
-	fi
-	GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV
-	;;
-    *:AIX:2:3)
-	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
-		set_cc_for_build
-		sed 's/^		//' << EOF > "$dummy.c"
-		#include <sys/systemcfg.h>
-
-		main()
-			{
-			if (!__power_pc())
-				exit(1);
-			puts("powerpc-ibm-aix3.2.5");
-			exit(0);
-			}
-EOF
-		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"`
-		then
-			GUESS=$SYSTEM_NAME
-		else
-			GUESS=rs6000-ibm-aix3.2.5
-		fi
-	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
-		GUESS=rs6000-ibm-aix3.2.4
-	else
-		GUESS=rs6000-ibm-aix3.2
-	fi
-	;;
-    *:AIX:*:[4567])
-	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
-	if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
-		IBM_ARCH=rs6000
-	else
-		IBM_ARCH=powerpc
-	fi
-	if test -x /usr/bin/lslpp ; then
-		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \
-			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
-	else
-		IBM_REV=$UNAME_VERSION.$UNAME_RELEASE
-	fi
-	GUESS=$IBM_ARCH-ibm-aix$IBM_REV
-	;;
-    *:AIX:*:*)
-	GUESS=rs6000-ibm-aix
-	;;
-    ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
-	GUESS=romp-ibm-bsd4.4
-	;;
-    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
-	GUESS=romp-ibm-bsd$UNAME_RELEASE    # 4.3 with uname added to
-	;;                                  # report: romp-ibm BSD 4.3
-    *:BOSX:*:*)
-	GUESS=rs6000-bull-bosx
-	;;
-    DPX/2?00:B.O.S.:*:*)
-	GUESS=m68k-bull-sysv3
-	;;
-    9000/[34]??:4.3bsd:1.*:*)
-	GUESS=m68k-hp-bsd
-	;;
-    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
-	GUESS=m68k-hp-bsd4.4
-	;;
-    9000/[34678]??:HP-UX:*:*)
-	HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'`
-	case $UNAME_MACHINE in
-	    9000/31?)            HP_ARCH=m68000 ;;
-	    9000/[34]??)         HP_ARCH=m68k ;;
-	    9000/[678][0-9][0-9])
-		if test -x /usr/bin/getconf; then
-		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-		    case $sc_cpu_version in
-		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
-		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
-		      532)                      # CPU_PA_RISC2_0
-			case $sc_kernel_bits in
-			  32) HP_ARCH=hppa2.0n ;;
-			  64) HP_ARCH=hppa2.0w ;;
-			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
-			esac ;;
-		    esac
-		fi
-		if test "$HP_ARCH" = ""; then
-		    set_cc_for_build
-		    sed 's/^		//' << EOF > "$dummy.c"
-
-		#define _HPUX_SOURCE
-		#include <stdlib.h>
-		#include <unistd.h>
-
-		int main ()
-		{
-		#if defined(_SC_KERNEL_BITS)
-		    long bits = sysconf(_SC_KERNEL_BITS);
-		#endif
-		    long cpu  = sysconf (_SC_CPU_VERSION);
-
-		    switch (cpu)
-			{
-			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-			case CPU_PA_RISC2_0:
-		#if defined(_SC_KERNEL_BITS)
-			    switch (bits)
-				{
-				case 64: puts ("hppa2.0w"); break;
-				case 32: puts ("hppa2.0n"); break;
-				default: puts ("hppa2.0"); break;
-				} break;
-		#else  /* !defined(_SC_KERNEL_BITS) */
-			    puts ("hppa2.0"); break;
-		#endif
-			default: puts ("hppa1.0"); break;
-			}
-		    exit (0);
-		}
-EOF
-		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"`
-		    test -z "$HP_ARCH" && HP_ARCH=hppa
-		fi ;;
-	esac
-	if test "$HP_ARCH" = hppa2.0w
-	then
-	    set_cc_for_build
-
-	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
-	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
-	    # generating 64-bit code.  GNU and HP use different nomenclature:
-	    #
-	    # $ CC_FOR_BUILD=cc ./config.guess
-	    # => hppa2.0w-hp-hpux11.23
-	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
-	    # => hppa64-hp-hpux11.23
-
-	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
-		grep -q __LP64__
-	    then
-		HP_ARCH=hppa2.0w
-	    else
-		HP_ARCH=hppa64
-	    fi
-	fi
-	GUESS=$HP_ARCH-hp-hpux$HPUX_REV
-	;;
-    ia64:HP-UX:*:*)
-	HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'`
-	GUESS=ia64-hp-hpux$HPUX_REV
-	;;
-    3050*:HI-UX:*:*)
-	set_cc_for_build
-	sed 's/^	//' << EOF > "$dummy.c"
-	#include <unistd.h>
-	int
-	main ()
-	{
-	  long cpu = sysconf (_SC_CPU_VERSION);
-	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
-	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
-	     results, however.  */
-	  if (CPU_IS_PA_RISC (cpu))
-	    {
-	      switch (cpu)
-		{
-		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
-		  default: puts ("hppa-hitachi-hiuxwe2"); break;
-		}
-	    }
-	  else if (CPU_IS_HP_MC68K (cpu))
-	    puts ("m68k-hitachi-hiuxwe2");
-	  else puts ("unknown-hitachi-hiuxwe2");
-	  exit (0);
-	}
-EOF
-	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` &&
-		{ echo "$SYSTEM_NAME"; exit; }
-	GUESS=unknown-hitachi-hiuxwe2
-	;;
-    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
-	GUESS=hppa1.1-hp-bsd
-	;;
-    9000/8??:4.3bsd:*:*)
-	GUESS=hppa1.0-hp-bsd
-	;;
-    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
-	GUESS=hppa1.0-hp-mpeix
-	;;
-    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
-	GUESS=hppa1.1-hp-osf
-	;;
-    hp8??:OSF1:*:*)
-	GUESS=hppa1.0-hp-osf
-	;;
-    i*86:OSF1:*:*)
-	if test -x /usr/sbin/sysversion ; then
-	    GUESS=$UNAME_MACHINE-unknown-osf1mk
-	else
-	    GUESS=$UNAME_MACHINE-unknown-osf1
-	fi
-	;;
-    parisc*:Lites*:*:*)
-	GUESS=hppa1.1-hp-lites
-	;;
-    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
-	GUESS=c1-convex-bsd
-	;;
-    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
-	if getsysinfo -f scalar_acc
-	then echo c32-convex-bsd
-	else echo c2-convex-bsd
-	fi
-	exit ;;
-    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
-	GUESS=c34-convex-bsd
-	;;
-    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
-	GUESS=c38-convex-bsd
-	;;
-    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
-	GUESS=c4-convex-bsd
-	;;
-    CRAY*Y-MP:*:*:*)
-	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
-	GUESS=ymp-cray-unicos$CRAY_REL
-	;;
-    CRAY*[A-Z]90:*:*:*)
-	echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
-	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
-	      -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*TS:*:*:*)
-	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
-	GUESS=t90-cray-unicos$CRAY_REL
-	;;
-    CRAY*T3E:*:*:*)
-	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
-	GUESS=alphaev5-cray-unicosmk$CRAY_REL
-	;;
-    CRAY*SV1:*:*:*)
-	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
-	GUESS=sv1-cray-unicos$CRAY_REL
-	;;
-    *:UNICOS/mp:*:*)
-	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
-	GUESS=craynv-cray-unicosmp$CRAY_REL
-	;;
-    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
-	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
-	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
-	FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'`
-	GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}
-	;;
-    5000:UNIX_System_V:4.*:*)
-	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
-	FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
-	GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}
-	;;
-    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
-	GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE
-	;;
-    sparc*:BSD/OS:*:*)
-	GUESS=sparc-unknown-bsdi$UNAME_RELEASE
-	;;
-    *:BSD/OS:*:*)
-	GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE
-	;;
-    arm:FreeBSD:*:*)
-	UNAME_PROCESSOR=`uname -p`
-	set_cc_for_build
-	if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
-	    | grep -q __ARM_PCS_VFP
-	then
-	    FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
-	    GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi
-	else
-	    FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
-	    GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf
-	fi
-	;;
-    *:FreeBSD:*:*)
-	UNAME_PROCESSOR=`/usr/bin/uname -p`
-	case $UNAME_PROCESSOR in
-	    amd64)
-		UNAME_PROCESSOR=x86_64 ;;
-	    i386)
-		UNAME_PROCESSOR=i586 ;;
-	esac
-	FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
-	GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL
-	;;
-    i*:CYGWIN*:*)
-	GUESS=$UNAME_MACHINE-pc-cygwin
-	;;
-    *:MINGW64*:*)
-	GUESS=$UNAME_MACHINE-pc-mingw64
-	;;
-    *:MINGW*:*)
-	GUESS=$UNAME_MACHINE-pc-mingw32
-	;;
-    *:MSYS*:*)
-	GUESS=$UNAME_MACHINE-pc-msys
-	;;
-    i*:PW*:*)
-	GUESS=$UNAME_MACHINE-pc-pw32
-	;;
-    *:SerenityOS:*:*)
-        GUESS=$UNAME_MACHINE-pc-serenity
-        ;;
-    *:Interix*:*)
-	case $UNAME_MACHINE in
-	    x86)
-		GUESS=i586-pc-interix$UNAME_RELEASE
-		;;
-	    authenticamd | genuineintel | EM64T)
-		GUESS=x86_64-unknown-interix$UNAME_RELEASE
-		;;
-	    IA64)
-		GUESS=ia64-unknown-interix$UNAME_RELEASE
-		;;
-	esac ;;
-    i*:UWIN*:*)
-	GUESS=$UNAME_MACHINE-pc-uwin
-	;;
-    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
-	GUESS=x86_64-pc-cygwin
-	;;
-    prep*:SunOS:5.*:*)
-	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
-	GUESS=powerpcle-unknown-solaris2$SUN_REL
-	;;
-    *:GNU:*:*)
-	# the GNU system
-	GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'`
-	GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'`
-	GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL
-	;;
-    *:GNU/*:*:*)
-	# other systems with GNU libc and userland
-	GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"`
-	GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
-	GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC
-	;;
-    *:Minix:*:*)
-	GUESS=$UNAME_MACHINE-unknown-minix
-	;;
-    aarch64:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    aarch64_be:Linux:*:*)
-	UNAME_MACHINE=aarch64_be
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    alpha:Linux:*:*)
-	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in
-	  EV5)   UNAME_MACHINE=alphaev5 ;;
-	  EV56)  UNAME_MACHINE=alphaev56 ;;
-	  PCA56) UNAME_MACHINE=alphapca56 ;;
-	  PCA57) UNAME_MACHINE=alphapca56 ;;
-	  EV6)   UNAME_MACHINE=alphaev6 ;;
-	  EV67)  UNAME_MACHINE=alphaev67 ;;
-	  EV68*) UNAME_MACHINE=alphaev68 ;;
-	esac
-	objdump --private-headers /bin/sh | grep -q ld.so.1
-	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    arm*:Linux:*:*)
-	set_cc_for_build
-	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
-	    | grep -q __ARM_EABI__
-	then
-	    GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	else
-	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
-		| grep -q __ARM_PCS_VFP
-	    then
-		GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi
-	    else
-		GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf
-	    fi
-	fi
-	;;
-    avr32*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    cris:Linux:*:*)
-	GUESS=$UNAME_MACHINE-axis-linux-$LIBC
-	;;
-    crisv32:Linux:*:*)
-	GUESS=$UNAME_MACHINE-axis-linux-$LIBC
-	;;
-    e2k:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    frv:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    hexagon:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    i*86:Linux:*:*)
-	GUESS=$UNAME_MACHINE-pc-linux-$LIBC
-	;;
-    ia64:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    k1om:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    m32r*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    m68*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    mips:Linux:*:* | mips64:Linux:*:*)
-	set_cc_for_build
-	IS_GLIBC=0
-	test x"${LIBC}" = xgnu && IS_GLIBC=1
-	sed 's/^	//' << EOF > "$dummy.c"
-	#undef CPU
-	#undef mips
-	#undef mipsel
-	#undef mips64
-	#undef mips64el
-	#if ${IS_GLIBC} && defined(_ABI64)
-	LIBCABI=gnuabi64
-	#else
-	#if ${IS_GLIBC} && defined(_ABIN32)
-	LIBCABI=gnuabin32
-	#else
-	LIBCABI=${LIBC}
-	#endif
-	#endif
-
-	#if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
-	CPU=mipsisa64r6
-	#else
-	#if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
-	CPU=mipsisa32r6
-	#else
-	#if defined(__mips64)
-	CPU=mips64
-	#else
-	CPU=mips
-	#endif
-	#endif
-	#endif
-
-	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-	MIPS_ENDIAN=el
-	#else
-	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
-	MIPS_ENDIAN=
-	#else
-	MIPS_ENDIAN=
-	#endif
-	#endif
-EOF
-	cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'`
-	eval "$cc_set_vars"
-	test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
-	;;
-    mips64el:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    openrisc*:Linux:*:*)
-	GUESS=or1k-unknown-linux-$LIBC
-	;;
-    or32:Linux:*:* | or1k*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    padre:Linux:*:*)
-	GUESS=sparc-unknown-linux-$LIBC
-	;;
-    parisc64:Linux:*:* | hppa64:Linux:*:*)
-	GUESS=hppa64-unknown-linux-$LIBC
-	;;
-    parisc:Linux:*:* | hppa:Linux:*:*)
-	# Look for CPU level
-	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
-	  PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;;
-	  PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;;
-	  *)    GUESS=hppa-unknown-linux-$LIBC ;;
-	esac
-	;;
-    ppc64:Linux:*:*)
-	GUESS=powerpc64-unknown-linux-$LIBC
-	;;
-    ppc:Linux:*:*)
-	GUESS=powerpc-unknown-linux-$LIBC
-	;;
-    ppc64le:Linux:*:*)
-	GUESS=powerpc64le-unknown-linux-$LIBC
-	;;
-    ppcle:Linux:*:*)
-	GUESS=powerpcle-unknown-linux-$LIBC
-	;;
-    riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    s390:Linux:*:* | s390x:Linux:*:*)
-	GUESS=$UNAME_MACHINE-ibm-linux-$LIBC
-	;;
-    sh64*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    sh*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    sparc:Linux:*:* | sparc64:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    tile*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    vax:Linux:*:*)
-	GUESS=$UNAME_MACHINE-dec-linux-$LIBC
-	;;
-    x86_64:Linux:*:*)
-	set_cc_for_build
-	LIBCABI=$LIBC
-	if test "$CC_FOR_BUILD" != no_compiler_found; then
-	    if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \
-		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		grep IS_X32 >/dev/null
-	    then
-		LIBCABI=${LIBC}x32
-	    fi
-	fi
-	GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI
-	;;
-    xtensa*:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
-	;;
-    i*86:DYNIX/ptx:4*:*)
-	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
-	# earlier versions are messed up and put the nodename in both
-	# sysname and nodename.
-	GUESS=i386-sequent-sysv4
-	;;
-    i*86:UNIX_SV:4.2MP:2.*)
-	# Unixware is an offshoot of SVR4, but it has its own version
-	# number series starting with 2...
-	# I am not positive that other SVR4 systems won't match this,
-	# I just have to hope.  -- rms.
-	# Use sysv4.2uw... so that sysv4* matches it.
-	GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION
-	;;
-    i*86:OS/2:*:*)
-	# If we were able to find `uname', then EMX Unix compatibility
-	# is probably installed.
-	GUESS=$UNAME_MACHINE-pc-os2-emx
-	;;
-    i*86:XTS-300:*:STOP)
-	GUESS=$UNAME_MACHINE-unknown-stop
-	;;
-    i*86:atheos:*:*)
-	GUESS=$UNAME_MACHINE-unknown-atheos
-	;;
-    i*86:syllable:*:*)
-	GUESS=$UNAME_MACHINE-pc-syllable
-	;;
-    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
-	GUESS=i386-unknown-lynxos$UNAME_RELEASE
-	;;
-    i*86:*DOS:*:*)
-	GUESS=$UNAME_MACHINE-pc-msdosdjgpp
-	;;
-    i*86:*:4.*:*)
-	UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'`
-	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
-		GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL
-	else
-		GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL
-	fi
-	;;
-    i*86:*:5:[678]*)
-	# UnixWare 7.x, OpenUNIX and OpenServer 6.
-	case `/bin/uname -X | grep "^Machine"` in
-	    *486*)	     UNAME_MACHINE=i486 ;;
-	    *Pentium)	     UNAME_MACHINE=i586 ;;
-	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
-	esac
-	GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
-	;;
-    i*86:*:3.2:*)
-	if test -f /usr/options/cb.name; then
-		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
-		GUESS=$UNAME_MACHINE-pc-isc$UNAME_REL
-	elif /bin/uname -X 2>/dev/null >/dev/null ; then
-		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
-		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
-		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
-			&& UNAME_MACHINE=i586
-		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
-			&& UNAME_MACHINE=i686
-		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
-			&& UNAME_MACHINE=i686
-		GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL
-	else
-		GUESS=$UNAME_MACHINE-pc-sysv32
-	fi
-	;;
-    pc:*:*:*)
-	# Left here for compatibility:
-	# uname -m prints for DJGPP always 'pc', but it prints nothing about
-	# the processor, so we play safe by assuming i586.
-	# Note: whatever this is, it MUST be the same as what config.sub
-	# prints for the "djgpp" host, or else GDB configure will decide that
-	# this is a cross-build.
-	GUESS=i586-pc-msdosdjgpp
-	;;
-    Intel:Mach:3*:*)
-	GUESS=i386-pc-mach3
-	;;
-    paragon:*:*:*)
-	GUESS=i860-intel-osf1
-	;;
-    i860:*:4.*:*) # i860-SVR4
-	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
-	  GUESS=i860-stardent-sysv$UNAME_RELEASE    # Stardent Vistra i860-SVR4
-	else # Add other i860-SVR4 vendors below as they are discovered.
-	  GUESS=i860-unknown-sysv$UNAME_RELEASE     # Unknown i860-SVR4
-	fi
-	;;
-    mini*:CTIX:SYS*5:*)
-	# "miniframe"
-	GUESS=m68010-convergent-sysv
-	;;
-    mc68k:UNIX:SYSTEM5:3.51m)
-	GUESS=m68k-convergent-sysv
-	;;
-    M680?0:D-NIX:5.3:*)
-	GUESS=m68k-diab-dnix
-	;;
-    M68*:*:R3V[5678]*:*)
-	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
-    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
-	OS_REL=''
-	test -r /etc/.relid \
-	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	  && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	  && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
-    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	  && { echo i486-ncr-sysv4; exit; } ;;
-    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
-	OS_REL='.3'
-	test -r /etc/.relid \
-	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	    && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
-	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
-    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
-	GUESS=m68k-unknown-lynxos$UNAME_RELEASE
-	;;
-    mc68030:UNIX_System_V:4.*:*)
-	GUESS=m68k-atari-sysv4
-	;;
-    TSUNAMI:LynxOS:2.*:*)
-	GUESS=sparc-unknown-lynxos$UNAME_RELEASE
-	;;
-    rs6000:LynxOS:2.*:*)
-	GUESS=rs6000-unknown-lynxos$UNAME_RELEASE
-	;;
-    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
-	GUESS=powerpc-unknown-lynxos$UNAME_RELEASE
-	;;
-    SM[BE]S:UNIX_SV:*:*)
-	GUESS=mips-dde-sysv$UNAME_RELEASE
-	;;
-    RM*:ReliantUNIX-*:*:*)
-	GUESS=mips-sni-sysv4
-	;;
-    RM*:SINIX-*:*:*)
-	GUESS=mips-sni-sysv4
-	;;
-    *:SINIX-*:*:*)
-	if uname -p 2>/dev/null >/dev/null ; then
-		UNAME_MACHINE=`(uname -p) 2>/dev/null`
-		GUESS=$UNAME_MACHINE-sni-sysv4
-	else
-		GUESS=ns32k-sni-sysv
-	fi
-	;;
-    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-			# says <Richard.M.Bartel@ccMail.Census.GOV>
-	GUESS=i586-unisys-sysv4
-	;;
-    *:UNIX_System_V:4*:FTX*)
-	# From Gerald Hewes <hewes@openmarket.com>.
-	# How about differentiating between stratus architectures? -djm
-	GUESS=hppa1.1-stratus-sysv4
-	;;
-    *:*:*:FTX*)
-	# From seanf@swdc.stratus.com.
-	GUESS=i860-stratus-sysv4
-	;;
-    i*86:VOS:*:*)
-	# From Paul.Green@stratus.com.
-	GUESS=$UNAME_MACHINE-stratus-vos
-	;;
-    *:VOS:*:*)
-	# From Paul.Green@stratus.com.
-	GUESS=hppa1.1-stratus-vos
-	;;
-    mc68*:A/UX:*:*)
-	GUESS=m68k-apple-aux$UNAME_RELEASE
-	;;
-    news*:NEWS-OS:6*:*)
-	GUESS=mips-sony-newsos6
-	;;
-    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
-	if test -d /usr/nec; then
-		GUESS=mips-nec-sysv$UNAME_RELEASE
-	else
-		GUESS=mips-unknown-sysv$UNAME_RELEASE
-	fi
-	;;
-    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
-	GUESS=powerpc-be-beos
-	;;
-    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
-	GUESS=powerpc-apple-beos
-	;;
-    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
-	GUESS=i586-pc-beos
-	;;
-    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
-	GUESS=i586-pc-haiku
-	;;
-    x86_64:Haiku:*:*)
-	GUESS=x86_64-unknown-haiku
-	;;
-    SX-4:SUPER-UX:*:*)
-	GUESS=sx4-nec-superux$UNAME_RELEASE
-	;;
-    SX-5:SUPER-UX:*:*)
-	GUESS=sx5-nec-superux$UNAME_RELEASE
-	;;
-    SX-6:SUPER-UX:*:*)
-	GUESS=sx6-nec-superux$UNAME_RELEASE
-	;;
-    SX-7:SUPER-UX:*:*)
-	GUESS=sx7-nec-superux$UNAME_RELEASE
-	;;
-    SX-8:SUPER-UX:*:*)
-	GUESS=sx8-nec-superux$UNAME_RELEASE
-	;;
-    SX-8R:SUPER-UX:*:*)
-	GUESS=sx8r-nec-superux$UNAME_RELEASE
-	;;
-    SX-ACE:SUPER-UX:*:*)
-	GUESS=sxace-nec-superux$UNAME_RELEASE
-	;;
-    Power*:Rhapsody:*:*)
-	GUESS=powerpc-apple-rhapsody$UNAME_RELEASE
-	;;
-    *:Rhapsody:*:*)
-	GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE
-	;;
-    arm64:Darwin:*:*)
-	GUESS=aarch64-apple-darwin$UNAME_RELEASE
-	;;
-    *:Darwin:*:*)
-	UNAME_PROCESSOR=`uname -p`
-	case $UNAME_PROCESSOR in
-	    unknown) UNAME_PROCESSOR=powerpc ;;
-	esac
-	if command -v xcode-select > /dev/null 2> /dev/null && \
-		! xcode-select --print-path > /dev/null 2> /dev/null ; then
-	    # Avoid executing cc if there is no toolchain installed as
-	    # cc will be a stub that puts up a graphical alert
-	    # prompting the user to install developer tools.
-	    CC_FOR_BUILD=no_compiler_found
-	else
-	    set_cc_for_build
-	fi
-	if test "$CC_FOR_BUILD" != no_compiler_found; then
-	    if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
-		   (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		   grep IS_64BIT_ARCH >/dev/null
-	    then
-		case $UNAME_PROCESSOR in
-		    i386) UNAME_PROCESSOR=x86_64 ;;
-		    powerpc) UNAME_PROCESSOR=powerpc64 ;;
-		esac
-	    fi
-	    # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
-	    if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
-		   (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		   grep IS_PPC >/dev/null
-	    then
-		UNAME_PROCESSOR=powerpc
-	    fi
-	elif test "$UNAME_PROCESSOR" = i386 ; then
-	    # uname -m returns i386 or x86_64
-	    UNAME_PROCESSOR=$UNAME_MACHINE
-	fi
-	GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE
-	;;
-    *:procnto*:*:* | *:QNX:[0123456789]*:*)
-	UNAME_PROCESSOR=`uname -p`
-	if test "$UNAME_PROCESSOR" = x86; then
-		UNAME_PROCESSOR=i386
-		UNAME_MACHINE=pc
-	fi
-	GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE
-	;;
-    *:QNX:*:4*)
-	GUESS=i386-pc-qnx
-	;;
-    NEO-*:NONSTOP_KERNEL:*:*)
-	GUESS=neo-tandem-nsk$UNAME_RELEASE
-	;;
-    NSE-*:NONSTOP_KERNEL:*:*)
-	GUESS=nse-tandem-nsk$UNAME_RELEASE
-	;;
-    NSR-*:NONSTOP_KERNEL:*:*)
-	GUESS=nsr-tandem-nsk$UNAME_RELEASE
-	;;
-    NSV-*:NONSTOP_KERNEL:*:*)
-	GUESS=nsv-tandem-nsk$UNAME_RELEASE
-	;;
-    NSX-*:NONSTOP_KERNEL:*:*)
-	GUESS=nsx-tandem-nsk$UNAME_RELEASE
-	;;
-    *:NonStop-UX:*:*)
-	GUESS=mips-compaq-nonstopux
-	;;
-    BS2000:POSIX*:*:*)
-	GUESS=bs2000-siemens-sysv
-	;;
-    DS/*:UNIX_System_V:*:*)
-	GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE
-	;;
-    *:Plan9:*:*)
-	# "uname -m" is not consistent, so use $cputype instead. 386
-	# is converted to i386 for consistency with other x86
-	# operating systems.
-	if test "${cputype-}" = 386; then
-	    UNAME_MACHINE=i386
-	elif test "x${cputype-}" != x; then
-	    UNAME_MACHINE=$cputype
-	fi
-	GUESS=$UNAME_MACHINE-unknown-plan9
-	;;
-    *:TOPS-10:*:*)
-	GUESS=pdp10-unknown-tops10
-	;;
-    *:TENEX:*:*)
-	GUESS=pdp10-unknown-tenex
-	;;
-    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
-	GUESS=pdp10-dec-tops20
-	;;
-    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
-	GUESS=pdp10-xkl-tops20
-	;;
-    *:TOPS-20:*:*)
-	GUESS=pdp10-unknown-tops20
-	;;
-    *:ITS:*:*)
-	GUESS=pdp10-unknown-its
-	;;
-    SEI:*:*:SEIUX)
-	GUESS=mips-sei-seiux$UNAME_RELEASE
-	;;
-    *:DragonFly:*:*)
-	DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
-	GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL
-	;;
-    *:*VMS:*:*)
-	UNAME_MACHINE=`(uname -p) 2>/dev/null`
-	case $UNAME_MACHINE in
-	    A*) GUESS=alpha-dec-vms ;;
-	    I*) GUESS=ia64-dec-vms ;;
-	    V*) GUESS=vax-dec-vms ;;
-	esac ;;
-    *:XENIX:*:SysV)
-	GUESS=i386-pc-xenix
-	;;
-    i*86:skyos:*:*)
-	SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`
-	GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL
-	;;
-    i*86:rdos:*:*)
-	GUESS=$UNAME_MACHINE-pc-rdos
-	;;
-    i*86:Fiwix:*:*)
-	GUESS=$UNAME_MACHINE-pc-fiwix
-	;;
-    *:AROS:*:*)
-	GUESS=$UNAME_MACHINE-unknown-aros
-	;;
-    x86_64:VMkernel:*:*)
-	GUESS=$UNAME_MACHINE-unknown-esx
-	;;
-    amd64:Isilon\ OneFS:*:*)
-	GUESS=x86_64-unknown-onefs
-	;;
-    *:Unleashed:*:*)
-	GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE
-	;;
-esac
-
-# Do we have a guess based on uname results?
-if test "x$GUESS" != x; then
-    echo "$GUESS"
-    exit
-fi
-
-# No uname command or uname output not recognized.
-set_cc_for_build
-cat > "$dummy.c" <<EOF
-#ifdef _SEQUENT_
-#include <sys/types.h>
-#include <sys/utsname.h>
-#endif
-#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
-#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
-#include <signal.h>
-#if defined(_SIZE_T_) || defined(SIGLOST)
-#include <sys/utsname.h>
-#endif
-#endif
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
-  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
-     I don't know....  */
-  printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
-  printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
-  "4"
-#else
-  ""
-#endif
-  ); exit (0);
-#endif
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
-  int version;
-  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
-  if (version < 4)
-    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
-  else
-    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
-  exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
-  printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
-  printf ("ns32k-encore-mach\n"); exit (0);
-#else
-  printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
-  printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
-  printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
-  printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
-  struct utsname un;
-
-  uname(&un);
-  if (strncmp(un.version, "V2", 2) == 0) {
-    printf ("i386-sequent-ptx2\n"); exit (0);
-  }
-  if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
-    printf ("i386-sequent-ptx1\n"); exit (0);
-  }
-  printf ("i386-sequent-ptx\n"); exit (0);
-#endif
-
-#if defined (vax)
-#if !defined (ultrix)
-#include <sys/param.h>
-#if defined (BSD)
-#if BSD == 43
-  printf ("vax-dec-bsd4.3\n"); exit (0);
-#else
-#if BSD == 199006
-  printf ("vax-dec-bsd4.3reno\n"); exit (0);
-#else
-  printf ("vax-dec-bsd\n"); exit (0);
-#endif
-#endif
-#else
-  printf ("vax-dec-bsd\n"); exit (0);
-#endif
-#else
-#if defined(_SIZE_T_) || defined(SIGLOST)
-  struct utsname un;
-  uname (&un);
-  printf ("vax-dec-ultrix%s\n", un.release); exit (0);
-#else
-  printf ("vax-dec-ultrix\n"); exit (0);
-#endif
-#endif
-#endif
-#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
-#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
-#if defined(_SIZE_T_) || defined(SIGLOST)
-  struct utsname *un;
-  uname (&un);
-  printf ("mips-dec-ultrix%s\n", un.release); exit (0);
-#else
-  printf ("mips-dec-ultrix\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (alliant) && defined (i860)
-  printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
-  exit (1);
-}
-EOF
-
-$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` &&
-	{ echo "$SYSTEM_NAME"; exit; }
-
-# Apollos put the system type in the environment.
-test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; }
-
-echo "$0: unable to guess system type" >&2
-
-case $UNAME_MACHINE:$UNAME_SYSTEM in
-    mips:Linux | mips64:Linux)
-	# If we got here on MIPS GNU/Linux, output extra information.
-	cat >&2 <<EOF
-
-NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
-the system type. Please install a C compiler and try again.
-EOF
-	;;
-esac
-
-cat >&2 <<EOF
-
-This script (version $timestamp), has failed to recognize the
-operating system you are using. If your script is old, overwrite *all*
-copies of config.guess and config.sub with the latest versions from:
-
-  https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
-and
-  https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
-EOF
-
-our_year=`echo $timestamp | sed 's,-.*,,'`
-thisyear=`date +%Y`
-# shellcheck disable=SC2003
-script_age=`expr "$thisyear" - "$our_year"`
-if test "$script_age" -lt 3 ; then
-   cat >&2 <<EOF
-
-If $0 has already been updated, send the following data and any
-information you think might be pertinent to config-patches@gnu.org to
-provide the necessary information to handle your system.
-
-config.guess timestamp = $timestamp
-
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
-
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
-/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
-
-hostinfo               = `(hostinfo) 2>/dev/null`
-/bin/universe          = `(/bin/universe) 2>/dev/null`
-/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
-/bin/arch              = `(/bin/arch) 2>/dev/null`
-/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
-
-UNAME_MACHINE = "$UNAME_MACHINE"
-UNAME_RELEASE = "$UNAME_RELEASE"
-UNAME_SYSTEM  = "$UNAME_SYSTEM"
-UNAME_VERSION = "$UNAME_VERSION"
-EOF
-fi
-
-exit 1
-
-# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/paddlespeech/audio/third_party/patches/config.sub b/paddlespeech/audio/third_party/patches/config.sub
deleted file mode 100644
index dba16e84c77c7d25871d80c24deff717faf4c094..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/patches/config.sub
+++ /dev/null
@@ -1,1890 +0,0 @@
-#! /bin/sh
-# Configuration validation subroutine script.
-#   Copyright 1992-2022 Free Software Foundation, Inc.
-
-# shellcheck disable=SC2006,SC2268 # see below for rationale
-
-timestamp='2022-01-03'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that
-# program.  This Exception is an additional permission under section 7
-# of the GNU General Public License, version 3 ("GPLv3").
-
-
-# Please send patches to <config-patches@gnu.org>.
-#
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# You can get the latest version of this script from:
-# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support.  The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-# The "shellcheck disable" line above the timestamp inhibits complaints
-# about features and limitations of the classic Bourne shell that were
-# superseded or lifted in POSIX.  However, this script identifies a wide
-# variety of pre-POSIX systems that do not have POSIX shells at all, and
-# even some reasonably current systems (Solaris 10 as case-in-point) still
-# have a pre-POSIX /bin/sh.
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
-
-Canonicalize a configuration name.
-
-Options:
-  -h, --help         print this help, then exit
-  -t, --time-stamp   print date of last modification, then exit
-  -v, --version      print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.sub ($timestamp)
-
-Copyright 1992-2022 Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions.  There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit ;;
-    --version | -v )
-       echo "$version" ; exit ;;
-    --help | --h* | -h )
-       echo "$usage"; exit ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help" >&2
-       exit 1 ;;
-
-    *local*)
-       # First pass through any local machine types.
-       echo "$1"
-       exit ;;
-
-    * )
-       break ;;
-  esac
-done
-
-case $# in
- 0) echo "$me: missing argument$help" >&2
-    exit 1;;
- 1) ;;
- *) echo "$me: too many arguments$help" >&2
-    exit 1;;
-esac
-
-# Split fields of configuration type
-# shellcheck disable=SC2162
-saved_IFS=$IFS
-IFS="-" read field1 field2 field3 field4 <<EOF
-$1
-EOF
-IFS=$saved_IFS
-
-# Separate into logical components for further validation
-case $1 in
-	*-*-*-*-*)
-		echo Invalid configuration \`"$1"\': more than four components >&2
-		exit 1
-		;;
-	*-*-*-*)
-		basic_machine=$field1-$field2
-		basic_os=$field3-$field4
-		;;
-	*-*-*)
-		# Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two
-		# parts
-		maybe_os=$field2-$field3
-		case $maybe_os in
-			nto-qnx* | linux-* | uclinux-uclibc* \
-			| uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
-			| netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
-			| storm-chaos* | os2-emx* | rtmk-nova*)
-				basic_machine=$field1
-				basic_os=$maybe_os
-				;;
-			android-linux)
-				basic_machine=$field1-unknown
-				basic_os=linux-android
-				;;
-			*)
-				basic_machine=$field1-$field2
-				basic_os=$field3
-				;;
-		esac
-		;;
-	*-*)
-		# A lone config we happen to match not fitting any pattern
-		case $field1-$field2 in
-			decstation-3100)
-				basic_machine=mips-dec
-				basic_os=
-				;;
-			*-*)
-				# Second component is usually, but not always the OS
-				case $field2 in
-					# Prevent following clause from handling this valid os
-					sun*os*)
-						basic_machine=$field1
-						basic_os=$field2
-						;;
-					zephyr*)
-						basic_machine=$field1-unknown
-						basic_os=$field2
-						;;
-					# Manufacturers
-					dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \
-					| att* | 7300* | 3300* | delta* | motorola* | sun[234]* \
-					| unicom* | ibm* | next | hp | isi* | apollo | altos* \
-					| convergent* | ncr* | news | 32* | 3600* | 3100* \
-					| hitachi* | c[123]* | convex* | sun | crds | omron* | dg \
-					| ultra | tti* | harris | dolphin | highlevel | gould \
-					| cbm | ns | masscomp | apple | axis | knuth | cray \
-					| microblaze* | sim | cisco \
-					| oki | wec | wrs | winbond)
-						basic_machine=$field1-$field2
-						basic_os=
-						;;
-					*)
-						basic_machine=$field1
-						basic_os=$field2
-						;;
-				esac
-			;;
-		esac
-		;;
-	*)
-		# Convert single-component short-hands not valid as part of
-		# multi-component configurations.
-		case $field1 in
-			386bsd)
-				basic_machine=i386-pc
-				basic_os=bsd
-				;;
-			a29khif)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			adobe68k)
-				basic_machine=m68010-adobe
-				basic_os=scout
-				;;
-			alliant)
-				basic_machine=fx80-alliant
-				basic_os=
-				;;
-			altos | altos3068)
-				basic_machine=m68k-altos
-				basic_os=
-				;;
-			am29k)
-				basic_machine=a29k-none
-				basic_os=bsd
-				;;
-			amdahl)
-				basic_machine=580-amdahl
-				basic_os=sysv
-				;;
-			amiga)
-				basic_machine=m68k-unknown
-				basic_os=
-				;;
-			amigaos | amigados)
-				basic_machine=m68k-unknown
-				basic_os=amigaos
-				;;
-			amigaunix | amix)
-				basic_machine=m68k-unknown
-				basic_os=sysv4
-				;;
-			apollo68)
-				basic_machine=m68k-apollo
-				basic_os=sysv
-				;;
-			apollo68bsd)
-				basic_machine=m68k-apollo
-				basic_os=bsd
-				;;
-			aros)
-				basic_machine=i386-pc
-				basic_os=aros
-				;;
-			aux)
-				basic_machine=m68k-apple
-				basic_os=aux
-				;;
-			balance)
-				basic_machine=ns32k-sequent
-				basic_os=dynix
-				;;
-			blackfin)
-				basic_machine=bfin-unknown
-				basic_os=linux
-				;;
-			cegcc)
-				basic_machine=arm-unknown
-				basic_os=cegcc
-				;;
-			convex-c1)
-				basic_machine=c1-convex
-				basic_os=bsd
-				;;
-			convex-c2)
-				basic_machine=c2-convex
-				basic_os=bsd
-				;;
-			convex-c32)
-				basic_machine=c32-convex
-				basic_os=bsd
-				;;
-			convex-c34)
-				basic_machine=c34-convex
-				basic_os=bsd
-				;;
-			convex-c38)
-				basic_machine=c38-convex
-				basic_os=bsd
-				;;
-			cray)
-				basic_machine=j90-cray
-				basic_os=unicos
-				;;
-			crds | unos)
-				basic_machine=m68k-crds
-				basic_os=
-				;;
-			da30)
-				basic_machine=m68k-da30
-				basic_os=
-				;;
-			decstation | pmax | pmin | dec3100 | decstatn)
-				basic_machine=mips-dec
-				basic_os=
-				;;
-			delta88)
-				basic_machine=m88k-motorola
-				basic_os=sysv3
-				;;
-			dicos)
-				basic_machine=i686-pc
-				basic_os=dicos
-				;;
-			djgpp)
-				basic_machine=i586-pc
-				basic_os=msdosdjgpp
-				;;
-			ebmon29k)
-				basic_machine=a29k-amd
-				basic_os=ebmon
-				;;
-			es1800 | OSE68k | ose68k | ose | OSE)
-				basic_machine=m68k-ericsson
-				basic_os=ose
-				;;
-			gmicro)
-				basic_machine=tron-gmicro
-				basic_os=sysv
-				;;
-			go32)
-				basic_machine=i386-pc
-				basic_os=go32
-				;;
-			h8300hms)
-				basic_machine=h8300-hitachi
-				basic_os=hms
-				;;
-			h8300xray)
-				basic_machine=h8300-hitachi
-				basic_os=xray
-				;;
-			h8500hms)
-				basic_machine=h8500-hitachi
-				basic_os=hms
-				;;
-			harris)
-				basic_machine=m88k-harris
-				basic_os=sysv3
-				;;
-			hp300 | hp300hpux)
-				basic_machine=m68k-hp
-				basic_os=hpux
-				;;
-			hp300bsd)
-				basic_machine=m68k-hp
-				basic_os=bsd
-				;;
-			hppaosf)
-				basic_machine=hppa1.1-hp
-				basic_os=osf
-				;;
-			hppro)
-				basic_machine=hppa1.1-hp
-				basic_os=proelf
-				;;
-			i386mach)
-				basic_machine=i386-mach
-				basic_os=mach
-				;;
-			isi68 | isi)
-				basic_machine=m68k-isi
-				basic_os=sysv
-				;;
-			m68knommu)
-				basic_machine=m68k-unknown
-				basic_os=linux
-				;;
-			magnum | m3230)
-				basic_machine=mips-mips
-				basic_os=sysv
-				;;
-			merlin)
-				basic_machine=ns32k-utek
-				basic_os=sysv
-				;;
-			mingw64)
-				basic_machine=x86_64-pc
-				basic_os=mingw64
-				;;
-			mingw32)
-				basic_machine=i686-pc
-				basic_os=mingw32
-				;;
-			mingw32ce)
-				basic_machine=arm-unknown
-				basic_os=mingw32ce
-				;;
-			monitor)
-				basic_machine=m68k-rom68k
-				basic_os=coff
-				;;
-			morphos)
-				basic_machine=powerpc-unknown
-				basic_os=morphos
-				;;
-			moxiebox)
-				basic_machine=moxie-unknown
-				basic_os=moxiebox
-				;;
-			msdos)
-				basic_machine=i386-pc
-				basic_os=msdos
-				;;
-			msys)
-				basic_machine=i686-pc
-				basic_os=msys
-				;;
-			mvs)
-				basic_machine=i370-ibm
-				basic_os=mvs
-				;;
-			nacl)
-				basic_machine=le32-unknown
-				basic_os=nacl
-				;;
-			ncr3000)
-				basic_machine=i486-ncr
-				basic_os=sysv4
-				;;
-			netbsd386)
-				basic_machine=i386-pc
-				basic_os=netbsd
-				;;
-			netwinder)
-				basic_machine=armv4l-rebel
-				basic_os=linux
-				;;
-			news | news700 | news800 | news900)
-				basic_machine=m68k-sony
-				basic_os=newsos
-				;;
-			news1000)
-				basic_machine=m68030-sony
-				basic_os=newsos
-				;;
-			necv70)
-				basic_machine=v70-nec
-				basic_os=sysv
-				;;
-			nh3000)
-				basic_machine=m68k-harris
-				basic_os=cxux
-				;;
-			nh[45]000)
-				basic_machine=m88k-harris
-				basic_os=cxux
-				;;
-			nindy960)
-				basic_machine=i960-intel
-				basic_os=nindy
-				;;
-			mon960)
-				basic_machine=i960-intel
-				basic_os=mon960
-				;;
-			nonstopux)
-				basic_machine=mips-compaq
-				basic_os=nonstopux
-				;;
-			os400)
-				basic_machine=powerpc-ibm
-				basic_os=os400
-				;;
-			OSE68000 | ose68000)
-				basic_machine=m68000-ericsson
-				basic_os=ose
-				;;
-			os68k)
-				basic_machine=m68k-none
-				basic_os=os68k
-				;;
-			paragon)
-				basic_machine=i860-intel
-				basic_os=osf
-				;;
-			parisc)
-				basic_machine=hppa-unknown
-				basic_os=linux
-				;;
-			psp)
-				basic_machine=mipsallegrexel-sony
-				basic_os=psp
-				;;
-			pw32)
-				basic_machine=i586-unknown
-				basic_os=pw32
-				;;
-			rdos | rdos64)
-				basic_machine=x86_64-pc
-				basic_os=rdos
-				;;
-			rdos32)
-				basic_machine=i386-pc
-				basic_os=rdos
-				;;
-			rom68k)
-				basic_machine=m68k-rom68k
-				basic_os=coff
-				;;
-			sa29200)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			sei)
-				basic_machine=mips-sei
-				basic_os=seiux
-				;;
-			sequent)
-				basic_machine=i386-sequent
-				basic_os=
-				;;
-			sps7)
-				basic_machine=m68k-bull
-				basic_os=sysv2
-				;;
-			st2000)
-				basic_machine=m68k-tandem
-				basic_os=
-				;;
-			stratus)
-				basic_machine=i860-stratus
-				basic_os=sysv4
-				;;
-			sun2)
-				basic_machine=m68000-sun
-				basic_os=
-				;;
-			sun2os3)
-				basic_machine=m68000-sun
-				basic_os=sunos3
-				;;
-			sun2os4)
-				basic_machine=m68000-sun
-				basic_os=sunos4
-				;;
-			sun3)
-				basic_machine=m68k-sun
-				basic_os=
-				;;
-			sun3os3)
-				basic_machine=m68k-sun
-				basic_os=sunos3
-				;;
-			sun3os4)
-				basic_machine=m68k-sun
-				basic_os=sunos4
-				;;
-			sun4)
-				basic_machine=sparc-sun
-				basic_os=
-				;;
-			sun4os3)
-				basic_machine=sparc-sun
-				basic_os=sunos3
-				;;
-			sun4os4)
-				basic_machine=sparc-sun
-				basic_os=sunos4
-				;;
-			sun4sol2)
-				basic_machine=sparc-sun
-				basic_os=solaris2
-				;;
-			sun386 | sun386i | roadrunner)
-				basic_machine=i386-sun
-				basic_os=
-				;;
-			sv1)
-				basic_machine=sv1-cray
-				basic_os=unicos
-				;;
-			symmetry)
-				basic_machine=i386-sequent
-				basic_os=dynix
-				;;
-			t3e)
-				basic_machine=alphaev5-cray
-				basic_os=unicos
-				;;
-			t90)
-				basic_machine=t90-cray
-				basic_os=unicos
-				;;
-			toad1)
-				basic_machine=pdp10-xkl
-				basic_os=tops20
-				;;
-			tpf)
-				basic_machine=s390x-ibm
-				basic_os=tpf
-				;;
-			udi29k)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			ultra3)
-				basic_machine=a29k-nyu
-				basic_os=sym1
-				;;
-			v810 | necv810)
-				basic_machine=v810-nec
-				basic_os=none
-				;;
-			vaxv)
-				basic_machine=vax-dec
-				basic_os=sysv
-				;;
-			vms)
-				basic_machine=vax-dec
-				basic_os=vms
-				;;
-			vsta)
-				basic_machine=i386-pc
-				basic_os=vsta
-				;;
-			vxworks960)
-				basic_machine=i960-wrs
-				basic_os=vxworks
-				;;
-			vxworks68)
-				basic_machine=m68k-wrs
-				basic_os=vxworks
-				;;
-			vxworks29k)
-				basic_machine=a29k-wrs
-				basic_os=vxworks
-				;;
-			xbox)
-				basic_machine=i686-pc
-				basic_os=mingw32
-				;;
-			ymp)
-				basic_machine=ymp-cray
-				basic_os=unicos
-				;;
-			*)
-				basic_machine=$1
-				basic_os=
-				;;
-		esac
-		;;
-esac
-
-# Decode 1-component or ad-hoc basic machines
-case $basic_machine in
-	# Here we handle the default manufacturer of certain CPU types.  It is in
-	# some cases the only manufacturer, in others, it is the most popular.
-	w89k)
-		cpu=hppa1.1
-		vendor=winbond
-		;;
-	op50n)
-		cpu=hppa1.1
-		vendor=oki
-		;;
-	op60c)
-		cpu=hppa1.1
-		vendor=oki
-		;;
-	ibm*)
-		cpu=i370
-		vendor=ibm
-		;;
-	orion105)
-		cpu=clipper
-		vendor=highlevel
-		;;
-	mac | mpw | mac-mpw)
-		cpu=m68k
-		vendor=apple
-		;;
-	pmac | pmac-mpw)
-		cpu=powerpc
-		vendor=apple
-		;;
-
-	# Recognize the various machine names and aliases which stand
-	# for a CPU type and a company and sometimes even an OS.
-	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
-		cpu=m68000
-		vendor=att
-		;;
-	3b*)
-		cpu=we32k
-		vendor=att
-		;;
-	bluegene*)
-		cpu=powerpc
-		vendor=ibm
-		basic_os=cnk
-		;;
-	decsystem10* | dec10*)
-		cpu=pdp10
-		vendor=dec
-		basic_os=tops10
-		;;
-	decsystem20* | dec20*)
-		cpu=pdp10
-		vendor=dec
-		basic_os=tops20
-		;;
-	delta | 3300 | motorola-3300 | motorola-delta \
-	      | 3300-motorola | delta-motorola)
-		cpu=m68k
-		vendor=motorola
-		;;
-	dpx2*)
-		cpu=m68k
-		vendor=bull
-		basic_os=sysv3
-		;;
-	encore | umax | mmax)
-		cpu=ns32k
-		vendor=encore
-		;;
-	elxsi)
-		cpu=elxsi
-		vendor=elxsi
-		basic_os=${basic_os:-bsd}
-		;;
-	fx2800)
-		cpu=i860
-		vendor=alliant
-		;;
-	genix)
-		cpu=ns32k
-		vendor=ns
-		;;
-	h3050r* | hiux*)
-		cpu=hppa1.1
-		vendor=hitachi
-		basic_os=hiuxwe2
-		;;
-	hp3k9[0-9][0-9] | hp9[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	hp9k2[0-9][0-9] | hp9k31[0-9])
-		cpu=m68000
-		vendor=hp
-		;;
-	hp9k3[2-9][0-9])
-		cpu=m68k
-		vendor=hp
-		;;
-	hp9k6[0-9][0-9] | hp6[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	hp9k7[0-79][0-9] | hp7[0-79][0-9])
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k78[0-9] | hp78[0-9])
-		# FIXME: really hppa2.0-hp
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
-		# FIXME: really hppa2.0-hp
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[0-9][13679] | hp8[0-9][13679])
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[0-9][0-9] | hp8[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	i*86v32)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=sysv32
-		;;
-	i*86v4*)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=sysv4
-		;;
-	i*86v)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=sysv
-		;;
-	i*86sol2)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=solaris2
-		;;
-	j90 | j90-cray)
-		cpu=j90
-		vendor=cray
-		basic_os=${basic_os:-unicos}
-		;;
-	iris | iris4d)
-		cpu=mips
-		vendor=sgi
-		case $basic_os in
-		    irix*)
-			;;
-		    *)
-			basic_os=irix4
-			;;
-		esac
-		;;
-	miniframe)
-		cpu=m68000
-		vendor=convergent
-		;;
-	*mint | mint[0-9]* | *MiNT | *MiNT[0-9]*)
-		cpu=m68k
-		vendor=atari
-		basic_os=mint
-		;;
-	news-3600 | risc-news)
-		cpu=mips
-		vendor=sony
-		basic_os=newsos
-		;;
-	next | m*-next)
-		cpu=m68k
-		vendor=next
-		case $basic_os in
-		    openstep*)
-		        ;;
-		    nextstep*)
-			;;
-		    ns2*)
-		      basic_os=nextstep2
-			;;
-		    *)
-		      basic_os=nextstep3
-			;;
-		esac
-		;;
-	np1)
-		cpu=np1
-		vendor=gould
-		;;
-	op50n-* | op60c-*)
-		cpu=hppa1.1
-		vendor=oki
-		basic_os=proelf
-		;;
-	pa-hitachi)
-		cpu=hppa1.1
-		vendor=hitachi
-		basic_os=hiuxwe2
-		;;
-	pbd)
-		cpu=sparc
-		vendor=tti
-		;;
-	pbb)
-		cpu=m68k
-		vendor=tti
-		;;
-	pc532)
-		cpu=ns32k
-		vendor=pc532
-		;;
-	pn)
-		cpu=pn
-		vendor=gould
-		;;
-	power)
-		cpu=power
-		vendor=ibm
-		;;
-	ps2)
-		cpu=i386
-		vendor=ibm
-		;;
-	rm[46]00)
-		cpu=mips
-		vendor=siemens
-		;;
-	rtpc | rtpc-*)
-		cpu=romp
-		vendor=ibm
-		;;
-	sde)
-		cpu=mipsisa32
-		vendor=sde
-		basic_os=${basic_os:-elf}
-		;;
-	simso-wrs)
-		cpu=sparclite
-		vendor=wrs
-		basic_os=vxworks
-		;;
-	tower | tower-32)
-		cpu=m68k
-		vendor=ncr
-		;;
-	vpp*|vx|vx-*)
-		cpu=f301
-		vendor=fujitsu
-		;;
-	w65)
-		cpu=w65
-		vendor=wdc
-		;;
-	w89k-*)
-		cpu=hppa1.1
-		vendor=winbond
-		basic_os=proelf
-		;;
-	none)
-		cpu=none
-		vendor=none
-		;;
-	leon|leon[3-9])
-		cpu=sparc
-		vendor=$basic_machine
-		;;
-	leon-*|leon[3-9]-*)
-		cpu=sparc
-		vendor=`echo "$basic_machine" | sed 's/-.*//'`
-		;;
-
-	*-*)
-		# shellcheck disable=SC2162
-		saved_IFS=$IFS
-		IFS="-" read cpu vendor <<EOF
-$basic_machine
-EOF
-		IFS=$saved_IFS
-		;;
-	# We use `pc' rather than `unknown'
-	# because (1) that's what they normally are, and
-	# (2) the word "unknown" tends to confuse beginning users.
-	i*86 | x86_64)
-		cpu=$basic_machine
-		vendor=pc
-		;;
-	# These rules are duplicated from below for sake of the special case above;
-	# i.e. things that normalized to x86 arches should also default to "pc"
-	pc98)
-		cpu=i386
-		vendor=pc
-		;;
-	x64 | amd64)
-		cpu=x86_64
-		vendor=pc
-		;;
-	# Recognize the basic CPU types without company name.
-	*)
-		cpu=$basic_machine
-		vendor=unknown
-		;;
-esac
-
-unset -v basic_machine
-
-# Decode basic machines in the full and proper CPU-Company form.
-case $cpu-$vendor in
-	# Here we handle the default manufacturer of certain CPU types in canonical form. It is in
-	# some cases the only manufacturer, in others, it is the most popular.
-	craynv-unknown)
-		vendor=cray
-		basic_os=${basic_os:-unicosmp}
-		;;
-	c90-unknown | c90-cray)
-		vendor=cray
-		basic_os=${basic_os:-unicos}	# keep an explicitly given OS; default to unicos only when none was set
-		;;
-	fx80-unknown)
-		vendor=alliant
-		;;
-	romp-unknown)
-		vendor=ibm
-		;;
-	mmix-unknown)
-		vendor=knuth
-		;;
-	microblaze-unknown | microblazeel-unknown)
-		vendor=xilinx
-		;;
-	rs6000-unknown)
-		vendor=ibm
-		;;
-	vax-unknown)
-		vendor=dec
-		;;
-	pdp11-unknown)
-		vendor=dec
-		;;
-	we32k-unknown)
-		vendor=att
-		;;
-	cydra-unknown)
-		vendor=cydrome
-		;;
-	i370-ibm*)
-		vendor=ibm
-		;;
-	orion-unknown)
-		vendor=highlevel
-		;;
-	xps-unknown | xps100-unknown)
-		cpu=xps100
-		vendor=honeywell
-		;;
-
-	# Here we normalize CPU types with a missing or matching vendor
-	armh-unknown | armh-alt)
-		cpu=armv7l
-		vendor=alt
-		basic_os=${basic_os:-linux-gnueabihf}
-		;;
-	dpx20-unknown | dpx20-bull)
-		cpu=rs6000
-		vendor=bull
-		basic_os=${basic_os:-bosx}
-		;;
-
-	# Here we normalize CPU types irrespective of the vendor
-	amd64-*)
-		cpu=x86_64
-		;;
-	blackfin-*)
-		cpu=bfin
-		basic_os=linux
-		;;
-	c54x-*)
-		cpu=tic54x
-		;;
-	c55x-*)
-		cpu=tic55x
-		;;
-	c6x-*)
-		cpu=tic6x
-		;;
-	e500v[12]-*)
-		cpu=powerpc
-		basic_os=${basic_os}"spe"
-		;;
-	mips3*-*)
-		cpu=mips64
-		;;
-	ms1-*)
-		cpu=mt
-		;;
-	m68knommu-*)
-		cpu=m68k
-		basic_os=linux
-		;;
-	m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*)
-		cpu=s12z
-		;;
-	openrisc-*)
-		cpu=or32
-		;;
-	parisc-*)
-		cpu=hppa
-		basic_os=linux
-		;;
-	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
-		cpu=i586
-		;;
-	pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
-		cpu=i686
-		;;
-	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
-		cpu=i686
-		;;
-	pentium4-*)
-		cpu=i786
-		;;
-	pc98-*)
-		cpu=i386
-		;;
-	ppc-* | ppcbe-*)
-		cpu=powerpc
-		;;
-	ppcle-* | powerpclittle-*)
-		cpu=powerpcle
-		;;
-	ppc64-*)
-		cpu=powerpc64
-		;;
-	ppc64le-* | powerpc64little-*)
-		cpu=powerpc64le
-		;;
-	sb1-*)
-		cpu=mipsisa64sb1
-		;;
-	sb1el-*)
-		cpu=mipsisa64sb1el
-		;;
-	sh5e[lb]-*)
-		cpu=`echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/'`
-		;;
-	spur-*)
-		cpu=spur
-		;;
-	strongarm-* | thumb-*)
-		cpu=arm
-		;;
-	tx39-*)
-		cpu=mipstx39
-		;;
-	tx39el-*)
-		cpu=mipstx39el
-		;;
-	x64-*)
-		cpu=x86_64
-		;;
-	xscale-* | xscalee[bl]-*)
-		cpu=`echo "$cpu" | sed 's/^xscale/arm/'`
-		;;
-	arm64-* | aarch64le-*)
-		cpu=aarch64
-		;;
-
-	# Recognize the canonical CPU Types that limit and/or modify the
-	# company names they are paired with.
-	cr16-*)
-		basic_os=${basic_os:-elf}
-		;;
-	crisv32-* | etraxfs*-*)
-		cpu=crisv32
-		vendor=axis
-		;;
-	cris-* | etrax*-*)
-		cpu=cris
-		vendor=axis
-		;;
-	crx-*)
-		basic_os=${basic_os:-elf}
-		;;
-	neo-tandem)
-		cpu=neo
-		vendor=tandem
-		;;
-	nse-tandem)
-		cpu=nse
-		vendor=tandem
-		;;
-	nsr-tandem)
-		cpu=nsr
-		vendor=tandem
-		;;
-	nsv-tandem)
-		cpu=nsv
-		vendor=tandem
-		;;
-	nsx-tandem)
-		cpu=nsx
-		vendor=tandem
-		;;
-	mipsallegrexel-sony)
-		cpu=mipsallegrexel
-		vendor=sony
-		;;
-	tile*-*)
-		basic_os=${basic_os:-linux-gnu}
-		;;
-
-	*)
-		# Recognize the canonical CPU types that are allowed with any
-		# company name.
-		case $cpu in
-			1750a | 580 \
-			| a29k \
-			| aarch64 | aarch64_be \
-			| abacus \
-			| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
-			| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
-			| alphapca5[67] | alpha64pca5[67] \
-			| am33_2.0 \
-			| amdgcn \
-			| arc | arceb | arc32 | arc64 \
-			| arm | arm[lb]e | arme[lb] | armv* \
-			| avr | avr32 \
-			| asmjs \
-			| ba \
-			| be32 | be64 \
-			| bfin | bpf | bs2000 \
-			| c[123]* | c30 | [cjt]90 | c4x \
-			| c8051 | clipper | craynv | csky | cydra \
-			| d10v | d30v | dlx | dsp16xx \
-			| e2k | elxsi | epiphany \
-			| f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
-			| h8300 | h8500 \
-			| hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
-			| hexagon \
-			| i370 | i*86 | i860 | i960 | ia16 | ia64 \
-			| ip2k | iq2000 \
-			| k1om \
-			| le32 | le64 \
-			| lm32 \
-			| loongarch32 | loongarch64 | loongarchx32 \
-			| m32c | m32r | m32rle \
-			| m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
-			| m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
-			| m88110 | m88k | maxq | mb | mcore | mep | metag \
-			| microblaze | microblazeel \
-			| mips | mipsbe | mipseb | mipsel | mipsle \
-			| mips16 \
-			| mips64 | mips64eb | mips64el \
-			| mips64octeon | mips64octeonel \
-			| mips64orion | mips64orionel \
-			| mips64r5900 | mips64r5900el \
-			| mips64vr | mips64vrel \
-			| mips64vr4100 | mips64vr4100el \
-			| mips64vr4300 | mips64vr4300el \
-			| mips64vr5000 | mips64vr5000el \
-			| mips64vr5900 | mips64vr5900el \
-			| mipsisa32 | mipsisa32el \
-			| mipsisa32r2 | mipsisa32r2el \
-			| mipsisa32r3 | mipsisa32r3el \
-			| mipsisa32r5 | mipsisa32r5el \
-			| mipsisa32r6 | mipsisa32r6el \
-			| mipsisa64 | mipsisa64el \
-			| mipsisa64r2 | mipsisa64r2el \
-			| mipsisa64r3 | mipsisa64r3el \
-			| mipsisa64r5 | mipsisa64r5el \
-			| mipsisa64r6 | mipsisa64r6el \
-			| mipsisa64sb1 | mipsisa64sb1el \
-			| mipsisa64sr71k | mipsisa64sr71kel \
-			| mipsr5900 | mipsr5900el \
-			| mipstx39 | mipstx39el \
-			| mmix \
-			| mn10200 | mn10300 \
-			| moxie \
-			| mt \
-			| msp430 \
-			| nds32 | nds32le | nds32be \
-			| nfp \
-			| nios | nios2 | nios2eb | nios2el \
-			| none | np1 | ns16k | ns32k | nvptx \
-			| open8 \
-			| or1k* \
-			| or32 \
-			| orion \
-			| picochip \
-			| pdp10 | pdp11 | pj | pjl | pn | power \
-			| powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \
-			| pru \
-			| pyramid \
-			| riscv | riscv32 | riscv32be | riscv64 | riscv64be \
-			| rl78 | romp | rs6000 | rx \
-			| s390 | s390x \
-			| score \
-			| sh | shl \
-			| sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \
-			| sh[1234]e[lb] |  sh[12345][lb]e | sh[23]ele | sh64 | sh64le \
-			| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \
-			| sparclite \
-			| sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
-			| spu \
-			| tahoe \
-			| thumbv7* \
-			| tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
-			| tron \
-			| ubicom32 \
-			| v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
-			| vax \
-			| visium \
-			| w65 \
-			| wasm32 | wasm64 \
-			| we32k \
-			| x86 | x86_64 | xc16x | xgate | xps100 \
-			| xstormy16 | xtensa* \
-			| ymp \
-			| z8k | z80)
-				;;
-
-			*)
-				echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
-				exit 1
-				;;
-		esac
-		;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $vendor in
-	digital*)
-		vendor=dec
-		;;
-	commodore*)
-		vendor=cbm
-		;;
-	*)
-		;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if test x"$basic_os" != x	# quoted: whitespace or glob characters in the OS field must not be split/expanded
-then
-
-# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just
-# set os.
-case $basic_os in
-	gnu/linux*)
-		kernel=linux
-		os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'`
-		;;
-	os2-emx)
-		kernel=os2
-		os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'`
-		;;
-	nto-qnx*)
-		kernel=nto
-		os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'`
-		;;
-	*-*)
-		# shellcheck disable=SC2162
-		saved_IFS=$IFS
-		IFS="-" read kernel os <<EOF
-$basic_os
-EOF
-		IFS=$saved_IFS
-		;;
-	# Default OS when just kernel was specified
-	nto*)
-		kernel=nto
-		os=`echo "$basic_os" | sed -e 's|nto|qnx|'`
-		;;
-	linux*)
-		kernel=linux
-		os=`echo "$basic_os" | sed -e 's|linux|gnu|'`
-		;;
-	*)
-		kernel=
-		os=$basic_os
-		;;
-esac
-
-# Now, normalize the OS (knowing we just have one component, it's not a kernel,
-# etc.)
-case $os in
-	# First match some system type aliases that might get confused
-	# with valid system types.
-	# solaris* is a basic system type, with this one exception.
-	auroraux)
-		os=auroraux
-		;;
-	bluegene*)
-		os=cnk
-		;;
-	solaris1 | solaris1.*)
-		os=`echo "$os" | sed -e 's|solaris1|sunos4|'`
-		;;
-	solaris)
-		os=solaris2
-		;;
-	unixware*)
-		os=sysv4.2uw
-		;;
-	# es1800 is here to avoid being matched by es* (a different OS)
-	es1800*)
-		os=ose
-		;;
-	# Some version numbers need modification
-	chorusos*)
-		os=chorusos
-		;;
-	isc)
-		os=isc2.2
-		;;
-	sco6)
-		os=sco5v6
-		;;
-	sco5)
-		os=sco3.2v5
-		;;
-	sco4)
-		os=sco3.2v4
-		;;
-	sco3.2.[4-9]*)
-		os=`echo "$os" | sed -e 's/sco3.2./sco3.2v/'`
-		;;
-	sco*v* | scout)
-		# Don't match below
-		;;
-	sco*)
-		os=sco3.2v2
-		;;
-	psos*)
-		os=psos
-		;;
-	qnx*)
-		os=qnx
-		;;
-	hiux*)
-		os=hiuxwe2
-		;;
-	lynx*178)
-		os=lynxos178
-		;;
-	lynx*5)
-		os=lynxos5
-		;;
-	lynxos*)
-		# don't get caught up in next wildcard
-		;;
-	lynx*)
-		os=lynxos
-		;;
-	mac[0-9]*)
-		os=`echo "$os" | sed -e 's|mac|macos|'`
-		;;
-	opened*)
-		os=openedition
-		;;
-	os400*)
-		os=os400
-		;;
-	sunos5*)
-		os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
-		;;
-	sunos6*)
-		os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
-		;;
-	wince*)
-		os=wince
-		;;
-	utek*)
-		os=bsd
-		;;
-	dynix*)
-		os=bsd
-		;;
-	acis*)
-		os=aos
-		;;
-	atheos*)
-		os=atheos
-		;;
-	syllable*)
-		os=syllable
-		;;
-	386bsd)
-		os=bsd
-		;;
-	ctix* | uts*)
-		os=sysv
-		;;
-	nova*)
-		os=rtmk-nova
-		;;
-	ns2)
-		os=nextstep2
-		;;
-	# Preserve the version number of sinix5.
-	sinix5.*)
-		os=`echo "$os" | sed -e 's|sinix|sysv|'`
-		;;
-	sinix*)
-		os=sysv4
-		;;
-	tpf*)
-		os=tpf
-		;;
-	triton*)
-		os=sysv3
-		;;
-	oss*)
-		os=sysv3
-		;;
-	svr4*)
-		os=sysv4
-		;;
-	svr3)
-		os=sysv3
-		;;
-	sysvr4)
-		os=sysv4
-		;;
-	ose*)
-		os=ose
-		;;
-	*mint | mint[0-9]* | *MiNT | MiNT[0-9]*)
-		os=mint
-		;;
-	dicos*)
-		os=dicos
-		;;
-	pikeos*)
-		# Until real need of OS specific support for
-		# particular features comes up, bare metal
-		# configurations are quite functional.
-		case $cpu in
-		    arm*)
-			os=eabi
-			;;
-		    *)
-			os=elf
-			;;
-		esac
-		;;
-	*)
-		# No normalization, but not necessarily accepted, that comes below.
-		;;
-esac
-
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system.  Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-kernel=
-case $cpu-$vendor in
-	score-*)
-		os=elf
-		;;
-	spu-*)
-		os=elf
-		;;
-	*-acorn)
-		os=riscix1.2
-		;;
-	arm*-rebel)
-		kernel=linux
-		os=gnu
-		;;
-	arm*-semi)
-		os=aout
-		;;
-	c4x-* | tic4x-*)
-		os=coff
-		;;
-	c8051-*)
-		os=elf
-		;;
-	clipper-intergraph)
-		os=clix
-		;;
-	hexagon-*)
-		os=elf
-		;;
-	tic54x-*)
-		os=coff
-		;;
-	tic55x-*)
-		os=coff
-		;;
-	tic6x-*)
-		os=coff
-		;;
-	# This must come before the *-dec entry.
-	pdp10-*)
-		os=tops20
-		;;
-	pdp11-*)
-		os=none
-		;;
-	*-dec | vax-*)
-		os=ultrix4.2
-		;;
-	m68*-apollo)
-		os=domain
-		;;
-	i386-sun)
-		os=sunos4.0.2
-		;;
-	m68000-sun)
-		os=sunos3
-		;;
-	m68*-cisco)
-		os=aout
-		;;
-	mep-*)
-		os=elf
-		;;
-	mips*-cisco)
-		os=elf
-		;;
-	mips*-*)
-		os=elf
-		;;
-	or32-*)
-		os=coff
-		;;
-	*-tti)	# must be before sparc entry or we get the wrong os.
-		os=sysv3
-		;;
-	sparc-* | *-sun)
-		os=sunos4.1.1
-		;;
-	pru-*)
-		os=elf
-		;;
-	*-be)
-		os=beos
-		;;
-	*-ibm)
-		os=aix
-		;;
-	*-knuth)
-		os=mmixware
-		;;
-	*-wec)
-		os=proelf
-		;;
-	*-winbond)
-		os=proelf
-		;;
-	*-oki)
-		os=proelf
-		;;
-	*-hp)
-		os=hpux
-		;;
-	*-hitachi)
-		os=hiux
-		;;
-	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
-		os=sysv
-		;;
-	*-cbm)
-		os=amigaos
-		;;
-	*-dg)
-		os=dgux
-		;;
-	*-dolphin)
-		os=sysv3
-		;;
-	m68k-ccur)
-		os=rtu
-		;;
-	m88k-omron*)
-		os=luna
-		;;
-	*-next)
-		os=nextstep
-		;;
-	*-sequent)
-		os=ptx
-		;;
-	*-crds)
-		os=unos
-		;;
-	*-ns)
-		os=genix
-		;;
-	i370-*)
-		os=mvs
-		;;
-	*-gould)
-		os=sysv
-		;;
-	*-highlevel)
-		os=bsd
-		;;
-	*-encore)
-		os=bsd
-		;;
-	*-sgi)
-		os=irix
-		;;
-	*-siemens)
-		os=sysv4
-		;;
-	*-masscomp)
-		os=rtu
-		;;
-	f30[01]-fujitsu | f700-fujitsu)
-		os=uxpv
-		;;
-	*-rom68k)
-		os=coff
-		;;
-	*-*bug)
-		os=coff
-		;;
-	*-apple)
-		os=macos
-		;;
-	*-atari*)
-		os=mint
-		;;
-	*-wrs)
-		os=vxworks
-		;;
-	*)
-		os=none
-		;;
-esac
-
-fi
-
-# Now, validate our (potentially fixed-up) OS.
-case $os in
-	# Sometimes we do "kernel-libc", so those need to count as OSes.
-	musl* | newlib* | relibc* | uclibc*)
-		;;
-	# Likewise for "kernel-abi"
-	eabi* | gnueabi*)
-		;;
-	# VxWorks passes extra cpu info in the 4th filed.
-	simlinux | simwindows | spe)
-		;;
-	# Now accept the basic system types.
-	# The portable systems comes first.
-	# Each alternative MUST end in a * to match a version number.
-	gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \
-	     | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \
-	     | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
-	     | sym* |  plan9* | psp* | sim* | xray* | os68k* | v88r* \
-	     | hiux* | abug | nacl* | netware* | windows* \
-	     | os9* | macos* | osx* | ios* \
-	     | mpw* | magic* | mmixware* | mon960* | lnews* \
-	     | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
-	     | aos* | aros* | cloudabi* | sortix* | twizzler* \
-	     | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \
-	     | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \
-	     | mirbsd* | netbsd* | dicos* | openedition* | ose* \
-	     | bitrig* | openbsd* | secbsd* | solidbsd* | libertybsd* | os108* \
-	     | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \
-	     | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
-	     | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
-	     | udi* | lites* | ieee* | go32* | aux* | hcos* \
-	     | chorusrdb* | cegcc* | glidix* | serenity* \
-	     | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
-	     | midipix* | mingw32* | mingw64* | mint* \
-	     | uxpv* | beos* | mpeix* | udk* | moxiebox* \
-	     | interix* | uwin* | mks* | rhapsody* | darwin* \
-	     | openstep* | oskit* | conix* | pw32* | nonstopux* \
-	     | storm-chaos* | tops10* | tenex* | tops20* | its* \
-	     | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \
-	     | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \
-	     | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \
-	     | skyos* | haiku* | rdos* | toppers* | drops* | es* \
-	     | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
-	     | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
-	     | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \
-	     | fiwix* )
-		;;
-	# This one is extra strict with allowed versions
-	sco3.2v2 | sco3.2v[4-9]* | sco5v6*)
-		# Don't forget version if it is 3.2v4 or newer.
-		;;
-	none)
-		;;
-	*)
-		echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2
-		exit 1
-		;;
-esac
-
-# As a final step for OS-related things, validate the OS-kernel combination
-# (given a valid OS), if there is a kernel.
-case $kernel-$os in
-	linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \
-		   | linux-musl* | linux-relibc* | linux-uclibc* )
-		;;
-	uclinux-uclibc* )
-		;;
-	-dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* )
-		# These are just libc implementations, not actual OSes, and thus
-		# require a kernel.
-		echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2
-		exit 1
-		;;
-	kfreebsd*-gnu* | kopensolaris*-gnu*)
-		;;
-	vxworks-simlinux | vxworks-simwindows | vxworks-spe)
-		;;
-	nto-qnx*)
-		;;
-	os2-emx)
-		;;
-	*-eabi* | *-gnueabi*)
-		;;
-	-*)
-		# Blank kernel with real OS is always fine.
-		;;
-	*-*)
-		echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2
-		exit 1
-		;;
-esac
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer.  We pick the logical manufacturer.
-case $vendor in
-	unknown)
-		case $cpu-$os in
-			*-riscix*)
-				vendor=acorn
-				;;
-			*-sunos*)
-				vendor=sun
-				;;
-			*-cnk* | *-aix*)
-				vendor=ibm
-				;;
-			*-beos*)
-				vendor=be
-				;;
-			*-hpux*)
-				vendor=hp
-				;;
-			*-mpeix*)
-				vendor=hp
-				;;
-			*-hiux*)
-				vendor=hitachi
-				;;
-			*-unos*)
-				vendor=crds
-				;;
-			*-dgux*)
-				vendor=dg
-				;;
-			*-luna*)
-				vendor=omron
-				;;
-			*-genix*)
-				vendor=ns
-				;;
-			*-clix*)
-				vendor=intergraph
-				;;
-			*-mvs* | *-opened*)
-				vendor=ibm
-				;;
-			*-os400*)
-				vendor=ibm
-				;;
-			s390-* | s390x-*)
-				vendor=ibm
-				;;
-			*-ptx*)
-				vendor=sequent
-				;;
-			*-tpf*)
-				vendor=ibm
-				;;
-			*-vxsim* | *-vxworks* | *-windiss*)
-				vendor=wrs
-				;;
-			*-aux*)
-				vendor=apple
-				;;
-			*-hms*)
-				vendor=hitachi
-				;;
-			*-mpw* | *-macos*)
-				vendor=apple
-				;;
-			*-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*)
-				vendor=atari
-				;;
-			*-vos*)
-				vendor=stratus
-				;;
-		esac
-		;;
-esac
-
-echo "$cpu-$vendor-${kernel:+$kernel-}$os"
-exit
-
-# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/paddlespeech/audio/third_party/patches/libmad.patch b/paddlespeech/audio/third_party/patches/libmad.patch
deleted file mode 100644
index a805787831f48ecde0eebc9468440ee179f55c75..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/patches/libmad.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-See the followings for the origin of this patch
-http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libmad.html
-http://www.linuxfromscratch.org/patches/blfs/svn/libmad-0.15.1b-fixes-1.patch
---- src/libmad/configure	2004-02-05 09:34:07.000000000 +0000
-+++ src/libmad/configure.new	2020-06-30 21:10:28.528018931 +0000
-@@ -19083,71 +19083,7 @@
- 
- if test "$GCC" = yes
- then
--    if test -z "$arch"
--    then
--	case "$host" in
--	    i386-*)           ;;
--	    i?86-*)           arch="-march=i486" ;;
--	    arm*-empeg-*)     arch="-march=armv4 -mtune=strongarm1100" ;;
--	    armv4*-*)         arch="-march=armv4 -mtune=strongarm" ;;
--	    powerpc-*)        ;;
--	    mips*-agenda-*)   arch="-mcpu=vr4100" ;;
--	    mips*-luxsonor-*) arch="-mips1 -mcpu=r3000 -Wa,-m4010" ;;
--	esac
--    fi
--
--    case "$optimize" in
--	-O|"-O "*)
--	    optimize="-O"
--	    optimize="$optimize -fforce-mem"
--	    optimize="$optimize -fforce-addr"
--	    : #x optimize="$optimize -finline-functions"
--	    : #- optimize="$optimize -fstrength-reduce"
--	    optimize="$optimize -fthread-jumps"
--	    optimize="$optimize -fcse-follow-jumps"
--	    optimize="$optimize -fcse-skip-blocks"
--	    : #x optimize="$optimize -frerun-cse-after-loop"
--	    : #x optimize="$optimize -frerun-loop-opt"
--	    : #x optimize="$optimize -fgcse"
--	    optimize="$optimize -fexpensive-optimizations"
--	    optimize="$optimize -fregmove"
--	    : #* optimize="$optimize -fdelayed-branch"
--	    : #x optimize="$optimize -fschedule-insns"
--	    optimize="$optimize -fschedule-insns2"
--	    : #? optimize="$optimize -ffunction-sections"
--	    : #? optimize="$optimize -fcaller-saves"
--	    : #> optimize="$optimize -funroll-loops"
--	    : #> optimize="$optimize -funroll-all-loops"
--	    : #x optimize="$optimize -fmove-all-movables"
--	    : #x optimize="$optimize -freduce-all-givs"
--	    : #? optimize="$optimize -fstrict-aliasing"
--	    : #* optimize="$optimize -fstructure-noalias"
--
--	    case "$host" in
--		arm*-*)
--		    optimize="$optimize -fstrength-reduce"
--		    ;;
--		mips*-*)
--		    optimize="$optimize -fstrength-reduce"
--		    optimize="$optimize -finline-functions"
--		    ;;
--		i?86-*)
--		    optimize="$optimize -fstrength-reduce"
--		    ;;
--		powerpc-apple-*)
--		    # this triggers an internal compiler error with gcc2
--		    : #optimize="$optimize -fstrength-reduce"
--
--		    # this is really only beneficial with gcc3
--		    : #optimize="$optimize -finline-functions"
--		    ;;
--		*)
--		    # this sometimes provokes bugs in gcc 2.95.2
--		    : #optimize="$optimize -fstrength-reduce"
--		    ;;
--	    esac
--	    ;;
--    esac
-+    optimize="-O2"
- fi
- 
- case "$host" in
-@@ -21497,6 +21433,7 @@
- then
-     case "$host" in
- 	i?86-*)     FPM="INTEL"  ;;
-+	x86_64*)    FPM="64BIT"  ;;
- 	arm*-*)     FPM="ARM"    ;;
- 	mips*-*)    FPM="MIPS"   ;;
- 	sparc*-*)   FPM="SPARC"  ;;
diff --git a/paddlespeech/audio/third_party/patches/sox.patch b/paddlespeech/audio/third_party/patches/sox.patch
deleted file mode 100644
index fe8df945c078045f58dc661a5a02d8c5f38599ca..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/patches/sox.patch
+++ /dev/null
@@ -1,16 +0,0 @@
-See https://github.com/pytorch/audio/pull/1297
-diff -ru sox/src/formats.c sox/src/formats.c
---- sox/src/formats.c	2014-10-26 19:55:50.000000000 -0700
-+++ sox/src/formats.c	2021-02-22 16:01:02.833144070 -0800
-@@ -333,6 +333,10 @@
-   assert(ft);
-   if (!ft->fp)
-     return sox_false;
--  fstat(fileno((FILE*)ft->fp), &st);
-+  int fd = fileno((FILE*)ft->fp);
-+  if (fd < 0)
-+    return sox_false;
-+  if (fstat(fd, &st) < 0)
-+    return sox_false;
-   return ((st.st_mode & S_IFMT) == S_IFREG);
- }
diff --git a/paddlespeech/audio/third_party/sox/CMakeLists.txt b/paddlespeech/audio/third_party/sox/CMakeLists.txt
deleted file mode 100644
index 8a5bc55c731f1cbcfa5304166645324b0700fed6..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/third_party/sox/CMakeLists.txt
+++ /dev/null
@@ -1,254 +0,0 @@
-find_package(PkgConfig REQUIRED)
-
-include(ExternalProject)
-
-set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install)
-set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)
-set(patch_dir ${CMAKE_CURRENT_SOURCE_DIR}/../patches)
-set(COMMON_ARGS --quiet --disable-shared --enable-static --prefix=${INSTALL_DIR} --with-pic --disable-dependency-tracking --disable-debug --disable-examples --disable-doc)
-
-# To pass custom environment variables to ExternalProject_Add command,
-# we need to do `${CMAKE_COMMAND} -E env ${envs} <COMMANAD>`.
-# https://stackoverflow.com/a/62437353
-# We constrcut the custom environment variables here
-set(envs
-  "PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig"
-  "LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}"
-  "CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
-)
-
-if (BUILD_MAD)
-  ExternalProject_Add(mad
-    PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-    DOWNLOAD_DIR ${ARCHIVE_DIR}
-    URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz
-    URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690
-    PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/
-    CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS}
-    DOWNLOAD_NO_PROGRESS ON
-    LOG_DOWNLOAD ON
-    LOG_UPDATE ON
-    LOG_CONFIGURE ON
-    LOG_BUILD ON
-    LOG_INSTALL ON
-    LOG_MERGED_STDOUTERR ON
-    LOG_OUTPUT_ON_FAILURE ON
-  )
-endif (BUILD_MAD)
-
-ExternalProject_Add(amr
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.5.tar.gz
-  URL_HASH SHA256=2c006cb9d5f651bfb5e60156dbff6af3c9d35c7bbcc9015308c0aff1e14cd341
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/amr/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/amr/configure ${COMMON_ARGS}
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-ExternalProject_Add(lame
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://downloads.sourceforge.net/project/lame/lame/3.99/lame-3.99.5.tar.gz
-  URL_HASH SHA256=24346b4158e4af3bd9f2e194bb23eb473c75fb7377011523353196b19b9a23ff
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/lame/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/lame/configure ${COMMON_ARGS} --enable-nasm
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-ExternalProject_Add(ogg
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://ftp.osuosl.org/pub/xiph/releases/ogg/libogg-1.3.3.tar.gz
-  URL_HASH SHA256=c2e8a485110b97550f453226ec644ebac6cb29d1caef2902c007edab4308d985
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/configure ${COMMON_ARGS}
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-ExternalProject_Add(flac
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS ogg
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz
-  URL_HASH SHA256=91cfc3ed61dc40f47f050a109b08610667d73477af6ef36dcad31c31a4a8d53f
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/flac/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/flac/configure ${COMMON_ARGS} --with-ogg --disable-cpplibs
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-ExternalProject_Add(vorbis
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS ogg
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://ftp.osuosl.org/pub/xiph/releases/vorbis/libvorbis-1.3.6.tar.gz
-  URL_HASH SHA256=6ed40e0241089a42c48604dc00e362beee00036af2d8b3f46338031c9e0351cb
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/configure ${COMMON_ARGS} --with-ogg
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-ExternalProject_Add(opus
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS ogg
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://ftp.osuosl.org/pub/xiph/releases/opus/opus-1.3.1.tar.gz
-  URL_HASH SHA256=65b58e1e25b2a114157014736a3d9dfeaad8d41be1c8179866f144a2fb44ff9d
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opus/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/opus/configure ${COMMON_ARGS} --with-ogg
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-ExternalProject_Add(opusfile
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS opus
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://ftp.osuosl.org/pub/xiph/releases/opus/opusfile-0.12.tar.gz
-  URL_HASH SHA256=118d8601c12dd6a44f52423e68ca9083cc9f2bfe72da7a8c1acb22a80ae3550b
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/configure ${COMMON_ARGS} --disable-http
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-# OpenMP is by default compiled against GNU OpenMP, which conflicts with the version of OpenMP that PyTorch uses.
-# See https://github.com/pytorch/audio/pull/1026
-# TODO: Add flags like https://github.com/suphoff/pytorch_parallel_extension_cpp/blob/master/setup.py
-set(SOX_OPTIONS
-  --disable-openmp
-  --with-amrnb
-  --with-amrwb
-  --with-flac
-  --with-lame
-  --with-oggvorbis
-  --with-opus
-  --without-alsa
-  --without-ao
-  --without-coreaudio
-  --without-oss
-  --without-id3tag
-  --without-ladspa
-  --without-magic
-  --without-png
-  --without-pulseaudio
-  --without-sndfile
-  --without-sndio
-  --without-sunaudio
-  --without-waveaudio
-  --without-wavpack
-  --without-twolame
-  )
-
-set(SOX_LIBRARIES
-  ${INSTALL_DIR}/lib/libsox.a
-  ${INSTALL_DIR}/lib/libopencore-amrnb.a
-  ${INSTALL_DIR}/lib/libopencore-amrwb.a
-  ${INSTALL_DIR}/lib/libmp3lame.a
-  ${INSTALL_DIR}/lib/libFLAC.a
-  ${INSTALL_DIR}/lib/libopusfile.a
-  ${INSTALL_DIR}/lib/libopus.a
-  ${INSTALL_DIR}/lib/libvorbisenc.a
-  ${INSTALL_DIR}/lib/libvorbisfile.a
-  ${INSTALL_DIR}/lib/libvorbis.a
-  ${INSTALL_DIR}/lib/libogg.a
-  )
-
-set(sox_depends
-  ogg flac vorbis opusfile lame amr
-  )
-
-if (BUILD_MAD)
-  list(
-    APPEND
-    SOX_OPTIONS
-    --with-mad
-    )
-  list(
-    APPEND
-    SOX_LIBRARIES
-    ${INSTALL_DIR}/lib/libmad.a
-    )
-  list(
-    APPEND
-    sox_depends
-    mad
-    )
-else ()
-  list(
-    APPEND
-    SOX_OPTIONS
-    --without-mad
-    )  
-endif (BUILD_MAD)
-
-ExternalProject_Add(sox
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS ${sox_depends}
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
-  URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
-  URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
-  PATCH_COMMAND patch -p1 < ${patch_dir}/sox.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/sox/configure ${COMMON_ARGS} ${SOX_OPTIONS}
-  BUILD_BYPRODUCTS ${SOX_LIBRARIES}
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-add_library(libsox INTERFACE)
-add_dependencies(libsox sox)
-target_include_directories(libsox INTERFACE ${INSTALL_DIR}/include)
-target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES})
\ No newline at end of file
diff --git a/paddlespeech/audio/transform/spectrogram.py b/paddlespeech/audio/transform/spectrogram.py
index 864f3f9940b7a34d81c1836157c2740c9b85c1ca..86c0b953e38126f6567663128e1d96f8e70f176e 100644
--- a/paddlespeech/audio/transform/spectrogram.py
+++ b/paddlespeech/audio/transform/spectrogram.py
@@ -17,7 +17,7 @@ import numpy as np
 import paddle
 from python_speech_features import logfbank
 
-from ..compliance import kaldi
+from paddleaudio.compliance import kaldi
 
 
 def stft(x,
diff --git a/paddlespeech/audio/utils/sox_utils.py b/paddlespeech/audio/utils/sox_utils.py
deleted file mode 100644
index 37696a5d91f29a3b83d1e661b2a87f6d66b98670..0000000000000000000000000000000000000000
--- a/paddlespeech/audio/utils/sox_utils.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from typing import Dict, List
-
-from paddlespeech.audio._internal import module_utils as _mod_utils
-from paddlespeech.audio import _paddleaudio
-
-@_mod_utils.requires_sox()
-def set_seed(seed: int):
-    """Set libsox's PRNG
-
-    Args:
-        seed (int): seed value. valid range is int32.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    _paddleaudio.sox_utils_set_seed(seed)
-
-
-@_mod_utils.requires_sox()
-def set_verbosity(verbosity: int):
-    """Set libsox's verbosity
-
-    Args:
-        verbosity (int): Set verbosity level of libsox.
-
-            * ``1`` failure messages
-            * ``2`` warnings
-            * ``3`` details of processing
-            * ``4``-``6`` increasing levels of debug messages
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    _paddleaudio.sox_utils_set_verbosity(verbosity)
-
-
-@_mod_utils.requires_sox()
-def set_buffer_size(buffer_size: int):
-    """Set buffer size for sox effect chain
-
-    Args:
-        buffer_size (int): Set the size in bytes of the buffers used for processing audio.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    _paddleaudio.sox_utils_set_buffer_size(buffer_size)
-
-
-@_mod_utils.requires_sox()
-def set_use_threads(use_threads: bool):
-    """Set multithread option for sox effect chain
-
-    Args:
-        use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing.
-            To use mutlithread, the underlying ``libsox`` has to be compiled with OpenMP support.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    _paddleaudio.sox_utils_set_use_threads(use_threads)
-
-
-@_mod_utils.requires_sox()
-def list_effects() -> Dict[str, str]:
-    """List the available sox effect names
-
-    Returns:
-        Dict[str, str]: Mapping from ``effect name`` to ``usage``
-    """
-    return dict(_paddleaudio.sox_utils_list_effects())
-
-
-@_mod_utils.requires_sox()
-def list_read_formats() -> List[str]:
-    """List the supported audio formats for read
-
-    Returns:
-        List[str]: List of supported audio formats
-    """
-    return _paddleaudio.sox_utils_list_read_formats()
-
-
-@_mod_utils.requires_sox()
-def list_write_formats() -> List[str]:
-    """List the supported audio formats for write
-
-    Returns:
-        List[str]: List of supported audio formats
-    """
-    return _paddleaudio.sox_utils_list_write_formats()
-
-
-@_mod_utils.requires_sox()
-def get_buffer_size() -> int:
-    """Get buffer size for sox effect chain
-
-    Returns:
-        int: size in bytes of buffers used for processing audio.
-    """
-    return _paddleaudio.sox_utils_get_buffer_size()