make loading _paddleaudio.so work

cffe555c · Yang Zhou · 3bb904bd · cffe555c · cffe555c · cffe555c
7 changed files
--- a/paddlespeech/audio/__init__.py
+++ b/paddlespeech/audio/__init__.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from . import _extension
 from . import compliance
 from . import datasets
 from . import features
@@ -18,7 +20,6 @@ from . import functional
 from . import io
 from . import metric
 from . import utils
-from ._ops import ops
 from paddlespeech.audio.backends import get_audio_backend
 from paddlespeech.audio.backends import list_audio_backends
 from paddlespeech.audio.backends import set_audio_backend
@@ -30,7 +31,6 @@ __all__ = [
    "functional",
    "features",
    "utils",
-    'ops'
    "list_audio_backends",
    "get_audio_backend",
    "set_audio_backend",

--- a/paddlespeech/audio/_extension.py
+++ b/paddlespeech/audio/_extension.py
@@ -4,8 +4,69 @@ from pathlib import Path
 from ._internal import module_utils as _mod_utils  # noqa: F401
-_LIB_DIR = Path(__file__) / "lib"
+import contextlib
+import ctypes
+import os
+import sys
+import types
+# Query `hasattr` only once: the dlopen-flag accessors are not present on
+# every platform, so their availability is cached at import time.
+_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
+                                                               'setdlopenflags')
+@contextlib.contextmanager
+def dl_open_guard():
+    """Temporarily add ``RTLD_GLOBAL`` to the interpreter's dlopen flags.
+
+    Context manager used while opening a shared library that provides custom
+    operators. The previous flags are restored when the ``with`` block exits,
+    including when the block raises. When ``sys`` does not expose
+    ``getdlopenflags``/``setdlopenflags`` the manager does nothing.
+
+    See https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html
+    """
+    if not _SET_GLOBAL_FLAGS:
+        # Nothing to set or restore on this platform.
+        yield
+        return
+    old_flags = sys.getdlopenflags()
+    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
+    try:
+        yield
+    finally:
+        # Restore even if loading the library failed, so a failed load does
+        # not leave RTLD_GLOBAL enabled for every later import.
+        sys.setdlopenflags(old_flags)
+def resolve_library_path(path: str) -> str:
+    """Return the canonical form of ``path`` as given by ``os.path.realpath``."""
+    canonical = os.path.realpath(path)
+    return canonical
+class _Ops(types.ModuleType):
+    """Module object named ``paddlespeech.ops`` that loads shared libraries.
+
+    Every path loaded through :meth:`load_library` is recorded in the
+    ``loaded_libraries`` set.
+    """
+
+    def __init__(self):
+        super().__init__('paddlespeech.ops')
+        # Resolved paths of all libraries loaded via ``load_library``.
+        self.loaded_libraries = set()
+
+    def load_library(self, path):
+        """Load the shared library at ``path`` into the current process.
+
+        This allows dynamically loading custom operators: compile the
+        operator together with its static registration code into a shared
+        library, then pass that library's path to this method.
+
+        After a successful load the resolved path is added to
+        ``loaded_libraries``, which may be inspected for the paths of all
+        libraries loaded using this function.
+
+        Args:
+            path (str): A path to a shared library to load.
+        """
+        resolved = resolve_library_path(path)
+        with dl_open_guard():
+            # Opening the library with ctypes imports it into the process.
+            # https://docs.python.org/3/library/ctypes.html#loading-shared-libraries
+            ctypes.CDLL(resolved)
+        self.loaded_libraries.add(resolved)
+_LIB_DIR = Path(__file__).parent / "lib"
 def _get_lib_path(lib: str):
    suffix = "pyd" if os.name == "nt" else "so"
@@ -42,9 +103,12 @@ def _load_lib(lib: str) -> bool:
            If a dependency is missing, then users have to install it.
    """
    path = _get_lib_path(lib)
+    warnings.warn("lib path is :" + str(path))
    if not path.exists():
+        warnings.warn("lib path is not exists:" + str(path))
        return False
-    paddlespeech.audio.ops.load_library(path)
+    #paddlespeech.audio.ops.load_library(path)
+    ops.load_library(path)
    return True
@@ -56,7 +120,7 @@ def _init_ffmpeg():
    if _FFMPEG_INITIALIZED:
        return
-    if not paddlespeech.audio.ops.paddlleaudio.is_ffmpeg_available():
+    if not paddlespeech.audio.paddlleaudio.is_ffmpeg_available():
        raise RuntimeError(
            "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
        )
@@ -69,15 +133,15 @@ def _init_ffmpeg():
    import paddllespeech.audio._paddlleaudio_ffmpeg  # noqa
-    paddlespeech.audio.ops.paddlleaudio.ffmpeg_init()
+    paddlespeech.audio.paddlleaudio.ffmpeg_init()
-    if paddlespeech.audio.ops.paddlleaudio.ffmpeg_get_log_level() > 8:
+    if paddlespeech.audio.paddlleaudio.ffmpeg_get_log_level() > 8:
-        paddlespeech.audio.ops.paddlleaudio.ffmpeg_set_log_level(8)
+        paddlespeech.audio.paddlleaudio.ffmpeg_set_log_level(8)
    _FFMPEG_INITIALIZED = True
 def _init_extension():
-    if not _mod_utils.is_module_available("paddlespeech._paddleaudio"):
+    if not _mod_utils.is_module_available("paddlespeech.audio._paddleaudio"):
        warnings.warn("paddlespeech C++ extension is not available.")
        return
@@ -96,4 +160,6 @@ def _init_extension():
        pass
+ops = _Ops()
 _init_extension()
--- a/paddlespeech/audio/backends/common.py
+++ b/paddlespeech/audio/backends/common.py
+# code from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py
+class AudioMetaData:
+    """Metadata describing an audio file.
+
+    Adapted from torchaudio's ``AudioMetaData``; it is the value returned by
+    the ``info`` function of the ``sox_io`` backend.
+
+    :ivar int sample_rate: Sample rate
+    :ivar int num_frames: The number of frames
+    :ivar int num_channels: The number of channels
+    :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
+        or when it cannot be accurately inferred.
+    :ivar str encoding: Audio encoding
+        The values encoding can take are one of the following:
+
+            * ``PCM_S``: Signed integer linear PCM
+            * ``PCM_U``: Unsigned integer linear PCM
+            * ``PCM_F``: Floating point linear PCM
+            * ``FLAC``: Flac, Free Lossless Audio Codec
+            * ``ULAW``: Mu-law
+            * ``ALAW``: A-law
+            * ``MP3`` : MP3, MPEG-1 Audio Layer III
+            * ``VORBIS``: OGG Vorbis
+            * ``AMR_WB``: Adaptive Multi-Rate Wideband
+            * ``AMR_NB``: Adaptive Multi-Rate Narrowband
+            * ``OPUS``: Opus
+            * ``HTK``: Single channel 16-bit PCM
+            * ``UNKNOWN`` : None of above
+    """
+
+    def __init__(
+        self,
+        sample_rate: int,
+        num_frames: int,
+        num_channels: int,
+        bits_per_sample: int,
+        encoding: str,
+    ):
+        self.sample_rate = sample_rate
+        self.num_frames = num_frames
+        self.num_channels = num_channels
+        self.bits_per_sample = bits_per_sample
+        self.encoding = encoding
+
+    def __str__(self):
+        """Return a ``AudioMetaData(field=value, ...)`` summary of all fields."""
+        return (
+            f"AudioMetaData("
+            f"sample_rate={self.sample_rate}, "
+            f"num_frames={self.num_frames}, "
+            f"num_channels={self.num_channels}, "
+            f"bits_per_sample={self.bits_per_sample}, "
+            f"encoding={self.encoding}"
+            f")"
+        )
+
+    # Use the same readable form for repr() so instances are legible in
+    # debuggers, logs and containers.
+    __repr__ = __str__
--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ b/paddlespeech/audio/backends/sox_io_backend.py
@@ -6,9 +6,42 @@ from typing import Tuple
 from typing import Union
 from paddle import Tensor
+from .common import AudioMetaData
+from paddlespeech.audio._internal import module_utils  as _mod_utils
+from paddlespeech.audio._paddleaudio import get_info_file
+from paddlespeech.audio._paddleaudio import get_info_fileobj
 #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
+def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
+    """Always raise ``RuntimeError`` naming ``filepath``; ``format`` is unused."""
+    message = "Failed to fetch metadata from {}".format(filepath)
+    raise RuntimeError(message)
+def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData:
+    """Always raise ``RuntimeError`` naming ``fileobj``; ``format`` is unused."""
+    message = "Failed to fetch metadata from {}".format(fileobj)
+    raise RuntimeError(message)
+# Note: kept fully annotated and f-string free, as in the torchaudio original
+# (which had to comply with TorchScript syntax).
+def _fail_load(
+    filepath: str,
+    frame_offset: int = 0,
+    num_frames: int = -1,
+    normalize: bool = True,
+    channels_first: bool = True,
+    format: Optional[str] = None,
+) -> Tuple[Tensor, int]:
+    """Always raise ``RuntimeError`` naming ``filepath``.
+
+    All other parameters are accepted but unused by this function.
+
+    Raises:
+        RuntimeError: always.
+    """
+    # The annotation uses ``Tensor`` (imported via ``from paddle import
+    # Tensor``) rather than ``paddle.Tensor`` so that defining this function
+    # does not require the ``paddle`` name itself to be in scope.
+    raise RuntimeError("Failed to load audio from {}".format(filepath))
+def _fail_load_fileobj(fileobj, *args, **kwargs):
+    """Always raise ``RuntimeError`` naming ``fileobj``; extra arguments are ignored."""
+    message = f"Failed to load audio from {fileobj}"
+    raise RuntimeError(message)
+# Fallbacks used when the primary implementation returns no result; each one
+# currently just raises ``RuntimeError``.
+_fallback_info = _fail_info
+_fallback_info_fileobj = _fail_info_fileobj
+_fallback_load = _fail_load
+_fallback_load_fileobj = _fail_load_fileobj
+# Misspelled name kept as an alias so any existing reference keeps working.
+_fallback_load_filebj = _fallback_load_fileobj
 def load(
    filepath: Union[str, Path],
    out: Optional[Tensor] = None,
@@ -20,10 +53,12 @@ def load(
 ) -> Tuple[Tensor, int]:
    raise RuntimeError("No audio I/O backend is available.")
 def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
    raise RuntimeError("No audio I/O backend is available.")
+@_mod_utils.requires_sox()
-def info(filepath: str) -> None:
+def info(filepath: str, format: Optional[str] = None) -> AudioMetaData:
-    raise RuntimeError("No audio I/O backend is available.")
\ No newline at end of file
+    """Fetch the metadata of the audio file at ``filepath``.
+
+    Args:
+        filepath (str): Path of the audio file to inspect.
+        format (Optional[str]): Forwarded unchanged to ``get_info_file``.
+            Defaults to ``None`` so existing ``info(filepath)`` calls keep
+            working. NOTE(review): presumably a format override for the
+            extension -- confirm against the binding.
+
+    Returns:
+        AudioMetaData: built from the values returned by ``get_info_file``.
+
+    Raises:
+        RuntimeError: via ``_fallback_info`` when ``get_info_file`` returns
+            ``None``.
+    """
+    # ``get_info_file`` is imported at module level from
+    # ``paddlespeech.audio._paddleaudio``; call it through that name.
+    sinfo = get_info_file(filepath, format)
+    if sinfo is not None:
+        return AudioMetaData(*sinfo)
+    return _fallback_info(filepath, format)
--- a/paddlespeech/audio/kaldi/__init__.py
+++ b/paddlespeech/audio/kaldi/__init__.py
@@ -11,5 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import fbank
+from .kaldi import fbank
-from . import pitch
+from .kaldi import pitch
--- a/paddlespeech/audio/kaldi/kaldi.py
+++ b/paddlespeech/audio/kaldi/kaldi.py
@@ -13,12 +13,12 @@
 # limitations under the License.
 from paddlespeech.audio._internal import module_utils 
-import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank
+from paddlespeech.audio._paddleaudio import ComputeFbank as ComputeFbank
-import paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions as PitchExtractionOptions
+from paddlespeech.audio._paddleaudio import PitchExtractionOptions as PitchExtractionOptions
-import paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions as FrameExtractionOptions
+from paddlespeech.audio._paddleaudio import FrameExtractionOptions as FrameExtractionOptions
-import paddlespeech.audio.ops.paddleaudio.MelBanksOptions as MelBanksOptions
+from paddlespeech.audio._paddleaudio import MelBanksOptions as MelBanksOptions
-import paddlespeech.audio.ops.paddleaudio.FbankOptions as FbankOptions
+from paddlespeech.audio._paddleaudio import FbankOptions as FbankOptions
-import paddlespeech.audio.ops.paddleaudio.ComputeKaldiPitch as ComputeKaldiPitch
+from paddlespeech.audio._paddleaudio import ComputeKaldiPitch as ComputeKaldiPitch
 __all__ = [
    'fbank',

--- a/tests/unit/audio/features/test_kaldi_feat.py
+++ b/tests/unit/audio/features/test_kaldi_feat.py
@@ -16,8 +16,8 @@ import unittest
 import numpy as np
 import paddle
-import paddlespeech.audio.kaldi.fbank as fbank
+from paddlespeech.audio.kaldi import fbank as fbank
-import paddlespeech.audio.kaldi.pitch as pitch
+from paddlespeech.audio.kaldi import pitch as pitch
 from kaldiio import ReadHelper
 # the groundtruth feats computed in kaldi command below.