Merge pull request #2089 from zh794390558/cpplint

[audio] format code

Merge pull request #2089 from zh794390558/cpplint
[audio] format code
a3911ab5 · Hui Zhang · GitHub · fb7cbb34 · fb1300f9 · a3911ab5
32 changed files
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -76,4 +76,4 @@ repos:
        entry: bash .pre-commit-hooks/cpplint.hook
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
-        exclude: (?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
+        exclude: (?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|paddlespeech/audio/src/optional).*(\.cpp|\.cc|\.h|\.hpp)$
\ No newline at end of file
--- a/audio/audio/csrc/pybind/pybind.cpp
+++ b/audio/audio/csrc/pybind/pybind.cpp
 #include "pybind/sox/io.h"
 PYBIND11_MODULE(_paddleaudio, m) {
-  m.def("get_info_file", &paddleaudio::sox_io::get_info_file,
+    m.def("get_info_file",
+          &paddleaudio::sox_io::get_info_file,
          "Get metadata of audio file.");
-  m.def("get_info_fileobj", &paddleaudio::sox_io::get_info_fileobj,
+    m.def("get_info_fileobj",
+          &paddleaudio::sox_io::get_info_fileobj,
          "Get metadata of audio in file object.");
 }
\ No newline at end of file
--- a/audio/audio/csrc/pybind/sox/io.cpp
+++ b/audio/audio/csrc/pybind/sox/io.cpp
@@ -8,7 +8,8 @@ namespace sox_io {
 auto get_info_file(const std::string &path, const std::string &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
-  SoxFormat sf(sox_open_read(path.data(),
+    SoxFormat sf(
+        sox_open_read(path.data(),
                      /*signal=*/nullptr,
                      /*encoding=*/nullptr,
                      /*filetype=*/format.empty() ? nullptr : format.data()));
@@ -38,7 +39,9 @@ auto get_info_fileobj(py::object fileobj, const std::string &format)
    // If the file is shorter than 256, then libsox cannot read the header.
    auto buf_size = (num_read > 256) ? num_read : 256;
-  SoxFormat sf(sox_open_mem_read(buf, buf_size,
+    SoxFormat sf(sox_open_mem_read(
+        buf,
+        buf_size,
        /*signal=*/nullptr,
        /*encoding=*/nullptr,
        /*filetype=*/format.empty() ? nullptr : format.data()));

--- a/audio/audio/csrc/pybind/sox/io.h
+++ b/audio/audio/csrc/pybind/sox/io.h
--- a/audio/audio/csrc/pybind/sox/utils.cpp
+++ b/audio/audio/csrc/pybind/sox/utils.cpp
@@ -31,7 +31,8 @@ auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
        }
        if (chunk_len > request) {
            std::ostringstream message;
-      message << "Requested up to " << request << " bytes but, "
+            message
+                << "Requested up to " << request << " bytes but, "
                << "received " << chunk_len << " bytes. "
                << "The given object does not confirm to read protocol of file "
                   "object.";
@@ -48,8 +49,8 @@ int64_t get_buffer_size() { return sox_get_globals()->bufsiz; }
 void validate_input_file(const SoxFormat &sf, const std::string &path) {
    if (static_cast<sox_format_t *>(sf) == nullptr) {
-    throw std::runtime_error("Error loading audio file: failed to open file " +
+        throw std::runtime_error(
-                             path);
+            "Error loading audio file: failed to open file " + path);
    }
    if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
        throw std::runtime_error("Error loading audio file: unknown encoding.");

--- a/audio/audio/csrc/pybind/sox/utils.h
+++ b/audio/audio/csrc/pybind/sox/utils.h
@@ -22,7 +22,7 @@ struct SoxFormat {
    void close();
-private:
+  private:
    sox_format_t *fd_;
 };

--- a/paddlespeech/__init__.py
+++ b/paddlespeech/__init__.py
@@ -14,5 +14,3 @@
 import _locale
 _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])
--- a/paddlespeech/audio/README.md
+++ b/paddlespeech/audio/README.md
--- a/paddlespeech/audio/__init__.py
+++ b/paddlespeech/audio/__init__.py
--- a/paddlespeech/audio/_class.py
+++ b/paddlespeech/audio/_class.py
 import types
 class _ClassNamespace(types.ModuleType):
    def __init__(self, name):
        super(_ClassNamespace, self).__init__('paddlespeech.classes' + name)
@@ -11,6 +12,7 @@ class _ClassNamespace(types.ModuleType):
            raise RuntimeError(f'Class {self.name}.{attr} not registered!')
        return proxy
 class _Classes(types.ModuleType):
    __file__ = '_classes.py'
@@ -43,5 +45,6 @@ class _Classes(types.ModuleType):
        """
        paddlespeech.ops.load_library(path)
 # The classes "namespace"
 classes = _Classes()
--- a/paddlespeech/audio/_extension.py
+++ b/paddlespeech/audio/_extension.py
@@ -64,7 +64,8 @@ def _init_ffmpeg():
    try:
        _load_lib("libpaddlleaudio_ffmpeg")
    except OSError as err:
-        raise ImportError("FFmpeg libraries are not found. Please install FFmpeg.") from err
+        raise ImportError(
+            "FFmpeg libraries are not found. Please install FFmpeg.") from err
    import paddllespeech._paddlleaudio_ffmpeg  # noqa

--- a/paddlespeech/audio/_internal/module_utils.py
+++ b/paddlespeech/audio/_internal/module_utils.py
@@ -3,6 +3,7 @@ import warnings
 from functools import wraps
 from typing import Optional
 def is_module_available(*modules: str) -> bool:
    r"""Returns if a top-level module with :attr:`name` exists *without**
    importing it. This is generally safer than try-catch block around a
@@ -26,19 +27,21 @@ def requires_module(*modules: str):
            return func
    else:
-        req = f"module: {missing[0]}" if len(missing) == 1 else f"modules: {missing}"
+        req = f"module: {missing[0]}" if len(
+            missing) == 1 else f"modules: {missing}"
        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
-                raise RuntimeError(f"{func.__module__}.{func.__name__} requires {req}")
+                raise RuntimeError(
+                    f"{func.__module__}.{func.__name__} requires {req}")
            return wrapped
    return decorator
-def deprecated(direction: str, version: Optional[str] = None):
+def deprecated(direction: str, version: Optional[str]=None):
    """Decorator to add deprecation message
    Args:
        direction (str): Migration steps to be given to users.
@@ -51,8 +54,7 @@ def deprecated(direction: str, version: Optional[str] = None):
            message = (
                f"{func.__module__}.{func.__name__} has been deprecated "
                f'and will be removed from {"future" if version is None else version} release. '
-                f"{direction}"
+                f"{direction}")
-            )
            warnings.warn(message, stacklevel=2)
            return func(*args, **kwargs)
@@ -62,7 +64,7 @@ def deprecated(direction: str, version: Optional[str] = None):
 def is_kaldi_available():
-    return is_module_available("paddlespeech._paddleaudio") and paddlespeech.ops.paddleaudio.is_kaldi_available()
+    return is_module_available("paddlespeech.audio._paddleaudio")
 def requires_kaldi():
@@ -76,7 +78,8 @@ def requires_kaldi():
        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
-                raise RuntimeError(f"{func.__module__}.{func.__name__} requires kaldi")
+                raise RuntimeError(
+                    f"{func.__module__}.{func.__name__} requires kaldi")
            return wrapped
@@ -91,7 +94,8 @@ def _check_soundfile_importable():
        return True
    except Exception:
-        warnings.warn("Failed to import soundfile. 'soundfile' backend is not available.")
+        warnings.warn(
+            "Failed to import soundfile. 'soundfile' backend is not available.")
        return False
@@ -113,7 +117,8 @@ def requires_soundfile():
        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
-                raise RuntimeError(f"{func.__module__}.{func.__name__} requires soundfile")
+                raise RuntimeError(
+                    f"{func.__module__}.{func.__name__} requires soundfile")
            return wrapped
@@ -121,7 +126,7 @@ def requires_soundfile():
 def is_sox_available():
-    return is_module_available("paddlespeech._paddleaudio") and paddlespeech.ops.paddleaudio.is_sox_available()
+    return is_module_available("paddlespeech.audio._paddleaudio")
 def requires_sox():
@@ -135,7 +140,8 @@ def requires_sox():
        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
-                raise RuntimeError(f"{func.__module__}.{func.__name__} requires sox")
+                raise RuntimeError(
+                    f"{func.__module__}.{func.__name__} requires sox")
            return wrapped

--- a/paddlespeech/audio/_ops.py
+++ b/paddlespeech/audio/_ops.py
 import contextlib
 import ctypes
-import sys
 import os
+import sys
 import types
 # Query `hasattr` only once.
-_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys, 'setdlopenflags')
+_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
+                                                               'setdlopenflags')
 @contextlib.contextmanager

--- a/paddlespeech/audio/src/optional/optional.hpp
+++ b/paddlespeech/audio/src/optional/optional.hpp
--- a/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h
+++ b/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h
@@ -14,9 +14,9 @@
 #pragma once
-#include "feat/feature-window.h"
-#include <pybind11/pybind11.h>
 #include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include "feat/feature-window.h"
 namespace paddleaudio {
@@ -29,13 +29,9 @@ class StreamingFeatureTpl {
    StreamingFeatureTpl(const Options& opts);
    bool ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav,
                        kaldi::Vector<kaldi::BaseFloat>* feats);
-    void Reset() {
+    void Reset() { remained_wav_.Resize(0); }
-        remained_wav_.Resize(0);
-    }
-    int Dim() {
+    int Dim() { return computer_.Dim(); }
-      return computer_.Dim();
-    }
  private:
    bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
@@ -49,4 +45,3 @@ class StreamingFeatureTpl {
 }  // namespace paddleaudio
 #include "feature_common_inl.h"
--- a/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
+++ b/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
@@ -17,15 +17,14 @@
 namespace paddleaudio {
 template <class F>
-StreamingFeatureTpl<F>::StreamingFeatureTpl(
+StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
-    const Options& opts)
+    : opts_(opts), computer_(opts), window_function_(opts.frame_opts) {
-    : opts_(opts), computer_(opts), 
+    // window_function_(computer_.GetFrameOptions()) { the opt set to zero
-      window_function_(opts.frame_opts) {
-      //window_function_(computer_.GetFrameOptions()) { the opt set to zero
 }
 template <class F>
-bool StreamingFeatureTpl<F>::ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav, 
+bool StreamingFeatureTpl<F>::ComputeFeature(
+    const kaldi::VectorBase<kaldi::BaseFloat>& wav,
    kaldi::Vector<kaldi::BaseFloat>* feats) {
    // append remained waves
    kaldi::int32 wav_len = wav.Dim();

--- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
+++ b/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
-#include <pybind11/pybind11.h>
 #include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
 #include "kaldi_feature_wrapper.h"
-namespace py=pybind11;
+namespace py = pybind11;
-bool InitFbank(
+bool InitFbank(float samp_freq,  // frame opts
-    float samp_freq, // frame opts 
               float frame_shift_ms,
               float frame_length_ms,
               float dither,
@@ -139,5 +138,7 @@ PYBIND11_MODULE(kaldi_featurepy, m) {
    m.def("InitFbank", &InitFbank, "init fbank");
    m.def("ResetFbank", &ResetFbank, "reset fbank");
    m.def("ComputeFbank", &ComputeFbank, "compute fbank");
-    m.def("ComputeFbankStreaming", &ComputeFbankStreaming, "compute fbank streaming");
+    m.def("ComputeFbankStreaming",
+          &ComputeFbankStreaming,
+          "compute fbank streaming");
 }
--- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
+++ b/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
-#include <pybind11/pybind11.h>
 #include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
 #include "kaldi_feature_wrapper.h"
-namespace py=pybind11;
+namespace py = pybind11;
-bool InitFbank(
+bool InitFbank(float samp_freq,  // frame opts
-    float samp_freq, // frame opts 
               float frame_shift_ms,
               float frame_length_ms,
               float dither,

--- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
+++ b/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
@@ -12,7 +12,8 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
    return true;
 }
-py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double> wav) {
+py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
+    const py::array_t<double> wav) {
    py::buffer_info info = wav.request();
    kaldi::Vector<kaldi::BaseFloat> input_wav(info.size);
    double* wav_ptr = (double*)info.ptr;
@@ -38,8 +39,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
        res_ptr++;
    }
-    return result.reshape({ feats.Dim() / Dim(), Dim()});
+    return result.reshape({feats.Dim() / Dim(), Dim()});
-/*
+    /*
         py::buffer_info info = wav.request();
        std::cout << info.size << std::endl;
        auto result = py::array_t<double>(info.size);
@@ -48,9 +49,10 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
        py::buffer_info info_re = result.request();
        memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
-    memcpy((double*)info_re.ptr, input_wav.Data(), input_wav.Dim()* sizeof(double));
+        memcpy((double*)info_re.ptr, input_wav.Data(), input_wav.Dim()*
+       sizeof(double));
        return result;
-*/
+    */
 }

--- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
+++ b/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
 #include "base/kaldi-common.h"
-#include "feature_common.h"
 #include "feat/feature-fbank.h"
+#include "feature_common.h"
 #pragma once
@@ -14,12 +14,8 @@ class KaldiFeatureWrapper {
    static KaldiFeatureWrapper* GetInstance();
    bool InitFbank(kaldi::FbankOptions opts);
    py::array_t<double> ComputeFbank(const py::array_t<double> wav);
-    int Dim() {
+    int Dim() { return fbank_->Dim(); }
-      return fbank_->Dim();
+    void ResetFbank() { fbank_->Reset(); }
-    }
-    void ResetFbank() {
-      fbank_->Reset();
-    }
  private:
    std::unique_ptr<paddleaudio::Fbank> fbank_;

--- a/paddlespeech/audio/src/pybind/pybind.cpp
+++ b/paddlespeech/audio/src/pybind/pybind.cpp
-//Copyright (c) 2017 Facebook Inc. (Soumith Chintala), 
+// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-//All rights reserved.
+// All rights reserved.
 #include "paddlespeech/audio/src/pybind/sox/io.h"
 PYBIND11_MODULE(_paddleaudio, m) {
-  m.def("get_info_file", &paddleaudio::sox_io::get_info_file,
+    m.def("get_info_file",
+          &paddleaudio::sox_io::get_info_file,
          "Get metadata of audio file.");
-  m.def("get_info_fileobj", &paddleaudio::sox_io::get_info_fileobj,
+    m.def("get_info_fileobj",
+          &paddleaudio::sox_io::get_info_fileobj,
          "Get metadata of audio in file object.");
 }
\ No newline at end of file
--- a/paddlespeech/audio/src/pybind/sox/io.cpp
+++ b/paddlespeech/audio/src/pybind/sox/io.cpp
-//Copyright (c) 2017 Facebook Inc. (Soumith Chintala), 
+// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-//All rights reserved.
+// All rights reserved.
 #include "paddlespeech/audio/src/pybind/sox/io.h"
 #include "paddlespeech/audio/src/pybind/sox/utils.h"
@@ -11,7 +11,8 @@ namespace sox_io {
 auto get_info_file(const std::string &path, const std::string &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
-  SoxFormat sf(sox_open_read(path.data(),
+    SoxFormat sf(
+        sox_open_read(path.data(),
                      /*signal=*/nullptr,
                      /*encoding=*/nullptr,
                      /*filetype=*/format.empty() ? nullptr : format.data()));
@@ -41,7 +42,9 @@ auto get_info_fileobj(py::object fileobj, const std::string &format)
    // If the file is shorter than 256, then libsox cannot read the header.
    auto buf_size = (num_read > 256) ? num_read : 256;
-  SoxFormat sf(sox_open_mem_read(buf, buf_size,
+    SoxFormat sf(sox_open_mem_read(
+        buf,
+        buf_size,
        /*signal=*/nullptr,
        /*encoding=*/nullptr,
        /*filetype=*/format.empty() ? nullptr : format.data()));

--- a/paddlespeech/audio/src/pybind/sox/io.h
+++ b/paddlespeech/audio/src/pybind/sox/io.h
-//Copyright (c) 2017 Facebook Inc. (Soumith Chintala), 
+// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-//All rights reserved.
+// All rights reserved.
 #ifndef PADDLEAUDIO_PYBIND_SOX_IO_H
 #define PADDLEAUDIO_PYBIND_SOX_IO_H

--- a/paddlespeech/audio/src/pybind/sox/utils.cpp
+++ b/paddlespeech/audio/src/pybind/sox/utils.cpp
-//Copyright (c) 2017 Facebook Inc. (Soumith Chintala), 
+// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-//All rights reserved.
+// All rights reserved.
 #include "paddlespeech/audio/src/pybind/sox/utils.h"
@@ -34,7 +34,8 @@ auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
        }
        if (chunk_len > request) {
            std::ostringstream message;
-      message << "Requested up to " << request << " bytes but, "
+            message
+                << "Requested up to " << request << " bytes but, "
                << "received " << chunk_len << " bytes. "
                << "The given object does not confirm to read protocol of file "
                   "object.";
@@ -51,8 +52,8 @@ int64_t get_buffer_size() { return sox_get_globals()->bufsiz; }
 void validate_input_file(const SoxFormat &sf, const std::string &path) {
    if (static_cast<sox_format_t *>(sf) == nullptr) {
-    throw std::runtime_error("Error loading audio file: failed to open file " +
+        throw std::runtime_error(
-                             path);
+            "Error loading audio file: failed to open file " + path);
    }
    if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
        throw std::runtime_error("Error loading audio file: unknown encoding.");

--- a/paddlespeech/audio/src/pybind/sox/utils.h
+++ b/paddlespeech/audio/src/pybind/sox/utils.h
-//Copyright (c) 2017 Facebook Inc. (Soumith Chintala), 
+// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-//All rights reserved.
+// All rights reserved.
 #ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H
 #define PADDLEAUDIO_PYBIND_SOX_UTILS_H
@@ -25,7 +25,7 @@ struct SoxFormat {
    void close();
-private:
+  private:
    sox_format_t *fd_;
 };

--- a/paddlespeech/audio/src/sox/io.cpp
+++ b/paddlespeech/audio/src/sox/io.cpp
@@ -11,8 +11,7 @@ namespace paddleaudio {
 namespace sox_io {
 tl::optional<MetaDataTuple> get_info_file(
-    const std::string& path,
+    const std::string& path, const tl::optional<std::string>& format) {
-    const tl::optional<std::string>& format) {
    SoxFormat sf(sox_open_read(
        path.c_str(),
        /*signal=*/nullptr,
@@ -73,8 +72,7 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
        path, effects, normalize, channels_first, format);
 }
-void save_audio_file(
+void save_audio_file(const std::string& path,
-    const std::string& path,
                     torch::Tensor tensor,
                     int64_t sample_rate,
                     bool channels_first,
@@ -85,25 +83,23 @@ void save_audio_file(
    validate_input_tensor(tensor);
    const auto filetype = [&]() {
-    if (format.has_value())
+        if (format.has_value()) return format.value();
-      return format.value();
        return get_filetype(path);
    }();
    if (filetype == "amr-nb") {
        const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
+        TORCH_CHECK(num_channels == 1,
-        num_channels == 1, "amr-nb format only supports single channel audio.");
+                    "amr-nb format only supports single channel audio.");
    } else if (filetype == "htk") {
        const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
+        TORCH_CHECK(num_channels == 1,
-        num_channels == 1, "htk format only supports single channel audio.");
+                    "htk format only supports single channel audio.");
    } else if (filetype == "gsm") {
        const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
+        TORCH_CHECK(num_channels == 1,
-        num_channels == 1, "gsm format only supports single channel audio.");
+                    "gsm format only supports single channel audio.");
-    TORCH_CHECK(
+        TORCH_CHECK(sample_rate == 8000,
-        sample_rate == 8000,
                    "gsm format only supports a sampling rate of 8kHz.");
    }
    const auto signal_info =
@@ -111,8 +107,7 @@ void save_audio_file(
    const auto encoding_info = get_encodinginfo_for_save(
        filetype, tensor.dtype(), compression, encoding, bits_per_sample);
-  SoxFormat sf(sox_open_write(
+    SoxFormat sf(sox_open_write(path.c_str(),
-      path.c_str(),
                                &signal_info,
                                &encoding_info,
                                /*filetype=*/filetype.c_str(),
@@ -134,11 +129,9 @@ void save_audio_file(
 TORCH_LIBRARY_FRAGMENT(paddleaudio, m) {
    m.def("paddleaudio::sox_io_get_info", &paddleaudio::sox_io::get_info_file);
-  m.def(
+    m.def("paddleaudio::sox_io_load_audio_file",
-      "paddleaudio::sox_io_load_audio_file",
          &paddleaudio::sox_io::load_audio_file);
-  m.def(
+    m.def("paddleaudio::sox_io_save_audio_file",
-      "paddleaudio::sox_io_save_audio_file",
          &paddleaudio::sox_io::save_audio_file);
 }

--- a/paddlespeech/audio/src/sox/io.h
+++ b/paddlespeech/audio/src/sox/io.h
-//Copyright (c) 2017 Facebook Inc. (Soumith Chintala), 
+// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
-//All rights reserved.
+// All rights reserved.
 #ifndef PADDLEAUDIO_SOX_IO_H
 #define PADDLEAUDIO_SOX_IO_H
@@ -11,8 +11,7 @@
 namespace paddleaudio {
 namespace sox_io {
-auto get_effects(
+auto get_effects(const tl::optional<int64_t>& frame_offset,
-    const tl::optional<int64_t>& frame_offset,
                 const tl::optional<int64_t>& num_frames)
    -> std::vector<std::vector<std::string>>;
@@ -20,8 +19,7 @@ using MetaDataTuple =
    std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
 tl::optional<MetaDataTuple> get_info_file(
-    const std::string& path,
+    const std::string& path, const tl::optional<std::string>& format);
-    const tl::optional<std::string>& format);
 tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
    const std::string& path,
@@ -31,8 +29,7 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format);
-void save_audio_file(
+void save_audio_file(const std::string& path,
-    const std::string& path,
                     torch::Tensor tensor,
                     int64_t sample_rate,
                     bool channels_first,

--- a/paddlespeech/audio/src/utils.cpp
+++ b/paddlespeech/audio/src/utils.cpp
--- a/setup.py
+++ b/setup.py
@@ -18,9 +18,9 @@ import os
 import subprocess as sp
 import sys
 from pathlib import Path
-from typing import Union
-from typing import Tuple
 from typing import List
+from typing import Tuple
+from typing import Union
 import distutils.command.clean
 from setuptools import Command
@@ -38,43 +38,13 @@ VERSION = '0.0.0'
 COMMITID = 'none'
 base = [
-    "editdistance",
+    "editdistance", "g2p_en", "g2pM", "h5py", "inflect", "jieba", "jsonlines",
-    "g2p_en", 
+    "kaldiio", "librosa==0.8.1", "loguru", "matplotlib", "nara_wpe",
-    "g2pM", 
+    "onnxruntime", "pandas", "paddlenlp", "paddlespeech_feat", "praatio==5.0.0",
-    "h5py", 
+    "pypinyin", "pypinyin-dict", "python-dateutil", "pyworld", "resampy==0.2.2",
-    "inflect", 
+    "sacrebleu", "scipy", "sentencepiece~=0.1.96", "soundfile~=0.10",
-    "jieba", 
+    "textgrid", "timer", "tqdm", "typeguard", "visualdl", "webrtcvad",
-    "jsonlines",
+    "yacs~=0.1.8", "prettytable", "zhon", 'colorlog', 'pathos == 0.2.8'
-    "kaldiio", 
-    "librosa==0.8.1",
-    "loguru", 
-    "matplotlib", 
-    "nara_wpe",
-    "onnxruntime", 
-    "pandas", 
-    "paddlenlp", 
-    "paddlespeech_feat", 
-    "praatio==5.0.0",
-    "pypinyin", 
-    "pypinyin-dict", 
-    "python-dateutil", 
-    "pyworld", 
-    "resampy==0.2.2",
-    "sacrebleu", 
-    "scipy", 
-    "sentencepiece~=0.1.96", 
-    "soundfile~=0.10",
-    "textgrid", 
-    "timer", 
-    "tqdm", 
-    "typeguard", 
-    "visualdl", 
-    "webrtcvad",
-    "yacs~=0.1.8", 
-    "prettytable", 
-    "zhon", 
-    'colorlog', 
-    'pathos == 0.2.8'
 ]
 server = [
@@ -264,6 +234,7 @@ class clean(distutils.command.clean.clean):
                print(f"removing '{path}' (and everything under it)")
                shutil.rmtree(str(path), ignore_errors=True)
 def main():
    sha = check_output(["git", "rev-parse", "HEAD"])  # commit id
    branch = check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"])
@@ -319,7 +290,8 @@ def main():
            requirements["develop"],
            'doc': [
                "sphinx", "sphinx-rtd-theme", "numpydoc", "myst_parser",
-                "recommonmark>=0.5.0", "sphinx-markdown-tables", "sphinx-autobuild"
+                "recommonmark>=0.5.0", "sphinx-markdown-tables",
+                "sphinx-autobuild"
            ],
            'test': ['nose', 'torchaudio==0.10.2'],
        },
@@ -358,5 +330,6 @@ def main():
    setup(**setup_info)
 if __name__ == '__main__':
    main()
--- a/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
+++ b/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
@@ -494,6 +494,11 @@ class SymbolicShapeInference:
            # contrib ops
            'Attention', 'BiasGelu', \
            'EmbedLayerNormalization', \
            'FastGelu', 'Gelu', 'LayerNormalization', \

--- a/tools/setup_helpers/__init__.py
+++ b/tools/setup_helpers/__init__.py
--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
-import distutils.sysconfig
 import os
 import platform
 import subprocess
 from pathlib import Path
+import distutils.sysconfig
 from setuptools import Extension
 from setuptools.command.build_ext import build_ext
@@ -27,11 +27,13 @@ def _get_build(var, default=False):
    if val in trues:
        return True
    if val not in falses:
-        print(f"WARNING: Unexpected environment variable value `{var}={val}`. " f"Expected one of {trues + falses}")
+        print(f"WARNING: Unexpected environment variable value `{var}={val}`. "
+              f"Expected one of {trues + falses}")
    return False
-_BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
+_BUILD_SOX = False if platform.system() == "Windows" else _get_build(
+    "BUILD_SOX", True)
 _BUILD_MAD = _get_build("BUILD_MAD", False)
 # _BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
 # _BUILD_RNNT = _get_build("BUILD_RNNT", True)
@@ -40,7 +42,8 @@ _BUILD_MAD = _get_build("BUILD_MAD", False)
 # _USE_ROCM = _get_build("USE_ROCM", torch.cuda.is_available() and torch.version.hip is not None)
 # _USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available() and torch.version.hip is None)
 # _USE_OPENMP = _get_build("USE_OPENMP", True) and "ATen parallel backend: OpenMP" in torch.__config__.parallel_info()
-_PADDLESPEECH_CUDA_ARCH_LIST = os.environ.get("PADDLESPEECH_CUDA_ARCH_LIST", None)
+_PADDLESPEECH_CUDA_ARCH_LIST = os.environ.get("PADDLESPEECH_CUDA_ARCH_LIST",
+                                              None)
 def get_ext_modules():
@@ -71,7 +74,8 @@ class CMakeBuild(build_ext):
        if ext.name != "paddlespeech.audio._paddleaudio":
            return
-        extdir = os.path.abspath(os.path.dirname(self.get_ext_filename(ext.name)))
+        extdir = os.path.abspath(
+            os.path.dirname(self.get_ext_filename(ext.name)))
        # required for auto-detection of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):
@@ -101,8 +105,12 @@ class CMakeBuild(build_ext):
        if _PADDLESPEECH_CUDA_ARCH_LIST is not None:
            # Convert MAJOR.MINOR[+PTX] list to new style one
            # defined at https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html
-            _arches = _PADDLESPEECH_CUDA_ARCH_LIST.replace(".", "").replace(" ", ";").split(";")
+            _arches = _PADDLESPEECH_CUDA_ARCH_LIST.replace(".", "").replace(
-            _arches = [arch[:-4] if arch.endswith("+PTX") else f"{arch}-real" for arch in _arches]
+                " ", ";").split(";")
+            _arches = [
+                arch[:-4] if arch.endswith("+PTX") else f"{arch}-real"
+                for arch in _arches
+            ]
            cmake_args += [f"-DCMAKE_CUDA_ARCHITECTURES={';'.join(_arches)}"]
        # Default to Ninja
@@ -131,10 +139,13 @@ class CMakeBuild(build_ext):
        if not os.path.exists(self.build_temp):
            os.makedirs(self.build_temp)
-        print(f"cmake {_ROOT_DIR} {' '.join(cmake_args)}, cwd={self.build_temp}")
+        print(
-        subprocess.check_call(["cmake", str(_ROOT_DIR)] + cmake_args, cwd=self.build_temp)
+            f"cmake {_ROOT_DIR} {' '.join(cmake_args)}, cwd={self.build_temp}")
+        subprocess.check_call(
+            ["cmake", str(_ROOT_DIR)] + cmake_args, cwd=self.build_temp)
        print(f"cmake --build . {' '.join(build_args)}, cwd={self.build_temp}")
-        subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp)
+        subprocess.check_call(
+            ["cmake", "--build", "."] + build_args, cwd=self.build_temp)
    def get_ext_filename(self, fullname):
        ext_filename = super().get_ext_filename(fullname)