From 357a364815c332f59518dd7f8dd04f20cce56108 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 29 Jun 2022 08:00:44 +0000 Subject: [PATCH] pybind kaldi can make --- paddlespeech/audio/src/CMakeLists.txt | 32 +++++++--- .../feature_common.h | 14 +++-- .../feature_common_inl.h | 50 +++++++-------- .../kaldi_feature.cc | 42 +++++++------ .../{kaldi_frontend => kaldi}/kaldi_feature.h | 26 +++++++- .../kaldi_feature_wrapper.cc | 29 ++++++--- .../src/pybind/kaldi/kaldi_feature_wrapper.h | 40 ++++++++++++ .../src/pybind/kaldi_frontend/CMakeLists.txt | 13 ---- .../kaldi_frontend/kaldi_feature_wrapper.h | 24 -------- paddlespeech/audio/src/pybind/pybind.cpp | 13 +++- paddlespeech/audio/src/pybind/sox/utils.h | 5 +- paddlespeech/audio/third_party/CMakeLists.txt | 1 - .../audio/third_party/kaldi/CMakeLists.txt | 61 ++++++++++++------- tools/setup_helpers/extension.py | 3 +- 14 files changed, 220 insertions(+), 133 deletions(-) rename paddlespeech/audio/src/pybind/{kaldi_frontend => kaldi}/feature_common.h (72%) rename paddlespeech/audio/src/pybind/{kaldi_frontend => kaldi}/feature_common_inl.h (60%) rename paddlespeech/audio/src/pybind/{kaldi_frontend => kaldi}/kaldi_feature.cc (78%) rename paddlespeech/audio/src/pybind/{kaldi_frontend => kaldi}/kaldi_feature.h (70%) rename paddlespeech/audio/src/pybind/{kaldi_frontend => kaldi}/kaldi_feature_wrapper.cc (59%) create mode 100644 paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h delete mode 100644 paddlespeech/audio/src/pybind/kaldi_frontend/CMakeLists.txt delete mode 100644 paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h diff --git a/paddlespeech/audio/src/CMakeLists.txt b/paddlespeech/audio/src/CMakeLists.txt index e7b6892f..4249e04e 100644 --- a/paddlespeech/audio/src/CMakeLists.txt +++ b/paddlespeech/audio/src/CMakeLists.txt @@ -35,11 +35,6 @@ if(BUILD_SOX) list( APPEND LIBPADDLEAUDIO_SOURCES - # sox/io.cpp - # sox/utils.cpp - # sox/effects.cpp - # sox/effects_chain.cpp - # sox/types.cpp ) list( APPEND @@ -49,6 +44,20 @@ if(BUILD_SOX) endif() +if(BUILD_KALDI) + list( + APPEND + LIBPADDLEAUDIO_LINK_LIBRARIES + libkaldi + ) + list( + APPEND + LIBPADDLEAUDIO_COMPILE_DEFINITIONS + INCLUDE_KALDI + COMPILE_WITHOUT_OPENFST + ) +endif() + #------------------------------------------------------------------------------# # END OF CUSTOMIZATION LOGICS #------------------------------------------------------------------------------# @@ -79,9 +88,9 @@ define_library( ) if (APPLE) - set(TORCHAUDIO_LIBRARY libtorchaudio CACHE INTERNAL "") + set(TORCHAUDIO_LIBRARY libpaddleaudio CACHE INTERNAL "") else() - set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libtorchaudio -Wl,--as-needed CACHE INTERNAL "") + set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libpaddleaudio -Wl,--as-needed CACHE INTERNAL "") endif() ################################################################################ @@ -136,6 +145,15 @@ if(BUILD_SOX) pybind/sox/utils.cpp ) endif() + +if(BUILD_KALDI) + list( + APPEND + EXTENSION_SOURCES + pybind/kaldi/kaldi_feature_wrapper.cc + pybind/kaldi/kaldi_feature.cc + ) +endif() #----------------------------------------------------------------------------# # END OF CUSTOMIZATION LOGICS #----------------------------------------------------------------------------# diff --git a/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h b/paddlespeech/audio/src/pybind/kaldi/feature_common.h similarity index 72% rename from paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h rename to paddlespeech/audio/src/pybind/kaldi/feature_common.h index ef4facea..dbac4cea 100644 --- a/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h +++ b/paddlespeech/audio/src/pybind/kaldi/feature_common.h @@ -19,6 +19,7 @@ #include "feat/feature-window.h" namespace paddleaudio { +namespace kaldi { namespace py = pybind11; @@ -27,21 +28,22 @@ class StreamingFeatureTpl { public: typedef typename F::Options Options; StreamingFeatureTpl(const Options& opts); - bool ComputeFeature(const kaldi::VectorBase& wav, - kaldi::Vector* feats); + bool ComputeFeature(const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav, + ::kaldi::Vector<::kaldi::BaseFloat>* feats); void Reset() { remained_wav_.Resize(0); } int Dim() { return computer_.Dim(); } private: - bool Compute(const kaldi::Vector& waves, - kaldi::Vector* feats); + bool Compute(const ::kaldi::Vector<::kaldi::BaseFloat>& waves, + ::kaldi::Vector<::kaldi::BaseFloat>* feats); Options opts_; - kaldi::FeatureWindowFunction window_function_; - kaldi::Vector remained_wav_; + ::kaldi::FeatureWindowFunction window_function_; + ::kaldi::Vector<::kaldi::BaseFloat> remained_wav_; F computer_; }; +} // namespace kaldi } // namespace ppspeech #include "feature_common_inl.h" diff --git a/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h b/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h similarity index 60% rename from paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h rename to paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h index 809cc60d..5844bf1b 100644 --- a/paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h +++ b/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h @@ -15,6 +15,7 @@ #include "base/kaldi-common.h" namespace paddleaudio { +namespace kaldi { template StreamingFeatureTpl::StreamingFeatureTpl(const Options& opts) @@ -24,21 +25,21 @@ StreamingFeatureTpl::StreamingFeatureTpl(const Options& opts) template bool StreamingFeatureTpl::ComputeFeature( - const kaldi::VectorBase& wav, - kaldi::Vector* feats) { + const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav, + ::kaldi::Vector<::kaldi::BaseFloat>* feats) { // append remaned waves - kaldi::int32 wav_len = wav.Dim(); + ::kaldi::int32 wav_len = wav.Dim(); if (wav_len == 0) return false; - kaldi::int32 left_len = remained_wav_.Dim(); - kaldi::Vector waves(left_len + wav_len); + ::kaldi::int32 left_len = remained_wav_.Dim(); + ::kaldi::Vector<::kaldi::BaseFloat> waves(left_len + wav_len); waves.Range(0, left_len).CopyFromVec(remained_wav_); waves.Range(left_len, wav_len).CopyFromVec(wav); // cache remaned waves - kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions(); - kaldi::int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts); - kaldi::int32 frame_shift = frame_opts.WindowShift(); - kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames; + ::kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions(); + ::kaldi::int32 num_frames = ::kaldi::NumFrames(waves.Dim(), frame_opts); + ::kaldi::int32 frame_shift = frame_opts.WindowShift(); + ::kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames; remained_wav_.Resize(left_samples); remained_wav_.CopyFromVec( waves.Range(frame_shift * num_frames, left_samples)); @@ -51,26 +52,26 @@ bool StreamingFeatureTpl::ComputeFeature( // Compute feat template bool StreamingFeatureTpl::Compute( - const kaldi::Vector& waves, - kaldi::Vector* feats) { - kaldi::BaseFloat vtln_warp = 1.0; - const kaldi::FrameExtractionOptions& frame_opts = + const ::kaldi::Vector<::kaldi::BaseFloat>& waves, + ::kaldi::Vector<::kaldi::BaseFloat>* feats) { + ::kaldi::BaseFloat vtln_warp = 1.0; + const ::kaldi::FrameExtractionOptions& frame_opts = computer_.GetFrameOptions(); - kaldi::int32 num_samples = waves.Dim(); - kaldi::int32 frame_length = frame_opts.WindowSize(); - kaldi::int32 sample_rate = frame_opts.samp_freq; + ::kaldi::int32 num_samples = waves.Dim(); + ::kaldi::int32 frame_length = frame_opts.WindowSize(); + ::kaldi::int32 sample_rate = frame_opts.samp_freq; if (num_samples < frame_length) { return false; } - kaldi::int32 num_frames = kaldi::NumFrames(num_samples, frame_opts); + ::kaldi::int32 num_frames = ::kaldi::NumFrames(num_samples, frame_opts); feats->Resize(num_frames * Dim()); - kaldi::Vector window; + ::kaldi::Vector<::kaldi::BaseFloat> window; bool need_raw_log_energy = computer_.NeedRawLogEnergy(); - for (kaldi::int32 frame = 0; frame < num_frames; frame++) { - kaldi::BaseFloat raw_log_energy = 0.0; - kaldi::ExtractWindow(0, + for (::kaldi::int32 frame = 0; frame < num_frames; frame++) { + ::kaldi::BaseFloat raw_log_energy = 0.0; + ::kaldi::ExtractWindow(0, waves, frame, frame_opts, @@ -78,14 +79,15 @@ bool StreamingFeatureTpl::Compute( &window, need_raw_log_energy ? &raw_log_energy : NULL); - kaldi::Vector this_feature(computer_.Dim(), - kaldi::kUndefined); + ::kaldi::Vector<::kaldi::BaseFloat> this_feature(computer_.Dim(), + ::kaldi::kUndefined); computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature); - kaldi::SubVector output_row( + ::kaldi::SubVector<::kaldi::BaseFloat> output_row( feats->Data() + frame * Dim(), Dim()); output_row.CopyFromVec(this_feature); } return true; } +} // namespace kaldi } // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc similarity index 78% rename from paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc rename to paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc index 5a367b12..faafac90 100644 --- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc +++ b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc @@ -1,10 +1,21 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. -#include -#include +#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h" -#include "kaldi_feature_wrapper.h" - -namespace py = pybind11; +namespace paddleaudio { +namespace kaldi { bool InitFbank(float samp_freq, // frame opts float frame_shift_ms, @@ -32,7 +43,7 @@ bool InitFbank(float samp_freq, // frame opts bool htk_compat, bool use_log_fbank, bool use_power) { - kaldi::FbankOptions opts; + ::kaldi::FbankOptions opts; opts.frame_opts.samp_freq = samp_freq; // frame opts opts.frame_opts.frame_shift_ms = frame_shift_ms; opts.frame_opts.frame_length_ms = frame_length_ms; @@ -61,12 +72,12 @@ bool InitFbank(float samp_freq, // frame opts opts.htk_compat = htk_compat; opts.use_log_fbank = use_log_fbank; opts.use_power = use_power; - paddleaudio::KaldiFeatureWrapper::GetInstance()->InitFbank(opts); + paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->InitFbank(opts); return true; } py::array_t ComputeFbankStreaming(const py::array_t& wav) { - return paddleaudio::KaldiFeatureWrapper::GetInstance()->ComputeFbank(wav); + return paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ComputeFbank(wav); } py::array_t ComputeFbank( @@ -124,21 +135,14 @@ py::array_t ComputeFbank( use_log_fbank, use_power); py::array_t result = ComputeFbankStreaming(wav); - paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank(); + paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank(); return result; } void ResetFbank() { - paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank(); + paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank(); } -PYBIND11_MODULE(kaldi_featurepy, m) { - m.doc() = "kaldi_feature example"; - m.def("InitFbank", &InitFbank, "init fbank"); - m.def("ResetFbank", &ResetFbank, "reset fbank"); - m.def("ComputeFbank", &ComputeFbank, "compute fbank"); - m.def("ComputeFbankStreaming", - &ComputeFbankStreaming, - "compute fbank streaming"); -} +} // kaldi +} // paddleaudio diff --git a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h similarity index 70% rename from paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h rename to paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h index adb49222..ff55a707 100644 --- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h +++ b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h @@ -1,10 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + #include #include -#include "kaldi_feature_wrapper.h" +#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h" namespace py = pybind11; +namespace paddleaudio { +namespace kaldi { + bool InitFbank(float samp_freq, // frame opts float frame_shift_ms, float frame_length_ms, @@ -41,7 +60,7 @@ py::array_t ComputeFbank( bool remove_dc_offset, std::string window_type, // e.g. Hamming window bool round_to_power_of_two, - kaldi::BaseFloat blackman_coeff, + ::kaldi::BaseFloat blackman_coeff, bool snip_edges, bool allow_downsample, bool allow_upsample, @@ -68,3 +87,6 @@ void ResetFbank(); py::array_t ComputeFbankStreaming(const py::array_t& wav); py::array_t TestFun(const py::array_t& wav); + +} // namespace kaldi +} // namespace paddleaudio \ No newline at end of file diff --git a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc similarity index 59% rename from paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc rename to paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc index ce5ae412..23caf6cc 100644 --- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc +++ b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc @@ -1,13 +1,28 @@ -#include "kaldi_feature_wrapper.h" +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h" namespace paddleaudio { +namespace kaldi { KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() { static KaldiFeatureWrapper instance; return &instance; } -bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) { +bool KaldiFeatureWrapper::InitFbank(::kaldi::FbankOptions opts) { fbank_.reset(new Fbank(opts)); return true; } @@ -15,7 +30,7 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) { py::array_t KaldiFeatureWrapper::ComputeFbank( const py::array_t wav) { py::buffer_info info = wav.request(); - kaldi::Vector input_wav(info.size); + ::kaldi::Vector<::kaldi::BaseFloat> input_wav(info.size); double* wav_ptr = (double*)info.ptr; for (int idx = 0; idx < info.size; ++idx) { input_wav(idx) = *wav_ptr; @@ -23,7 +38,7 @@ py::array_t KaldiFeatureWrapper::ComputeFbank( } - kaldi::Vector feats; + ::kaldi::Vector<::kaldi::BaseFloat> feats; bool flag = fbank_->ComputeFeature(input_wav, &feats); if (flag == false || feats.Dim() == 0) return py::array_t(); auto result = py::array_t(feats.Dim()); @@ -44,8 +59,8 @@ py::array_t KaldiFeatureWrapper::ComputeFbank( py::buffer_info info = wav.request(); std::cout << info.size << std::endl; auto result = py::array_t(info.size); - //kaldi::Vector input_wav(info.size); - kaldi::Vector input_wav(info.size); + //::kaldi::Vector<::kaldi::BaseFloat> input_wav(info.size); + ::kaldi::Vector input_wav(info.size); py::buffer_info info_re = result.request(); memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes()); @@ -55,5 +70,5 @@ py::array_t KaldiFeatureWrapper::ComputeFbank( */ } - +} // namesapce kaldi } // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h new file mode 100644 index 00000000..d2c01a7d --- /dev/null +++ b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "base/kaldi-common.h" +#include "feat/feature-fbank.h" + +#include "paddlespeech/audio/src/pybind/kaldi/feature_common.h" + +namespace paddleaudio { +namespace kaldi { + +typedef StreamingFeatureTpl<::kaldi::FbankComputer> Fbank; + +class KaldiFeatureWrapper { + public: + static KaldiFeatureWrapper* GetInstance(); + bool InitFbank(::kaldi::FbankOptions opts); + py::array_t ComputeFbank(const py::array_t wav); + int Dim() { return fbank_->Dim(); } + void ResetFbank() { fbank_->Reset(); } + + private: + std::unique_ptr fbank_; +}; + +} // namespace kaldi +} // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/kaldi_frontend/CMakeLists.txt b/paddlespeech/audio/src/pybind/kaldi_frontend/CMakeLists.txt deleted file mode 100644 index b9212210..00000000 --- a/paddlespeech/audio/src/pybind/kaldi_frontend/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -) - -add_library(kaldi_feature - kaldi_feature.cc - kaldi_feature_wrapper.cc -) -target_link_libraries(kaldi_feature kaldi-fbank) - -pybind11_add_module(kaldi_frontend kaldi_feature.cc kaldi_feature_wrapper.cc) -target_link_libraries(kaldi_frontend PRIVATE kaldi_feature) diff --git a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h b/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h deleted file mode 100644 index d025bf39..00000000 --- a/paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h +++ /dev/null @@ -1,24 +0,0 @@ - -#include "base/kaldi-common.h" -#include "feat/feature-fbank.h" -#include "feature_common.h" - -#pragma once - -namespace paddleaudio { - -typedef StreamingFeatureTpl Fbank; - -class KaldiFeatureWrapper { - public: - static KaldiFeatureWrapper* GetInstance(); - bool InitFbank(kaldi::FbankOptions opts); - py::array_t ComputeFbank(const py::array_t wav); - int Dim() { return fbank_->Dim(); } - void ResetFbank() { fbank_->Reset(); } - - private: - std::unique_ptr fbank_; -}; - -} // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/pybind.cpp b/paddlespeech/audio/src/pybind/pybind.cpp index 38960136..949ce23c 100644 --- a/paddlespeech/audio/src/pybind/pybind.cpp +++ b/paddlespeech/audio/src/pybind/pybind.cpp @@ -1,8 +1,10 @@ -// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), -// All rights reserved. +// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), All rights reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. #include "paddlespeech/audio/src/pybind/sox/io.h" +#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h" +// Sox PYBIND11_MODULE(_paddleaudio, m) { m.def("get_info_file", &paddleaudio::sox_io::get_info_file, @@ -10,4 +12,11 @@ PYBIND11_MODULE(_paddleaudio, m) { m.def("get_info_fileobj", &paddleaudio::sox_io::get_info_fileobj, "Get metadata of audio in file object."); + + m.def("InitFbank", &paddleaudio::kaldi::InitFbank, "init fbank"); + m.def("ResetFbank", &paddleaudio::kaldi::ResetFbank, "reset fbank"); + m.def("ComputeFbank", &paddleaudio::kaldi::ComputeFbank, "compute fbank"); + m.def("ComputeFbankStreaming", + &paddleaudio::kaldi::ComputeFbankStreaming, + "compute fbank streaming"); } \ No newline at end of file diff --git a/paddlespeech/audio/src/pybind/sox/utils.h b/paddlespeech/audio/src/pybind/sox/utils.h index 9445fe31..b294b808 100644 --- a/paddlespeech/audio/src/pybind/sox/utils.h +++ b/paddlespeech/audio/src/pybind/sox/utils.h @@ -1,8 +1,7 @@ // Copyright (c) 2017 Facebook Inc. (Soumith Chintala), // All rights reserved. -#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H -#define PADDLEAUDIO_PYBIND_SOX_UTILS_H +#pragma once #include #include @@ -41,5 +40,3 @@ std::string get_encoding(sox_encoding_t encoding); } // namespace paddleaudio } // namespace sox_utils - -#endif diff --git a/paddlespeech/audio/third_party/CMakeLists.txt b/paddlespeech/audio/third_party/CMakeLists.txt index 8dc43c36..43288f39 100644 --- a/paddlespeech/audio/third_party/CMakeLists.txt +++ b/paddlespeech/audio/third_party/CMakeLists.txt @@ -12,5 +12,4 @@ endif() ################################################################################ if (BUILD_KALDI) add_subdirectory(kaldi) - message(STATUS "Build Kaldi") endif() \ No newline at end of file diff --git a/paddlespeech/audio/third_party/kaldi/CMakeLists.txt b/paddlespeech/audio/third_party/kaldi/CMakeLists.txt index 2310fb8c..14844544 100644 --- a/paddlespeech/audio/third_party/kaldi/CMakeLists.txt +++ b/paddlespeech/audio/third_party/kaldi/CMakeLists.txt @@ -2,11 +2,6 @@ # compile kaldi without openfst add_definitions("-DCOMPILE_WITHOUT_OPENFST") -# include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/kaldi) -# include_directories(/usr/include/python3.7m) - -set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - # function (define_library name source include_dirs link_libraries compile_defs) # add_library(${name} INTERFACE ${source}) # target_include_directories(${name} INTERFACE ${include_dirs}) @@ -24,18 +19,18 @@ set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # endfunction() # kaldi-base -add_library(kaldi-base INTERFACE +add_library(kaldi-base STATIC base/io-funcs.cc base/kaldi-error.cc base/kaldi-math.cc base/kaldi-utils.cc base/timer.cc ) -target_include_directories(kaldi-base INTERFACE ${INSTALL_DIR}/base) -target_compile_definitions(kaldi-base INTERFACE "-DCOMPILE_WITHOUT_OPENFST") +target_include_directories(kaldi-base PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + # kaldi-matrix -add_library(kaldi-matrix INTERFACE +add_library(kaldi-matrix STATIC matrix/compressed-matrix.cc matrix/kaldi-matrix.cc matrix/kaldi-vector.cc @@ -48,11 +43,12 @@ add_library(kaldi-matrix INTERFACE matrix/srfft.cc matrix/tp-matrix.cc ) -target_include_directories(kaldi-matrix INTERFACE ${INSTALL_DIR}/matrix) -target_link_libraries(kaldi-matrix INTERFACE gfortran kaldi-base libopenblas.a) +target_include_directories(kaldi-matrix PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base libopenblas.a) + # kaldi-util -add_library(kaldi-util INTERFACE +add_library(kaldi-util STATIC util/kaldi-holder.cc util/kaldi-io.cc util/kaldi-semaphore.cc @@ -63,11 +59,12 @@ add_library(kaldi-util INTERFACE util/simple-options.cc util/text-utils.cc ) -target_include_directories(kaldi-util INTERFACE ${INSTALL_DIR}/util) -target_link_libraries(kaldi-util INTERFACE kaldi-base kaldi-matrix) +target_include_directories(kaldi-util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix) + # kaldi-feat-common -add_library(kaldi-feat-common INTERFACE +add_library(kaldi-feat-common STATIC feat/wave-reader.cc feat/signal.cc feat/feature-functions.cc @@ -76,19 +73,37 @@ add_library(kaldi-feat-common INTERFACE feat/mel-computations.cc feat/cmvn.cc ) -target_include_directories(kaldi-feat-common INTERFACE ${INSTALL_DIR}/feat) -target_link_libraries(kaldi-feat-common INTERFACE kaldi-base kaldi-matrix kaldi-util) +target_include_directories(kaldi-feat-common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util) + # kaldi-mfcc -add_library(kaldi-mfcc INTERFACE +add_library(kaldi-mfcc STATIC feat/feature-mfcc.cc ) -target_include_directories(kaldi-mfcc INTERFACE ${INSTALL_DIR}/feat) -target_link_libraries(kaldi-mfcc INTERFACE kaldi-feat-common) +target_include_directories(kaldi-mfcc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common) + # kaldi-fbank -add_library(kaldi-fbank INTERFACE +add_library(kaldi-fbank STATIC feat/feature-fbank.cc ) -target_include_directories(kaldi-fbank INTERFACE ${INSTALL_DIR}/feat) -target_link_libraries(kaldi-fbank INTERFACE kaldi-feat-common) \ No newline at end of file +target_include_directories(kaldi-fbank PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common) + + +set(KALDI_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-base.a + ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-matrix.a + ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-util.a + ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-feat-common.a + ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-mfcc.a + ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-fbank.a +) + +add_library(libkaldi INTERFACE) +add_dependencies(libkaldi kaldi-base kaldi-matrix kaldi-util kaldi-feat-common kaldi-mfcc kaldi-fbank) +target_include_directories(libkaldi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(libkaldi INTERFACE ${KALDI_LIBRARIES}) +target_compile_definitions(libkaldi INTERFACE "-DCOMPILE_WITHOUT_OPENFST") \ No newline at end of file diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py index e8b48ec6..97babab6 100644 --- a/tools/setup_helpers/extension.py +++ b/tools/setup_helpers/extension.py @@ -88,7 +88,8 @@ class CMakeBuild(build_ext): # f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}", f"-DCMAKE_INSTALL_PREFIX={extdir}", "-DCMAKE_VERBOSE_MAKEFILE=ON", - f"-DPython_INCLUDE_DIRS={distutils.sysconfig.get_python_inc()}", + f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}", + f"-DPYTHON_LIBRARY={distutils.sysconfig.get_config_var('LIBDIR')}", f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}", f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}", # f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}", -- GitLab