提交 357a3648 编写于 作者: H Hui Zhang

pybind kaldi can make

上级 52477a10
...@@ -35,11 +35,6 @@ if(BUILD_SOX) ...@@ -35,11 +35,6 @@ if(BUILD_SOX)
list( list(
APPEND APPEND
LIBPADDLEAUDIO_SOURCES LIBPADDLEAUDIO_SOURCES
# sox/io.cpp
# sox/utils.cpp
# sox/effects.cpp
# sox/effects_chain.cpp
# sox/types.cpp
) )
list( list(
APPEND APPEND
...@@ -49,6 +44,20 @@ if(BUILD_SOX) ...@@ -49,6 +44,20 @@ if(BUILD_SOX)
endif() endif()
# When BUILD_KALDI is enabled, append the libkaldi target to the link list and
# the Kaldi-related preprocessor symbols to the compile-definition list.
if(BUILD_KALDI)
  list(APPEND LIBPADDLEAUDIO_LINK_LIBRARIES libkaldi)
  list(APPEND LIBPADDLEAUDIO_COMPILE_DEFINITIONS
       INCLUDE_KALDI
       COMPILE_WITHOUT_OPENFST)
endif()
#------------------------------------------------------------------------------# #------------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS # END OF CUSTOMIZATION LOGICS
#------------------------------------------------------------------------------# #------------------------------------------------------------------------------#
...@@ -79,9 +88,9 @@ define_library( ...@@ -79,9 +88,9 @@ define_library(
) )
if (APPLE) if (APPLE)
set(TORCHAUDIO_LIBRARY libtorchaudio CACHE INTERNAL "") set(TORCHAUDIO_LIBRARY libpaddleaudio CACHE INTERNAL "")
else() else()
set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libtorchaudio -Wl,--as-needed CACHE INTERNAL "") set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libpaddleaudio -Wl,--as-needed CACHE INTERNAL "")
endif() endif()
################################################################################ ################################################################################
...@@ -136,6 +145,15 @@ if(BUILD_SOX) ...@@ -136,6 +145,15 @@ if(BUILD_SOX)
pybind/sox/utils.cpp pybind/sox/utils.cpp
) )
endif() endif()
# When BUILD_KALDI is enabled, add the Kaldi feature binding sources to
# EXTENSION_SOURCES.
if(BUILD_KALDI)
  list(APPEND EXTENSION_SOURCES
       pybind/kaldi/kaldi_feature_wrapper.cc
       pybind/kaldi/kaldi_feature.cc)
endif()
#----------------------------------------------------------------------------# #----------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS # END OF CUSTOMIZATION LOGICS
#----------------------------------------------------------------------------# #----------------------------------------------------------------------------#
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "feat/feature-window.h" #include "feat/feature-window.h"
namespace paddleaudio { namespace paddleaudio {
namespace kaldi {
namespace py = pybind11; namespace py = pybind11;
...@@ -27,21 +28,22 @@ class StreamingFeatureTpl { ...@@ -27,21 +28,22 @@ class StreamingFeatureTpl {
public: public:
typedef typename F::Options Options; typedef typename F::Options Options;
StreamingFeatureTpl(const Options& opts); StreamingFeatureTpl(const Options& opts);
bool ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav, bool ComputeFeature(const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
kaldi::Vector<kaldi::BaseFloat>* feats); ::kaldi::Vector<::kaldi::BaseFloat>* feats);
void Reset() { remained_wav_.Resize(0); } void Reset() { remained_wav_.Resize(0); }
int Dim() { return computer_.Dim(); } int Dim() { return computer_.Dim(); }
private: private:
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves, bool Compute(const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
kaldi::Vector<kaldi::BaseFloat>* feats); ::kaldi::Vector<::kaldi::BaseFloat>* feats);
Options opts_; Options opts_;
kaldi::FeatureWindowFunction window_function_; ::kaldi::FeatureWindowFunction window_function_;
kaldi::Vector<kaldi::BaseFloat> remained_wav_; ::kaldi::Vector<::kaldi::BaseFloat> remained_wav_;
F computer_; F computer_;
}; };
} // namespace kaldi
} // namespace paddleaudio } // namespace paddleaudio
#include "feature_common_inl.h" #include "feature_common_inl.h"
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "base/kaldi-common.h" #include "base/kaldi-common.h"
namespace paddleaudio { namespace paddleaudio {
namespace kaldi {
template <class F> template <class F>
StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts) StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
...@@ -24,21 +25,21 @@ StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts) ...@@ -24,21 +25,21 @@ StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
template <class F> template <class F>
bool StreamingFeatureTpl<F>::ComputeFeature( bool StreamingFeatureTpl<F>::ComputeFeature(
const kaldi::VectorBase<kaldi::BaseFloat>& wav, const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
kaldi::Vector<kaldi::BaseFloat>* feats) { ::kaldi::Vector<::kaldi::BaseFloat>* feats) {
// append remained waves // append remained waves
kaldi::int32 wav_len = wav.Dim(); ::kaldi::int32 wav_len = wav.Dim();
if (wav_len == 0) return false; if (wav_len == 0) return false;
kaldi::int32 left_len = remained_wav_.Dim(); ::kaldi::int32 left_len = remained_wav_.Dim();
kaldi::Vector<kaldi::BaseFloat> waves(left_len + wav_len); ::kaldi::Vector<::kaldi::BaseFloat> waves(left_len + wav_len);
waves.Range(0, left_len).CopyFromVec(remained_wav_); waves.Range(0, left_len).CopyFromVec(remained_wav_);
waves.Range(left_len, wav_len).CopyFromVec(wav); waves.Range(left_len, wav_len).CopyFromVec(wav);
// cache remained waves // cache remained waves
kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions(); ::kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
kaldi::int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts); ::kaldi::int32 num_frames = ::kaldi::NumFrames(waves.Dim(), frame_opts);
kaldi::int32 frame_shift = frame_opts.WindowShift(); ::kaldi::int32 frame_shift = frame_opts.WindowShift();
kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames; ::kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames;
remained_wav_.Resize(left_samples); remained_wav_.Resize(left_samples);
remained_wav_.CopyFromVec( remained_wav_.CopyFromVec(
waves.Range(frame_shift * num_frames, left_samples)); waves.Range(frame_shift * num_frames, left_samples));
...@@ -51,26 +52,26 @@ bool StreamingFeatureTpl<F>::ComputeFeature( ...@@ -51,26 +52,26 @@ bool StreamingFeatureTpl<F>::ComputeFeature(
// Compute feat // Compute feat
template <class F> template <class F>
bool StreamingFeatureTpl<F>::Compute( bool StreamingFeatureTpl<F>::Compute(
const kaldi::Vector<kaldi::BaseFloat>& waves, const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
kaldi::Vector<kaldi::BaseFloat>* feats) { ::kaldi::Vector<::kaldi::BaseFloat>* feats) {
kaldi::BaseFloat vtln_warp = 1.0; ::kaldi::BaseFloat vtln_warp = 1.0;
const kaldi::FrameExtractionOptions& frame_opts = const ::kaldi::FrameExtractionOptions& frame_opts =
computer_.GetFrameOptions(); computer_.GetFrameOptions();
kaldi::int32 num_samples = waves.Dim(); ::kaldi::int32 num_samples = waves.Dim();
kaldi::int32 frame_length = frame_opts.WindowSize(); ::kaldi::int32 frame_length = frame_opts.WindowSize();
kaldi::int32 sample_rate = frame_opts.samp_freq; ::kaldi::int32 sample_rate = frame_opts.samp_freq;
if (num_samples < frame_length) { if (num_samples < frame_length) {
return false; return false;
} }
kaldi::int32 num_frames = kaldi::NumFrames(num_samples, frame_opts); ::kaldi::int32 num_frames = ::kaldi::NumFrames(num_samples, frame_opts);
feats->Resize(num_frames * Dim()); feats->Resize(num_frames * Dim());
kaldi::Vector<kaldi::BaseFloat> window; ::kaldi::Vector<::kaldi::BaseFloat> window;
bool need_raw_log_energy = computer_.NeedRawLogEnergy(); bool need_raw_log_energy = computer_.NeedRawLogEnergy();
for (kaldi::int32 frame = 0; frame < num_frames; frame++) { for (::kaldi::int32 frame = 0; frame < num_frames; frame++) {
kaldi::BaseFloat raw_log_energy = 0.0; ::kaldi::BaseFloat raw_log_energy = 0.0;
kaldi::ExtractWindow(0, ::kaldi::ExtractWindow(0,
waves, waves,
frame, frame,
frame_opts, frame_opts,
...@@ -78,14 +79,15 @@ bool StreamingFeatureTpl<F>::Compute( ...@@ -78,14 +79,15 @@ bool StreamingFeatureTpl<F>::Compute(
&window, &window,
need_raw_log_energy ? &raw_log_energy : NULL); need_raw_log_energy ? &raw_log_energy : NULL);
kaldi::Vector<kaldi::BaseFloat> this_feature(computer_.Dim(), ::kaldi::Vector<::kaldi::BaseFloat> this_feature(computer_.Dim(),
kaldi::kUndefined); ::kaldi::kUndefined);
computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature); computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature);
kaldi::SubVector<kaldi::BaseFloat> output_row( ::kaldi::SubVector<::kaldi::BaseFloat> output_row(
feats->Data() + frame * Dim(), Dim()); feats->Data() + frame * Dim(), Dim());
output_row.CopyFromVec(this_feature); output_row.CopyFromVec(this_feature);
} }
return true; return true;
} }
} // namespace kaldi
} // namespace paddleaudio } // namespace paddleaudio
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pybind11/numpy.h> #include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h" namespace paddleaudio {
namespace kaldi {
namespace py = pybind11;
bool InitFbank(float samp_freq, // frame opts bool InitFbank(float samp_freq, // frame opts
float frame_shift_ms, float frame_shift_ms,
...@@ -32,7 +43,7 @@ bool InitFbank(float samp_freq, // frame opts ...@@ -32,7 +43,7 @@ bool InitFbank(float samp_freq, // frame opts
bool htk_compat, bool htk_compat,
bool use_log_fbank, bool use_log_fbank,
bool use_power) { bool use_power) {
kaldi::FbankOptions opts; ::kaldi::FbankOptions opts;
opts.frame_opts.samp_freq = samp_freq; // frame opts opts.frame_opts.samp_freq = samp_freq; // frame opts
opts.frame_opts.frame_shift_ms = frame_shift_ms; opts.frame_opts.frame_shift_ms = frame_shift_ms;
opts.frame_opts.frame_length_ms = frame_length_ms; opts.frame_opts.frame_length_ms = frame_length_ms;
...@@ -61,12 +72,12 @@ bool InitFbank(float samp_freq, // frame opts ...@@ -61,12 +72,12 @@ bool InitFbank(float samp_freq, // frame opts
opts.htk_compat = htk_compat; opts.htk_compat = htk_compat;
opts.use_log_fbank = use_log_fbank; opts.use_log_fbank = use_log_fbank;
opts.use_power = use_power; opts.use_power = use_power;
paddleaudio::KaldiFeatureWrapper::GetInstance()->InitFbank(opts); paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->InitFbank(opts);
return true; return true;
} }
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav) { py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav) {
return paddleaudio::KaldiFeatureWrapper::GetInstance()->ComputeFbank(wav); return paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ComputeFbank(wav);
} }
py::array_t<double> ComputeFbank( py::array_t<double> ComputeFbank(
...@@ -124,21 +135,14 @@ py::array_t<double> ComputeFbank( ...@@ -124,21 +135,14 @@ py::array_t<double> ComputeFbank(
use_log_fbank, use_log_fbank,
use_power); use_power);
py::array_t<double> result = ComputeFbankStreaming(wav); py::array_t<double> result = ComputeFbankStreaming(wav);
paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank(); paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
return result; return result;
} }
void ResetFbank() { void ResetFbank() {
paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank(); paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
} }
PYBIND11_MODULE(kaldi_featurepy, m) { } // kaldi
m.doc() = "kaldi_feature example"; } // paddleaudio
m.def("InitFbank", &InitFbank, "init fbank");
m.def("ResetFbank", &ResetFbank, "reset fbank");
m.def("ComputeFbank", &ComputeFbank, "compute fbank");
m.def("ComputeFbankStreaming",
&ComputeFbankStreaming,
"compute fbank streaming");
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/numpy.h> #include <pybind11/numpy.h>
#include <pybind11/pybind11.h> #include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h" #include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h"
namespace py = pybind11; namespace py = pybind11;
namespace paddleaudio {
namespace kaldi {
bool InitFbank(float samp_freq, // frame opts bool InitFbank(float samp_freq, // frame opts
float frame_shift_ms, float frame_shift_ms,
float frame_length_ms, float frame_length_ms,
...@@ -41,7 +60,7 @@ py::array_t<double> ComputeFbank( ...@@ -41,7 +60,7 @@ py::array_t<double> ComputeFbank(
bool remove_dc_offset, bool remove_dc_offset,
std::string window_type, // e.g. Hamming window std::string window_type, // e.g. Hamming window
bool round_to_power_of_two, bool round_to_power_of_two,
kaldi::BaseFloat blackman_coeff, ::kaldi::BaseFloat blackman_coeff,
bool snip_edges, bool snip_edges,
bool allow_downsample, bool allow_downsample,
bool allow_upsample, bool allow_upsample,
...@@ -68,3 +87,6 @@ void ResetFbank(); ...@@ -68,3 +87,6 @@ void ResetFbank();
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav); py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav);
py::array_t<double> TestFun(const py::array_t<double>& wav); py::array_t<double> TestFun(const py::array_t<double>& wav);
} // namespace kaldi
} // namespace paddleaudio
\ No newline at end of file
#include "kaldi_feature_wrapper.h" // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h"
namespace paddleaudio { namespace paddleaudio {
namespace kaldi {
KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() { KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
static KaldiFeatureWrapper instance; static KaldiFeatureWrapper instance;
return &instance; return &instance;
} }
bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) { bool KaldiFeatureWrapper::InitFbank(::kaldi::FbankOptions opts) {
fbank_.reset(new Fbank(opts)); fbank_.reset(new Fbank(opts));
return true; return true;
} }
...@@ -15,7 +30,7 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) { ...@@ -15,7 +30,7 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
py::array_t<double> KaldiFeatureWrapper::ComputeFbank( py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
const py::array_t<double> wav) { const py::array_t<double> wav) {
py::buffer_info info = wav.request(); py::buffer_info info = wav.request();
kaldi::Vector<kaldi::BaseFloat> input_wav(info.size); ::kaldi::Vector<::kaldi::BaseFloat> input_wav(info.size);
double* wav_ptr = (double*)info.ptr; double* wav_ptr = (double*)info.ptr;
for (int idx = 0; idx < info.size; ++idx) { for (int idx = 0; idx < info.size; ++idx) {
input_wav(idx) = *wav_ptr; input_wav(idx) = *wav_ptr;
...@@ -23,7 +38,7 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank( ...@@ -23,7 +38,7 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
} }
kaldi::Vector<kaldi::BaseFloat> feats; ::kaldi::Vector<::kaldi::BaseFloat> feats;
bool flag = fbank_->ComputeFeature(input_wav, &feats); bool flag = fbank_->ComputeFeature(input_wav, &feats);
if (flag == false || feats.Dim() == 0) return py::array_t<double>(); if (flag == false || feats.Dim() == 0) return py::array_t<double>();
auto result = py::array_t<double>(feats.Dim()); auto result = py::array_t<double>(feats.Dim());
...@@ -44,8 +59,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank( ...@@ -44,8 +59,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
py::buffer_info info = wav.request(); py::buffer_info info = wav.request();
std::cout << info.size << std::endl; std::cout << info.size << std::endl;
auto result = py::array_t<double>(info.size); auto result = py::array_t<double>(info.size);
//kaldi::Vector<kaldi::BaseFloat> input_wav(info.size); //::kaldi::Vector<::kaldi::BaseFloat> input_wav(info.size);
kaldi::Vector<double> input_wav(info.size); ::kaldi::Vector<double> input_wav(info.size);
py::buffer_info info_re = result.request(); py::buffer_info info_re = result.request();
memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes()); memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
...@@ -55,5 +70,5 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank( ...@@ -55,5 +70,5 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
*/ */
} }
} // namespace kaldi
} // namespace paddleaudio } // namespace paddleaudio
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/kaldi-common.h"
#include "feat/feature-fbank.h"
#include "paddlespeech/audio/src/pybind/kaldi/feature_common.h"
namespace paddleaudio {
namespace kaldi {

// Streaming feature extractor specialised for Kaldi's filter-bank computer.
using Fbank = StreamingFeatureTpl<::kaldi::FbankComputer>;

// Holder for the fbank extractor that backs the Python bindings.
//
// The extractor only exists after InitFbank() has been called; until then
// fbank_ is null, so the inline accessors below guard against that state
// instead of dereferencing a null pointer.
class KaldiFeatureWrapper {
  public:
    // Returns a pointer to the wrapper instance (defined in the .cc file).
    static KaldiFeatureWrapper* GetInstance();

    // Creates (or replaces) the fbank extractor from `opts`.
    bool InitFbank(::kaldi::FbankOptions opts);

    // Computes fbank features for the samples in `wav`.
    py::array_t<double> ComputeFbank(const py::array_t<double> wav);

    // Feature dimension of the extractor, or 0 when InitFbank() has not been
    // called yet.
    int Dim() { return fbank_ ? fbank_->Dim() : 0; }

    // Resets the extractor's streaming state. Does nothing when InitFbank()
    // has not been called yet.
    void ResetFbank() {
        if (fbank_) {
            fbank_->Reset();
        }
    }

  private:
    // Null until InitFbank() succeeds.
    std::unique_ptr<paddleaudio::kaldi::Fbank> fbank_;
};

}  // namespace kaldi
}  // namespace paddleaudio
# Make headers in this directory visible to the targets defined below.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Library built from the fbank binding sources, linked against kaldi-fbank.
add_library(kaldi_feature kaldi_feature.cc kaldi_feature_wrapper.cc)
target_link_libraries(kaldi_feature kaldi-fbank)

# pybind11 module built from the same two sources and linked to the library.
pybind11_add_module(kaldi_frontend
  kaldi_feature.cc
  kaldi_feature_wrapper.cc
)
target_link_libraries(kaldi_frontend PRIVATE kaldi_feature)
#include "base/kaldi-common.h"
#include "feat/feature-fbank.h"
#include "feature_common.h"
#pragma once
namespace paddleaudio {

// Streaming feature extractor specialised for Kaldi's filter-bank computer.
using Fbank = StreamingFeatureTpl<kaldi::FbankComputer>;

// Holder for the fbank extractor that backs the Python bindings.
//
// The extractor only exists after InitFbank() has been called; until then
// fbank_ is null, so the inline accessors below guard against that state
// instead of dereferencing a null pointer.
class KaldiFeatureWrapper {
  public:
    // Returns a pointer to the wrapper instance (defined in the .cc file).
    static KaldiFeatureWrapper* GetInstance();

    // Creates (or replaces) the fbank extractor from `opts`.
    bool InitFbank(kaldi::FbankOptions opts);

    // Computes fbank features for the samples in `wav`.
    py::array_t<double> ComputeFbank(const py::array_t<double> wav);

    // Feature dimension of the extractor, or 0 when InitFbank() has not been
    // called yet.
    int Dim() { return fbank_ ? fbank_->Dim() : 0; }

    // Resets the extractor's streaming state. Does nothing when InitFbank()
    // has not been called yet.
    void ResetFbank() {
        if (fbank_) {
            fbank_->Reset();
        }
    }

  private:
    // Null until InitFbank() succeeds.
    std::unique_ptr<paddleaudio::Fbank> fbank_;
};

}  // namespace paddleaudio
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), // Copyright (c) 2017 Facebook Inc. (Soumith Chintala), All rights reserved.
// All rights reserved. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#include "paddlespeech/audio/src/pybind/sox/io.h" #include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
// Sox
PYBIND11_MODULE(_paddleaudio, m) { PYBIND11_MODULE(_paddleaudio, m) {
m.def("get_info_file", m.def("get_info_file",
&paddleaudio::sox_io::get_info_file, &paddleaudio::sox_io::get_info_file,
...@@ -10,4 +12,11 @@ PYBIND11_MODULE(_paddleaudio, m) { ...@@ -10,4 +12,11 @@ PYBIND11_MODULE(_paddleaudio, m) {
m.def("get_info_fileobj", m.def("get_info_fileobj",
&paddleaudio::sox_io::get_info_fileobj, &paddleaudio::sox_io::get_info_fileobj,
"Get metadata of audio in file object."); "Get metadata of audio in file object.");
m.def("InitFbank", &paddleaudio::kaldi::InitFbank, "init fbank");
m.def("ResetFbank", &paddleaudio::kaldi::ResetFbank, "reset fbank");
m.def("ComputeFbank", &paddleaudio::kaldi::ComputeFbank, "compute fbank");
m.def("ComputeFbankStreaming",
&paddleaudio::kaldi::ComputeFbankStreaming,
"compute fbank streaming");
} }
\ No newline at end of file
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), // Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
// All rights reserved. // All rights reserved.
#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H #pragma once
#define PADDLEAUDIO_PYBIND_SOX_UTILS_H
#include <pybind11/pybind11.h> #include <pybind11/pybind11.h>
#include <sox.h> #include <sox.h>
...@@ -41,5 +40,3 @@ std::string get_encoding(sox_encoding_t encoding); ...@@ -41,5 +40,3 @@ std::string get_encoding(sox_encoding_t encoding);
} // namespace paddleaudio } // namespace paddleaudio
} // namespace sox_utils } // namespace sox_utils
#endif
...@@ -12,5 +12,4 @@ endif() ...@@ -12,5 +12,4 @@ endif()
################################################################################ ################################################################################
if (BUILD_KALDI) if (BUILD_KALDI)
add_subdirectory(kaldi) add_subdirectory(kaldi)
message(STATUS "Build Kaldi")
endif() endif()
\ No newline at end of file
...@@ -2,11 +2,6 @@ ...@@ -2,11 +2,6 @@
# compile kaldi without openfst # compile kaldi without openfst
add_definitions("-DCOMPILE_WITHOUT_OPENFST") add_definitions("-DCOMPILE_WITHOUT_OPENFST")
# include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/kaldi)
# include_directories(/usr/include/python3.7m)
set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# function (define_library name source include_dirs link_libraries compile_defs) # function (define_library name source include_dirs link_libraries compile_defs)
# add_library(${name} INTERFACE ${source}) # add_library(${name} INTERFACE ${source})
# target_include_directories(${name} INTERFACE ${include_dirs}) # target_include_directories(${name} INTERFACE ${include_dirs})
...@@ -24,18 +19,18 @@ set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) ...@@ -24,18 +19,18 @@ set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# endfunction() # endfunction()
# kaldi-base # kaldi-base
add_library(kaldi-base INTERFACE add_library(kaldi-base STATIC
base/io-funcs.cc base/io-funcs.cc
base/kaldi-error.cc base/kaldi-error.cc
base/kaldi-math.cc base/kaldi-math.cc
base/kaldi-utils.cc base/kaldi-utils.cc
base/timer.cc base/timer.cc
) )
target_include_directories(kaldi-base INTERFACE ${INSTALL_DIR}/base) target_include_directories(kaldi-base PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(kaldi-base INTERFACE "-DCOMPILE_WITHOUT_OPENFST")
# kaldi-matrix # kaldi-matrix
add_library(kaldi-matrix INTERFACE add_library(kaldi-matrix STATIC
matrix/compressed-matrix.cc matrix/compressed-matrix.cc
matrix/kaldi-matrix.cc matrix/kaldi-matrix.cc
matrix/kaldi-vector.cc matrix/kaldi-vector.cc
...@@ -48,11 +43,12 @@ add_library(kaldi-matrix INTERFACE ...@@ -48,11 +43,12 @@ add_library(kaldi-matrix INTERFACE
matrix/srfft.cc matrix/srfft.cc
matrix/tp-matrix.cc matrix/tp-matrix.cc
) )
target_include_directories(kaldi-matrix INTERFACE ${INSTALL_DIR}/matrix) target_include_directories(kaldi-matrix PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-matrix INTERFACE gfortran kaldi-base libopenblas.a) target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base libopenblas.a)
# kaldi-util # kaldi-util
add_library(kaldi-util INTERFACE add_library(kaldi-util STATIC
util/kaldi-holder.cc util/kaldi-holder.cc
util/kaldi-io.cc util/kaldi-io.cc
util/kaldi-semaphore.cc util/kaldi-semaphore.cc
...@@ -63,11 +59,12 @@ add_library(kaldi-util INTERFACE ...@@ -63,11 +59,12 @@ add_library(kaldi-util INTERFACE
util/simple-options.cc util/simple-options.cc
util/text-utils.cc util/text-utils.cc
) )
target_include_directories(kaldi-util INTERFACE ${INSTALL_DIR}/util) target_include_directories(kaldi-util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-util INTERFACE kaldi-base kaldi-matrix) target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix)
# kaldi-feat-common # kaldi-feat-common
add_library(kaldi-feat-common INTERFACE add_library(kaldi-feat-common STATIC
feat/wave-reader.cc feat/wave-reader.cc
feat/signal.cc feat/signal.cc
feat/feature-functions.cc feat/feature-functions.cc
...@@ -76,19 +73,37 @@ add_library(kaldi-feat-common INTERFACE ...@@ -76,19 +73,37 @@ add_library(kaldi-feat-common INTERFACE
feat/mel-computations.cc feat/mel-computations.cc
feat/cmvn.cc feat/cmvn.cc
) )
target_include_directories(kaldi-feat-common INTERFACE ${INSTALL_DIR}/feat) target_include_directories(kaldi-feat-common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-feat-common INTERFACE kaldi-base kaldi-matrix kaldi-util) target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)
# kaldi-mfcc # kaldi-mfcc
add_library(kaldi-mfcc INTERFACE add_library(kaldi-mfcc STATIC
feat/feature-mfcc.cc feat/feature-mfcc.cc
) )
target_include_directories(kaldi-mfcc INTERFACE ${INSTALL_DIR}/feat) target_include_directories(kaldi-mfcc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-mfcc INTERFACE kaldi-feat-common) target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)
# kaldi-fbank # kaldi-fbank
add_library(kaldi-fbank INTERFACE add_library(kaldi-fbank STATIC
feat/feature-fbank.cc feat/feature-fbank.cc
) )
target_include_directories(kaldi-fbank INTERFACE ${INSTALL_DIR}/feat) target_include_directories(kaldi-fbank PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-fbank INTERFACE kaldi-feat-common) target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)
\ No newline at end of file
# Kaldi component libraries, named as CMake targets and listed dependents
# first. Referring to the targets (rather than to hard-coded
# ${CMAKE_CURRENT_BINARY_DIR}/lib*.a paths) lets CMake:
#   * order the static archives correctly on the link line,
#   * build them before anything that links libkaldi (no add_dependencies),
#   * pick the right file name/location for every platform and generator,
#   * propagate each component's PUBLIC include directories and link
#     dependencies to consumers.
set(KALDI_LIBRARIES
  kaldi-fbank
  kaldi-mfcc
  kaldi-feat-common
  kaldi-util
  kaldi-matrix
  kaldi-base
)

# Aggregate interface target: linking `libkaldi` pulls in every component
# above together with the Kaldi include root and the OpenFst-free define.
add_library(libkaldi INTERFACE)
target_include_directories(libkaldi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(libkaldi INTERFACE ${KALDI_LIBRARIES})
target_compile_definitions(libkaldi INTERFACE COMPILE_WITHOUT_OPENFST)
\ No newline at end of file
...@@ -88,7 +88,8 @@ class CMakeBuild(build_ext): ...@@ -88,7 +88,8 @@ class CMakeBuild(build_ext):
# f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}", # f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}",
f"-DCMAKE_INSTALL_PREFIX={extdir}", f"-DCMAKE_INSTALL_PREFIX={extdir}",
"-DCMAKE_VERBOSE_MAKEFILE=ON", "-DCMAKE_VERBOSE_MAKEFILE=ON",
f"-DPython_INCLUDE_DIRS={distutils.sysconfig.get_python_inc()}", f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
f"-DPYTHON_LIBRARY={distutils.sysconfig.get_config_var('LIBDIR')}",
f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}", f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}", f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}",
# f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}", # f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册