Unverified commit b35fc01a authored by: Hui Zhang, committed by: GitHub

opt to compile asr,cls,vad; add vad; format code (#2968)

Parent: 78e29c8e
+engine/common/base/flags.h
+engine/common/base/log.h
tools/valgrind*
*log
fc_patch/*
@@ -20,8 +20,7 @@ project(paddlespeech VERSION 0.1)
set(CMAKE_VERBOSE_MAKEFILE on)
-# set std-14
-set(CMAKE_CXX_STANDARD 14)
include(FetchContent)
include(ExternalProject)
@@ -31,15 +30,28 @@ set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})
+set(CMAKE_CXX_FLAGS)
+set(CMAKE_CXX_FLAGS_DEBUG)
+set(CMAKE_CXX_FLAGS_RELEASE)
+# set std-14
+set(CMAKE_CXX_STANDARD 14)
# compiler option
# Keep the same with openfst, -fPIC or -fpic
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")
-add_compile_options(-fPIC)
###############################################################################
# Option Configurations
###############################################################################
+option(WITH_ASR "build asr" ON)
+option(WITH_CLS "build cls" ON)
+option(WITH_VAD "build vad" ON)
option(TEST_DEBUG "option for debug" OFF)
option(USE_PROFILING "enable c++ profling" OFF)
option(WITH_TESTING "unit test" ON)
@@ -47,31 +59,40 @@ option(WITH_TESTING "unit test" ON)
option(USING_GPU "u2 compute on GPU." OFF)
###############################################################################
-# Include third party
+# Include Third Party
###############################################################################
include(gflags)
include(glog)
-# openfst
-include(openfst)
-add_dependencies(openfst gflags glog)
-# paddle lib
-include(paddleinference)
# gtest
if(WITH_TESTING)
include(gtest) # download, build, install gtest
endif()
+# fastdeploy
+include(fastdeploy)
+if(WITH_ASR)
+# openfst
+include(openfst)
+add_dependencies(openfst gflags glog)
+endif()
+###############################################################################
+# Find Package
+###############################################################################
# python/pybind11/threads
find_package(Threads REQUIRED)
# https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
find_package(Python3 COMPONENTS Interpreter Development)
find_package(pybind11 CONFIG)
-if(Python3_FOUND)
+if(WITH_ASR)
+if(Python3_FOUND)
message(STATUS "Python3_FOUND = ${Python3_FOUND}")
message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
@@ -79,70 +100,76 @@ if(Python3_FOUND)
message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
endif()
message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")
if(pybind11_FOUND)
message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
endif()
# paddle libpaddle.so
# paddle include and link option
# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
execute_process(
COMMAND python -c "\
import os;\
import paddle;\
include_dir=paddle.sysconfig.get_include();\
paddle_dir=os.path.split(include_dir)[0];\
libs_dir=os.path.join(paddle_dir, 'libs');\
fluid_dir=os.path.join(paddle_dir, 'fluid');\
out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
"
OUTPUT_VARIABLE PADDLE_LINK_FLAGS
RESULT_VARIABLE SUCESS)
message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)
# paddle compile option
# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
execute_process(
COMMAND python -c "\
import paddle; \
include_dir = paddle.sysconfig.get_include(); \
print(f\"-I{include_dir}\"); \
"
OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS)
message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS)
# for LD_LIBRARY_PATH
# set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
execute_process(
COMMAND python -c "\
import os; \
import paddle; \
include_dir=paddle.sysconfig.get_include(); \
paddle_dir=os.path.split(include_dir)[0]; \
libs_dir=os.path.join(paddle_dir, 'libs'); \
fluid_dir=os.path.join(paddle_dir, 'fluid'); \
out=':'.join([libs_dir, fluid_dir]); print(out); \
"
OUTPUT_VARIABLE PADDLE_LIB_DIRS)
message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
+endif()
+add_compile_options(-fPIC)
###############################################################################
# Add local library
###############################################################################
set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)
+message(STATUS "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
+message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
+message(STATUS "CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}")
add_subdirectory(engine)
@@ -4,5 +4,5 @@ set -xe
# the build script had verified in the paddlepaddle docker image.
# please follow the instruction below to install PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
-cmake -B build
+cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF
cmake --build build -j
@@ -8,11 +8,11 @@ windows_x86")
set(CMAKE_VERBOSE_MAKEFILE ON)
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
-if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
+if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
-wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} &&
-tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} &&
-mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64")
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
+tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
+mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
endif()
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
@@ -36,4 +36,9 @@ elseif (ARCH STREQUAL "android_armv7")
endif()
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+# fix compiler flags conflict, since fastdeploy using c++11 for project
+set(CMAKE_CXX_STANDARD 14)
include_directories(${FASTDEPLOY_INCS})
+message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")
\ No newline at end of file
@@ -6,8 +6,19 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)
-add_subdirectory(asr)
-add_subdirectory(common)
add_subdirectory(kaldi)
+add_subdirectory(common)
+if(WITH_ASR)
+add_subdirectory(asr)
+endif()
+if(WITH_CLS)
+add_subdirectory(cls)
+endif()
+if(WITH_VAD)
+add_subdirectory(vad)
+endif()
add_subdirectory(codelab)
\ No newline at end of file
-add_subdirectory(cls)
\ No newline at end of file
@@ -38,7 +38,8 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
decoder_ = std::make_unique<CTCPrefixBeamSearch>(
resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
} else {
-decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts);
+decoder_ = std::make_unique<TLGDecoder>(
+resource.decoder_opts.tlg_decoder_opts);
}
symbol_table_ = decoder_->WordSymbolTable();
......
@@ -3,7 +3,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../
)
add_subdirectory(utils)
+add_subdirectory(base)
add_subdirectory(matrix)
include_directories(
......
if(WITH_ASR)
add_compile_options(-DWITH_ASR)
set(PPS_FLAGS_LIB "fst/flags.h")
set(PPS_GLOB_LIB "fst/log.h")
else()
set(PPS_FLAGS_LIB "gflags/gflags.h")
set(PPS_GLOB_LIB "glog/logging.h")
endif()
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/flags.h.in
${CMAKE_CURRENT_SOURCE_DIR}/flags.h @ONLY
)
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/flags.h")
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/log.h.in
${CMAKE_CURRENT_SOURCE_DIR}/log.h @ONLY
)
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/log.h")
\ No newline at end of file
@@ -14,4 +14,4 @@
#pragma once
-#include "fst/flags.h"
+#include "@PPS_FLAGS_LIB@"
\ No newline at end of file
@@ -14,4 +14,4 @@
#pragma once
-#include "fst/log.h"
+#include "@PPS_GLOB_LIB@"
@@ -33,7 +33,7 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
dim_ = mean_stats_.size() - 1;
}
-void CMVN::ReadCMVNFromJson(string cmvn_file) {
+void CMVN::ReadCMVNFromJson(std::string cmvn_file) {
std::string json_str = ppspeech::ReadFile2String(cmvn_file);
picojson::value value;
std::string err;
......
@@ -21,6 +21,7 @@
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+#include <limits>
#include <map>
#include "frontend/feature-window.h"
......
@@ -7,6 +7,7 @@
#include "frontend/feature-window.h"
#include <cmath>
+#include <limits>
#include <vector>
#ifndef M_2PI
......
@@ -17,12 +17,12 @@
*/
#include "frontend/rfft.h"
-#include "base/log.h"
#include <cmath>
+#include <memory>
#include <vector>
+#include "base/log.h"
// see fftsg.c
#ifdef __cplusplus
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
......
@@ -25,40 +25,41 @@
namespace kaldi {
/// Empty constructor
-template<typename Real>
-Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
+template <typename Real>
+Matrix<Real>::Matrix() : MatrixBase<Real>(NULL, 0, 0, 0) {}
/*
template<>
template<>
-void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
+void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float>
+&ra, const VectorBase<float> &rb);
template<>
template<>
-void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
+void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double>
+&ra, const VectorBase<double> &rb);
*/
-template<typename Real>
-inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
+template <typename Real>
+inline std::ostream& operator<<(std::ostream& os, const MatrixBase<Real>& M) {
M.Write(os, false);
return os;
}
-template<typename Real>
-inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) {
+template <typename Real>
+inline std::istream& operator>>(std::istream& is, Matrix<Real>& M) {
M.Read(is, false);
return is;
}
-template<typename Real>
-inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
+template <typename Real>
+inline std::istream& operator>>(std::istream& is, MatrixBase<Real>& M) {
M.Read(is, false);
return is;
}
-}// namespace kaldi
+} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_
@@ -26,32 +26,33 @@
namespace kaldi {
-template<typename Real>
-std::ostream & operator << (std::ostream &os, const VectorBase<Real> &rv) {
+template <typename Real>
+std::ostream &operator<<(std::ostream &os, const VectorBase<Real> &rv) {
rv.Write(os, false);
return os;
}
-template<typename Real>
-std::istream &operator >> (std::istream &is, VectorBase<Real> &rv) {
+template <typename Real>
+std::istream &operator>>(std::istream &is, VectorBase<Real> &rv) {
rv.Read(is, false);
return is;
}
-template<typename Real>
-std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
+template <typename Real>
+std::istream &operator>>(std::istream &is, Vector<Real> &rv) {
rv.Read(is, false);
return is;
}
-//template<>
-//template<>
-//void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
-//template<>
-//template<>
-//void VectorBase<double>::AddVec<double>(const double alpha,
-//const VectorBase<double> &rv);
+// template<>
+// template<>
+// void VectorBase<float>::AddVec(const float alpha, const VectorBase<float>
+// &rv);
+// template<>
+// template<>
+// void VectorBase<double>::AddVec<double>(const double alpha,
+// const VectorBase<double> &rv);
} // namespace kaldi
......
@@ -37,7 +37,7 @@ namespace kaldi {
/// Provides a vector abstraction class.
/// This class provides a way to work with vectors in kaldi.
/// It encapsulates basic operations and memory optimizations.
-template<typename Real>
+template <typename Real>
class VectorBase {
public:
/// Set vector to all zeros.
@@ -53,23 +53,23 @@ class VectorBase {
inline MatrixIndexT Dim() const { return dim_; }
/// Returns the size in memory of the vector, in bytes.
-inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
+inline MatrixIndexT SizeInBytes() const { return (dim_ * sizeof(Real)); }
/// Returns a pointer to the start of the vector's data.
-inline Real* Data() { return data_; }
+inline Real *Data() { return data_; }
/// Returns a pointer to the start of the vector's data (const).
-inline const Real* Data() const { return data_; }
+inline const Real *Data() const { return data_; }
/// Indexing operator (const).
-inline Real operator() (MatrixIndexT i) const {
+inline Real operator()(MatrixIndexT i) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
}
/// Indexing operator (non-const).
-inline Real & operator() (MatrixIndexT i) {
+inline Real &operator()(MatrixIndexT i) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
@@ -98,12 +98,12 @@ class VectorBase {
void CopyFromVec(const VectorBase<Real> &v);
/// Copy data from another vector of different type (double vs. float)
-template<typename OtherReal>
+template <typename OtherReal>
void CopyFromVec(const VectorBase<OtherReal> &v);
/// Performs a row stack of the matrix M
void CopyRowsFromMat(const MatrixBase<Real> &M);
-template<typename OtherReal>
+template <typename OtherReal>
void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
/// Performs a column stack of the matrix M
@@ -113,12 +113,12 @@ class VectorBase {
/// this->Copy(M[row]).
void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
/// Extracts a row of the matrix M with type conversion.
-template<typename OtherReal>
+template <typename OtherReal>
void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
/// Extracts a column of the matrix M.
-template<typename OtherReal>
-void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
+template <typename OtherReal>
+void CopyColFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT col);
/// Reads from C++ stream (option to add to existing contents).
/// Throws exception on failure
@@ -129,19 +129,21 @@ class VectorBase {
friend class VectorBase<double>;
friend class VectorBase<float>;
protected:
-/// Destructor; does not deallocate memory, this is handled by child classes.
+/// Destructor; does not deallocate memory, this is handled by child
+/// classes.
/// This destructor is protected so this object can only be
/// deleted via a child.
~VectorBase() {}
/// Empty initializer, corresponds to vector of zero size.
-explicit VectorBase(): data_(NULL), dim_(0) {
+explicit VectorBase() : data_(NULL), dim_(0) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
/// data memory area
-Real* data_;
+Real *data_;
/// dimension of vector
MatrixIndexT dim_;
KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
@@ -151,25 +153,28 @@ class VectorBase {
*
* This class provides a way to work with vectors in kaldi.
* It encapsulates basic operations and memory optimizations. */
-template<typename Real>
-class Vector: public VectorBase<Real> {
+template <typename Real>
+class Vector : public VectorBase<Real> {
public:
/// Constructor that takes no arguments. Initializes to empty.
-Vector(): VectorBase<Real>() {}
+Vector() : VectorBase<Real>() {}
/// Constructor with specific size. Sets to all-zero by default
/// if set_zero == false, memory contents are undefined.
explicit Vector(const MatrixIndexT s,
MatrixResizeType resize_type = kSetZero)
-: VectorBase<Real>() { Resize(s, resize_type); }
+: VectorBase<Real>() {
+Resize(s, resize_type);
+}
/// Copy constructor from CUDA vector
/// This is defined in ../cudamatrix/cu-vector.h
-//template<typename OtherReal>
-//explicit Vector(const CuVectorBase<OtherReal> &cu);
+// template<typename OtherReal>
+// explicit Vector(const CuVectorBase<OtherReal> &cu);
/// Copy constructor. The need for this is controversial.
-Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
+Vector(const Vector<Real> &v)
+: VectorBase<Real>() { // (cannot be explicit)
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
@@ -181,19 +186,19 @@ class Vector: public VectorBase<Real> {
}
/// Type conversion constructor.
-template<typename OtherReal>
-explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
+template <typename OtherReal>
+explicit Vector(const VectorBase<OtherReal> &v) : VectorBase<Real>() {
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
// Took this out since it is unsafe : Arnab
// /// Constructor from a pointer and a size; copies the data to a location
// /// it owns.
// Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
// Resize(s);
// CopyFromPtr(Data, s);
// }
/// Swaps the contents of *this and *other. Shallow swap.
@@ -219,59 +224,63 @@ class Vector: public VectorBase<Real> {
void RemoveElement(MatrixIndexT i);
/// Assignment operator.
-Vector<Real> &operator = (const Vector<Real> &other) {
+Vector<Real> &operator=(const Vector<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
/// Assignment operator that takes VectorBase.
-Vector<Real> &operator = (const VectorBase<Real> &other) {
+Vector<Real> &operator=(const VectorBase<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
private:
/// Init assumes the current contents of the class are invalid (i.e. junk or
-/// has already been freed), and it sets the vector to newly allocated memory
-/// with the specified dimension. dim == 0 is acceptable. The memory contents
+/// has already been freed), and it sets the vector to newly allocated
+/// memory
+/// with the specified dimension. dim == 0 is acceptable. The memory
+/// contents
/// pointed to by data_ will be undefined.
void Init(const MatrixIndexT dim);
/// Destroy function, called internally.
void Destroy();
};
/// Represents a non-allocating general vector which can be defined
/// as a sub-vector of higher-level vector [or as the row of a matrix].
-template<typename Real>
+template <typename Real>
class SubVector : public VectorBase<Real> {
public:
/// Constructor from a Vector or SubVector.
/// SubVectors are not const-safe and it's very hard to make them
/// so for now we just give up. This function contains const_cast.
-SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
-const MatrixIndexT length) : VectorBase<Real>() {
+SubVector(const VectorBase<Real> &t,
+const MatrixIndexT origin,
+const MatrixIndexT length)
+: VectorBase<Real>() {
// following assert equiv to origin>=0 && length>=0 &&
// origin+length <= rt.dim_
-KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin)+
+KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin) +
static_cast<UnsignedMatrixIndexT>(length) <=
static_cast<UnsignedMatrixIndexT>(t.Dim()));
-VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
+VectorBase<Real>::data_ = const_cast<Real *>(t.Data() + origin);
VectorBase<Real>::dim_ = length;
}
/// This constructor initializes the vector to point at the contents
/// of this packed matrix (SpMatrix or TpMatrix).
// SubVector(const PackedMatrix<Real> &M) {
-//VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
-//VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
+// VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
+// VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
//}
/// Copy constructor
-SubVector(const SubVector &other) : VectorBase<Real> () {
+SubVector(const SubVector &other) : VectorBase<Real>() {
// this copy constructor needed for Range() to work in base class.
VectorBase<Real>::data_ = other.data_;
VectorBase<Real>::dim_ = other.dim_;
@@ -280,14 +289,14 @@ class SubVector : public VectorBase<Real> {
/// Constructor from a pointer to memory and a length. Keeps a pointer
/// to the data but does not take ownership (will never delete).
/// Caution: this constructor enables you to evade const constraints.
-SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real> () {
-VectorBase<Real>::data_ = const_cast<Real*>(data);
+SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real>() {
+VectorBase<Real>::data_ = const_cast<Real *>(data);
VectorBase<Real>::dim_ = length;
}
/// This operation does not preserve const-ness, so be careful.
SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
-VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
+VectorBase<Real>::data_ = const_cast<Real *>(matrix.RowData(row));
VectorBase<Real>::dim_ = matrix.NumCols();
}
@@ -295,7 +304,7 @@ class SubVector : public VectorBase<Real> {
private:
/// Disallow assignment operator.
-SubVector & operator = (const SubVector &other) {}
+SubVector &operator=(const SubVector &other) {}
};
/// @} end of "addtogroup matrix_group"
@@ -303,43 +312,41 @@ class SubVector : public VectorBase<Real> {
/// @{
/// Output to a C++ stream. Non-binary by default (use Write for
/// binary output).
-template<typename Real>
-std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
+template <typename Real>
+std::ostream &operator<<(std::ostream &out, const VectorBase<Real> &v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
-template<typename Real>
-std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
+template <typename Real>
+std::istream &operator>>(std::istream &in, VectorBase<Real> &v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
-template<typename Real>
-std::istream & operator >> (std::istream & in, Vector<Real> & v);
+template <typename Real>
+std::istream &operator>>(std::istream &in, Vector<Real> &v);
/// @} end of \addtogroup matrix_funcs_io
/// \addtogroup matrix_funcs_scalar
/// @{
-//template<typename Real>
-//bool ApproxEqual(const VectorBase<Real> &a,
-//const VectorBase<Real> &b, Real tol = 0.01) {
-//return a.ApproxEqual(b, tol);
+// template<typename Real>
+// bool ApproxEqual(const VectorBase<Real> &a,
+// const VectorBase<Real> &b, Real tol = 0.01) {
+// return a.ApproxEqual(b, tol);
//}
-//template<typename Real>
-//inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
-//float tol = 0.01) {
-//KALDI_ASSERT(a.ApproxEqual(b, tol));
+// template<typename Real>
+// inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
+// float tol = 0.01) {
+// KALDI_ASSERT(a.ApproxEqual(b, tol));
//}
} // namespace kaldi
// we need to include the implementation
#include "matrix/kaldi-vector-inl.h"
#endif // KALDI_MATRIX_KALDI_VECTOR_H_
@@ -27,18 +27,15 @@
namespace kaldi {
// this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library
-// we are writing them as literals because we don't want to include here matrix/kaldi-blas.h,
+// we are writing them as literals because we don't want to include here
+// matrix/kaldi-blas.h,
// which puts many symbols into global scope (like "real") via the header f2c.h
typedef enum {
kTrans = 112, // = CblasTrans
kNoTrans = 111 // = CblasNoTrans
} MatrixTransposeType;
-typedef enum {
-kSetZero,
-kUndefined,
-kCopyData
-} MatrixResizeType;
+typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType;
typedef enum {
@@ -53,24 +50,33 @@ typedef enum {
kTakeMeanAndCheck
} SpCopyType;
-template<typename Real> class VectorBase;
-template<typename Real> class Vector;
-template<typename Real> class SubVector;
-template<typename Real> class MatrixBase;
-template<typename Real> class SubMatrix;
-template<typename Real> class Matrix;
+template <typename Real>
+class VectorBase;
+template <typename Real>
+class Vector;
+template <typename Real>
+class SubVector;
+template <typename Real>
+class MatrixBase;
+template <typename Real>
+class SubMatrix;
+template <typename Real>
+class Matrix;
/// This class provides a way for switching between double and float types.
-template<typename T> class OtherReal { }; // useful in reading+writing routines
+template <typename T>
+class OtherReal {}; // useful in reading+writing routines
// to switch double and float.
/// A specialized class for switching from float to double.
-template<> class OtherReal<float> {
+template <>
+class OtherReal<float> {
public:
typedef double Real;
};
/// A specialized class for switching from double to float.
-template<> class OtherReal<double> {
+template <>
+class OtherReal<double> {
public:
typedef float Real;
};
@@ -81,12 +87,10 @@ typedef int32 SignedMatrixIndexT;
typedef uint32 UnsignedMatrixIndexT;
// If you want to use size_t for the index type, do as follows instead:
-//typedef size_t MatrixIndexT;
-//typedef ssize_t SignedMatrixIndexT;
-//typedef size_t UnsignedMatrixIndexT;
+// typedef size_t MatrixIndexT;
+// typedef ssize_t SignedMatrixIndexT;
+// typedef size_t UnsignedMatrixIndexT;
}
#endif // KALDI_MATRIX_MATRIX_COMMON_H_
+project(kaldi)
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
)
add_subdirectory(base)
add_subdirectory(util)
-add_subdirectory(lat)
-add_subdirectory(fstext)
-add_subdirectory(decoder)
-add_subdirectory(lm)
+if(WITH_ASR)
+add_subdirectory(lat)
+add_subdirectory(fstext)
+add_subdirectory(decoder)
+add_subdirectory(lm)
add_subdirectory(fstbin)
add_subdirectory(lmbin)
+endif()
@@ -44,7 +44,19 @@ typedef float BaseFloat;
#ifndef COMPILE_WITHOUT_OPENFST
+#ifdef WITH_ASR
#include <fst/types.h>
+#else
+using int8 = int8_t;
+using int16 = int16_t;
+using int32 = int32_t;
+using int64 = int64_t;
+using uint8 = uint8_t;
+using uint16 = uint16_t;
+using uint32 = uint32_t;
+using uint64 = uint64_t;
+#endif
namespace kaldi {
using ::int16;
......
# set(CMAKE_CXX_STANDARD 11)
# # Path of the downloaded and extracted fastdeploy library
# set(FASTDEPLOY_INSTALL_DIR "fdlib/fastdeploy-linux-x64-1.0.4" CACHE STRING force)
# if(NOT EXISTS ${FASTDEPLOY_INSTALL_DIR})
# message(FATAL_ERROR "Please using cmake -B build -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}")
# endif()
# include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# # Add FastDeploy dependency header files
# include_directories(${FASTDEPLOY_INCS})
add_executable(infer_onnx_silero_vad ${CMAKE_CURRENT_SOURCE_DIR}/infer_onnx_silero_vad.cc wav.h vad.cc vad.h)
# Add the FastDeploy library dependency
target_link_libraries(infer_onnx_silero_vad ${FASTDEPLOY_LIBS})
English | [简体中文](README_CN.md)
# Silero VAD Deployment Example
This directory provides an example in which `infer_onnx_silero_vad` quickly completes the deployment of VAD models on CPU/GPU.
Before deployment, two steps require confirmation.
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. Download the precompiled deployment library and sample code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Choose an appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After decompression, place the model and test audio in the same directory as infer_onnx_silero_vad.cc
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
- The above commands work on Linux and macOS. For how to use the SDK on Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md)
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration)
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): sampling rate
> * **frame_ms**(int): The length of each detection frame; used to calculate the detection window size
> * **threshold**(float): Probability threshold for judging results
> * **min_silence_duration_ms**(int): The threshold used to decide whether a region is silence
> * **speech_pad_ms**(int): Used to calculate the end time of the speech (see the call sketch below)
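As a minimal sketch of the call order documented above (the parameter values are illustrative, and the header added later in this commit spells these calls `SetConfig`/`Init` and takes separate left/right padding arguments):

```c++
#include "vad.h"

int main() {
    // Example model path; replace with your own Silero VAD ONNX export.
    Vad vad("./silero_vad.onnx");
    // 16 kHz input, 64 ms frames, 0.5 speech-probability threshold,
    // 200 ms minimum silence, no extra speech padding (illustrative values).
    vad.SetConfig(16000, 64, 0.5f, 200, 0, 0);
    vad.Init();  // audio-related parameters must be configured before Init()
    return 0;
}
```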
### init function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Load audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Used to run model inference.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Threshold for discarding result segments; if a recognized segment is too short, it is discarded according to this threshold
> * **expandHeadThreshold**(float): Offset at the beginning of a segment; the recognized start time may be too close to the voiced part, so the start time is moved forward accordingly
> * **expandTailThreshold**(float): Offset at the end of a segment; the recognized end time may be too close to the voiced part, so the end time is moved back accordingly
> * **mergeThreshold**(float): Some result segments are very close to each other and can be merged into one; voiced segments are merged accordingly
**The output result format is** `std::vector<std::map<std::string, float>>`
> Output a list, each element is a speech fragment
>
> Each clip can use 'start' to get the start time and 'end' to get the end time; see the sketch below
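For illustration, consuming the returned list could look like the following sketch (the header added later in this commit spells the call `GetResult`):

```c++
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Print the speech segments returned by the VAD, e.g. from vad.GetResult().
void PrintSegments(const std::vector<std::map<std::string, float>>& segments) {
    for (const auto& seg : segments) {
        // Each element maps "start" and "end" to times in seconds.
        std::cout << "speech start: " << seg.at("start")
                  << " s, end: " << seg.at("end") << " s" << std::endl;
    }
}
```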
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must be consistent with that set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)
[English](README.md) | 简体中文
# Silero VAD Deployment Example
This directory provides `infer_onnx_silero_vad` to quickly complete the deployment of the Silero VAD model on CPU/GPU.
Before deployment, confirm the following two steps
- 1. The software and hardware environment meets the requirements; see [FastDeploy Environment Requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and sample code for your development environment; see [FastDeploy Precompiled Library](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking VAD inference on Linux as an example, run the following commands in this directory to complete the build test.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library; choose an appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio; after extraction, place the model and test audio in the same directory as infer_onnx_silero_vad.cc
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# Inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
The above commands only work on Linux or macOS. For how to use the SDK on Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/cn/faq/use_sdk_on_windows.md)
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameters**
> * **model_file**(str): Model file path
> * **runtime_option**(RuntimeOption): Backend inference configuration; defaults to None, i.e. the default configuration is used
### setAudioCofig Function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameters**
> * **sr**(int): Sampling rate
> * **frame_ms**(int): The length of each detection frame; used to calculate the detection window size
> * **threshold**(float): Probability threshold for judging results
> * **min_silence_duration_ms**(int): The threshold used to decide whether a region is silence
> * **speech_pad_ms**(int): Used to calculate the end time of the speech
### init Function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio Function
Loads audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameters**
> * **wavPath**(str): Audio file path
### Predict Function
Used to run model inference.
```c++
bool Vad::Predict();
```
### getResult Function
**Used to obtain inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameters**
> * **removeThreshold**(float): Threshold for discarding result segments; if a recognized segment is too short, it is discarded according to this threshold
> * **expandHeadThreshold**(float): Offset of the segment start time; the detected start may be too close to the voiced part, so the start time is moved forward by this amount
> * **expandTailThreshold**(float): Offset of the segment end time; the detected end may be too close to the voiced part, so the end time is moved back by this amount
> * **mergeThreshold**(float): Some result segments are very close and can be merged into one; voiced segments are merged according to this threshold
**The output format is** `std::vector<std::map<std::string, float>>`
> The output is a list; each element is a speech segment
>
> For each segment, 'start' gives the start time and 'end' gives the end time
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must match the one set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/cn/faq/how_to_change_backend.md)
#include "vad.h"
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio "
"run_option, "
"e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav"
<< std::endl;
return -1;
}
std::string model_file = argv[1];
std::string audio_file = argv[2];
int sr = 16000;
Vad vad(model_file);
// custom config, but must be set before init
vad.SetConfig(sr, 32, 0.45f, 200, 0, 0);
vad.Init();
std::vector<float> inputWav; // [0, 1]
wav::WavReader wav_reader = wav::WavReader(audio_file);
assert(wav_reader.sample_rate() == sr);
auto num_samples = wav_reader.num_samples();
inputWav.resize(num_samples);
for (int i = 0; i < num_samples; i++) {
inputWav[i] = wav_reader.data()[i] / 32768;
}
int window_size_samples = vad.WindowSizeSamples();
for (int64_t j = 0; j < num_samples; j += window_size_samples) {
auto start = j;
auto end = start + window_size_samples >= num_samples
? num_samples
: start + window_size_samples;
auto current_chunk_size = end - start;
std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};
assert(r.size() == current_chunk_size);
if (!vad.ForwardChunk(r)) {
std::cerr << "Failed to inference while using model:"
<< vad.ModelName() << "." << std::endl;
return 1;
}
Vad::State s = vad.Postprocess();
std::cout << s << " ";
}
std::cout << std::endl;
std::vector<std::map<std::string, float>> result = vad.GetResult();
for (auto& res : result) {
std::cout << "speak start: " << res["start"]
<< " s, end: " << res["end"] << " s | ";
}
std::cout << "\b\b " << std::endl;
vad.Reset();
return 0;
}
This diff has been collapsed.
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <mutex>
#include <vector>
#include "./wav.h"
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/runtime.h"
class Vad : public fastdeploy::FastDeployModel {
public:
enum class State { SIL = 0, START, SPEECH, END };
friend std::ostream& operator<<(std::ostream& os, const Vad::State& s);
Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option =
fastdeploy::RuntimeOption());
void Init();
void Reset();
void SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms);
bool ForwardChunk(std::vector<float>& chunk);
const State& Postprocess();
const std::vector<std::map<std::string, float>> GetResult(
float removeThreshold = 0.0,
float expandHeadThreshold = 0.0,
float expandTailThreshold = 0,
float mergeThreshold = 0.0) const;
const std::vector<State> GetStates() const { return states_; }
int SampleRate() const { return sample_rate_; }
int FrameMs() const { return frame_ms_; }
int64_t WindowSizeSamples() const { return window_size_samples_; }
float Threshold() const { return threshold_; }
int MinSilenceDurationMs() const {
return min_silence_samples_ / sample_rate_;
}
int SpeechPadLeftMs() const {
return speech_pad_left_samples_ / sample_rate_;
}
int SpeechPadRightMs() const {
return speech_pad_right_samples_ / sample_rate_;
}
int MinSilenceSamples() const { return min_silence_samples_; }
int SpeechPadLeftSamples() const { return speech_pad_left_samples_; }
int SpeechPadRightSamples() const { return speech_pad_right_samples_; }
std::string ModelName() const override;
private:
bool Initialize();
private:
std::once_flag init_;
// input and output
std::vector<fastdeploy::FDTensor> inputTensors_;
std::vector<fastdeploy::FDTensor> outputTensors_;
// model states
bool triggerd_ = false;
unsigned int speech_start_ = 0;
unsigned int speech_end_ = 0;
unsigned int temp_end_ = 0;
unsigned int current_sample_ = 0;
unsigned int current_chunk_size_ = 0;
// MAX 4294967295 samples / 8sample per ms / 1000 / 60 = 8947 minutes
float outputProb_;
std::vector<float> speakStart_;
mutable std::vector<float> speakEnd_;
std::vector<State> states_;
/* ========================================================================
*/
int sample_rate_ = 16000;
int frame_ms_ = 32; // 32, 64, 96 for 16k
float threshold_ = 0.5f;
int64_t window_size_samples_; // support 256 512 768 for 8k; 512 1024 1536
// for 16k.
int sr_per_ms_; // support 8 or 16
int min_silence_samples_; // sr_per_ms_ * frame_ms_
int speech_pad_left_samples_{0}; // usually 250ms
int speech_pad_right_samples_{0}; // usually 0
/* ========================================================================
*/
std::vector<int64_t> sr_;
const size_t size_hc_ = 2 * 1 * 64; // It's FIXED.
std::vector<float> h_;
std::vector<float> c_;
std::vector<int64_t> input_node_dims_;
const std::vector<int64_t> sr_node_dims_ = {1};
const std::vector<int64_t> hc_node_dims_ = {2, 1, 64};
};
This diff has been collapsed.