未验证 提交 b35fc01a 编写于 作者: H Hui Zhang 提交者: GitHub

opt to compile asr,cls,vad; add vad; format code (#2968)

上级 78e29c8e
engine/common/base/flags.h
engine/common/base/log.h
tools/valgrind*
*log
fc_patch/*
......@@ -20,8 +20,7 @@ project(paddlespeech VERSION 0.1)
set(CMAKE_VERBOSE_MAKEFILE on)
# set std-14
set(CMAKE_CXX_STANDARD 14)
include(FetchContent)
include(ExternalProject)
......@@ -31,15 +30,28 @@ set(FETCHCONTENT_QUIET off)
# Resolve "fc_patch" (relative to the top-level source dir) to a real path and
# use it as the download/build area for FetchContent.
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})
# Clear the flag variables so that only the values assigned below are in effect
# for this directory scope.
set(CMAKE_CXX_FLAGS)
set(CMAKE_CXX_FLAGS_DEBUG)
set(CMAKE_CXX_FLAGS_RELEASE)
# set std-14
set(CMAKE_CXX_STANDARD 14)
# compiler option
# Keep the same with openfst, -fPIC or -fpic
# NOTE(review): --std=c++14 repeats what CMAKE_CXX_STANDARD already requests,
# and -ldl is a linker input placed in the compile flags; consider linking
# ${CMAKE_DL_LIBS} on the targets that need it instead.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
# Per-configuration flags start from the CXXFLAGS environment variable, which
# is read once at configure time.
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")
# Position-independent code for every target defined below this point
# (already present in the flag strings above as well).
add_compile_options(-fPIC)
###############################################################################
# Option Configurations
###############################################################################
# Component switches. In the visible configuration WITH_ASR also gates the
# openfst third-party include, and each switch gates the matching engine
# subdirectory (asr / cls / vad).
option(WITH_ASR "build asr" ON)
option(WITH_CLS "build cls" ON)
option(WITH_VAD "build vad" ON)
# NOTE(review): the consumers of TEST_DEBUG and USE_PROFILING are not visible
# here - confirm where they are read before relying on them.
option(TEST_DEBUG "option for debug" OFF)
option(USE_PROFILING "enable c++ profling" OFF)
# When ON, gtest is downloaded, built and installed by the third-party section.
option(WITH_TESTING "unit test" ON)
......@@ -47,102 +59,117 @@ option(WITH_TESTING "unit test" ON)
option(USING_GPU "u2 compute on GPU." OFF)
###############################################################################
# Include Third Party
###############################################################################
# gflags and glog are always needed; openfst (below) is built on top of them.
include(gflags)

include(glog)

# paddle lib
include(paddleinference)

# gtest
if(WITH_TESTING)
    include(gtest) # download, build, install gtest
endif()

# fastdeploy
include(fastdeploy)

# openfst is pulled in exactly once, and only when the ASR components are
# built. Including the module a second time would repeat its declarations and
# the add_dependencies() call.
if(WITH_ASR)
    # openfst
    include(openfst)
    # Make sure gflags and glog are built before openfst.
    add_dependencies(openfst gflags glog)
endif()
###############################################################################
# Find Package
###############################################################################
# python/pybind11/threads
find_package(Threads REQUIRED)
# https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
find_package(Python3 COMPONENTS Interpreter Development)
find_package(pybind11 CONFIG)
if(Python3_FOUND)
message(STATUS "Python3_FOUND = ${Python3_FOUND}")
message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
message(STATUS "Python3_INCLUDE_DIRS = ${Python3_INCLUDE_DIRS}")
message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
endif()
message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")
if(pybind11_FOUND)
message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
# Python / pybind11 / Paddle discovery, only needed for the ASR components.
#
# Outputs (directory-scope variables):
#   PYTHON_LIBRARIES, PYTHON_INCLUDE_DIR  - cache entries mirrored from Python3_*
#   PADDLE_LINK_FLAGS                     - "-L... -L... -l:libpaddle.so ..."
#   PADDLE_COMPILE_FLAGS                  - "-I<paddle include dir>"
#   PADDLE_LIB_DIRS                       - "<libs dir>:<fluid dir>" for LD_LIBRARY_PATH
#
# The Paddle values are obtained by running `python` with the `paddle` package
# importable; configuration stops with an explicit error if that probe fails.
if(WITH_ASR)
    if(Python3_FOUND)
        message(STATUS "Python3_FOUND = ${Python3_FOUND}")
        message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
        message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
        message(STATUS "Python3_INCLUDE_DIRS = ${Python3_INCLUDE_DIRS}")
        message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
        set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
        set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
    endif()

    message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
    message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")

    if(pybind11_FOUND)
        message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
        message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
        message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
    endif()

    # paddle libpaddle.so
    # paddle include and link option
    # e.g. -L<site-packages>/paddle/libs -L<site-packages>/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
    # The script text below must stay at column 0: it is passed to `python -c`.
    execute_process(
        COMMAND python -c "\
import os;\
import paddle;\
include_dir=paddle.sysconfig.get_include();\
paddle_dir=os.path.split(include_dir)[0];\
libs_dir=os.path.join(paddle_dir, 'libs');\
fluid_dir=os.path.join(paddle_dir, 'fluid');\
out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
"
        OUTPUT_VARIABLE PADDLE_LINK_FLAGS
        RESULT_VARIABLE SUCESS)
    # Without this check an empty output only surfaced as an argument-count
    # error from string(STRIP) below.
    if(NOT "${SUCESS}" STREQUAL "0")
        message(FATAL_ERROR
            "Failed to query Paddle link flags via `python` (result: ${SUCESS}). "
            "Make sure `python` is on PATH and the `paddle` package is importable.")
    endif()
    message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
    # Quoted so that an empty value or one containing ';' is still one argument.
    string(STRIP "${PADDLE_LINK_FLAGS}" PADDLE_LINK_FLAGS)

    # paddle compile option
    # e.g. -I<site-packages>/paddle/include
    execute_process(
        COMMAND python -c "\
import paddle; \
include_dir = paddle.sysconfig.get_include(); \
print(f\"-I{include_dir}\"); \
"
        OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS
        RESULT_VARIABLE paddle_compile_flags_result)
    if(NOT "${paddle_compile_flags_result}" STREQUAL "0")
        message(FATAL_ERROR
            "Failed to query Paddle compile flags via `python` "
            "(result: ${paddle_compile_flags_result}).")
    endif()
    message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
    string(STRIP "${PADDLE_COMPILE_FLAGS}" PADDLE_COMPILE_FLAGS)

    # for LD_LIBRARY_PATH
    # e.g. <site-packages>/paddle/libs:<site-packages>/paddle/fluid
    # NOTE(review): the value keeps the trailing newline printed by python, as
    # before; confirm consumers before stripping it.
    execute_process(
        COMMAND python -c "\
import os; \
import paddle; \
include_dir=paddle.sysconfig.get_include(); \
paddle_dir=os.path.split(include_dir)[0]; \
libs_dir=os.path.join(paddle_dir, 'libs'); \
fluid_dir=os.path.join(paddle_dir, 'fluid'); \
out=':'.join([libs_dir, fluid_dir]); print(out); \
"
        OUTPUT_VARIABLE PADDLE_LIB_DIRS)
    message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
endif()
# paddle libpaddle.so
# paddle include and link option
# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
execute_process(
COMMAND python -c "\
import os;\
import paddle;\
include_dir=paddle.sysconfig.get_include();\
paddle_dir=os.path.split(include_dir)[0];\
libs_dir=os.path.join(paddle_dir, 'libs');\
fluid_dir=os.path.join(paddle_dir, 'fluid');\
out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
"
OUTPUT_VARIABLE PADDLE_LINK_FLAGS
RESULT_VARIABLE SUCESS)
message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)
# paddle compile option
# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
execute_process(
COMMAND python -c "\
import paddle; \
include_dir = paddle.sysconfig.get_include(); \
print(f\"-I{include_dir}\"); \
"
OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS)
message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS)
# for LD_LIBRARY_PATH
# set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
execute_process(
COMMAND python -c "\
import os; \
import paddle; \
include_dir=paddle.sysconfig.get_include(); \
paddle_dir=os.path.split(include_dir)[0]; \
libs_dir=os.path.join(paddle_dir, 'libs'); \
fluid_dir=os.path.join(paddle_dir, 'fluid'); \
out=':'.join([libs_dir, fluid_dir]); print(out); \
"
OUTPUT_VARIABLE PADDLE_LIB_DIRS)
message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
add_compile_options(-fPIC)
###############################################################################
# Add local library
###############################################################################
# Root of the in-tree engine sources, relative to this CMakeLists.txt.
set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)
# Print the effective flag strings so they show up in the configure log.
message(STATUS "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
message(STATUS "CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}")
# Descend into the engine directory, which adds the individual components.
add_subdirectory(engine)
......@@ -4,5 +4,5 @@ set -xe
# the build script had verified in the paddlepaddle docker image.
# please follow the instruction below to install PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
cmake -B build
cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF
cmake --build build -j
......@@ -8,11 +8,11 @@ windows_x86")
set(CMAKE_VERBOSE_MAKEFILE ON)
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64")
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
endif()
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
......@@ -36,4 +36,9 @@ elseif (ARCH STREQUAL "android_armv7")
endif()
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
\ No newline at end of file
# fix compiler flags conflict, since fastdeploy using c++11 for project
set(CMAKE_CXX_STANDARD 14)
include_directories(${FASTDEPLOY_INCS})
message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")
\ No newline at end of file
......@@ -6,8 +6,19 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)

# Each subdirectory is added exactly once; adding the same source directory
# twice with the same binary directory is a configure error.
add_subdirectory(kaldi)
add_subdirectory(common)

# Optional components, controlled by the top-level WITH_* options.
if(WITH_ASR)
    add_subdirectory(asr)
endif()

if(WITH_CLS)
    add_subdirectory(cls)
endif()

if(WITH_VAD)
    add_subdirectory(vad)
endif()

add_subdirectory(codelab)
\ No newline at end of file
......@@ -38,7 +38,8 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
decoder_ = std::make_unique<CTCPrefixBeamSearch>(
resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
} else {
decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts);
decoder_ = std::make_unique<TLGDecoder>(
resource.decoder_opts.tlg_decoder_opts);
}
symbol_table_ = decoder_->WordSymbolTable();
......
......@@ -3,7 +3,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../
)
add_subdirectory(utils)
add_subdirectory(base)
add_subdirectory(matrix)
include_directories(
......
# Pick the headers that the generated flags.h / log.h will include.
# Without ASR the gflags / glog headers are used directly; with ASR the fst/
# headers are used and WITH_ASR is defined for sources in this directory scope.
if(NOT WITH_ASR)
    set(PPS_FLAGS_LIB "gflags/gflags.h")
    set(PPS_GLOB_LIB "glog/logging.h")
else()
    add_compile_options(-DWITH_ASR)
    set(PPS_FLAGS_LIB "fst/flags.h")
    set(PPS_GLOB_LIB "fst/log.h")
endif()

# Instantiate flags.h and log.h from their .h.in templates next to the
# templates themselves. @ONLY restricts substitution to @VAR@ references.
foreach(header_name flags log)
    configure_file(
        ${CMAKE_CURRENT_SOURCE_DIR}/${header_name}.h.in
        ${CMAKE_CURRENT_SOURCE_DIR}/${header_name}.h @ONLY
    )
    message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/${header_name}.h")
endforeach()
\ No newline at end of file
......@@ -14,4 +14,4 @@
#pragma once
#include "fst/flags.h"
#include "@PPS_FLAGS_LIB@"
\ No newline at end of file
......@@ -14,4 +14,4 @@
#pragma once
#include "fst/log.h"
#include "@PPS_GLOB_LIB@"
......@@ -33,7 +33,7 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
dim_ = mean_stats_.size() - 1;
}
void CMVN::ReadCMVNFromJson(string cmvn_file) {
void CMVN::ReadCMVNFromJson(std::string cmvn_file) {
std::string json_str = ppspeech::ReadFile2String(cmvn_file);
picojson::value value;
std::string err;
......
......@@ -21,6 +21,7 @@
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#include <limits>
#include <map>
#include "frontend/feature-window.h"
......
......@@ -7,6 +7,7 @@
#include "frontend/feature-window.h"
#include <cmath>
#include <limits>
#include <vector>
#ifndef M_2PI
......
......@@ -17,12 +17,12 @@
*/
#include "frontend/rfft.h"
#include "base/log.h"
#include <cmath>
#include <memory>
#include <vector>
#include "base/log.h"
// see fftsg.c
#ifdef __cplusplus
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
......
......@@ -25,40 +25,41 @@
namespace kaldi {
/// Empty constructor
template<typename Real>
Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
template <typename Real>
Matrix<Real>::Matrix() : MatrixBase<Real>(NULL, 0, 0, 0) {}
/*
template<>
template<>
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float>
&ra, const VectorBase<float> &rb);
template<>
template<>
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double>
&ra, const VectorBase<double> &rb);
*/
template<typename Real>
inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
M.Write(os, false);
return os;
template <typename Real>
inline std::ostream& operator<<(std::ostream& os, const MatrixBase<Real>& M) {
M.Write(os, false);
return os;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) {
M.Read(is, false);
return is;
template <typename Real>
inline std::istream& operator>>(std::istream& is, Matrix<Real>& M) {
M.Read(is, false);
return is;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
M.Read(is, false);
return is;
template <typename Real>
inline std::istream& operator>>(std::istream& is, MatrixBase<Real>& M) {
M.Read(is, false);
return is;
}
}// namespace kaldi
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_
......@@ -26,32 +26,33 @@
namespace kaldi {
template<typename Real>
std::ostream & operator << (std::ostream &os, const VectorBase<Real> &rv) {
rv.Write(os, false);
return os;
template <typename Real>
std::ostream &operator<<(std::ostream &os, const VectorBase<Real> &rv) {
rv.Write(os, false);
return os;
}
template<typename Real>
std::istream &operator >> (std::istream &is, VectorBase<Real> &rv) {
rv.Read(is, false);
return is;
template <typename Real>
std::istream &operator>>(std::istream &is, VectorBase<Real> &rv) {
rv.Read(is, false);
return is;
}
template<typename Real>
std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
rv.Read(is, false);
return is;
template <typename Real>
std::istream &operator>>(std::istream &is, Vector<Real> &rv) {
rv.Read(is, false);
return is;
}
//template<>
//template<>
//void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
// template<>
// template<>
// void VectorBase<float>::AddVec(const float alpha, const VectorBase<float>
// &rv);
//template<>
//template<>
//void VectorBase<double>::AddVec<double>(const double alpha,
//const VectorBase<double> &rv);
// template<>
// template<>
// void VectorBase<double>::AddVec<double>(const double alpha,
// const VectorBase<double> &rv);
} // namespace kaldi
......
......@@ -27,52 +27,58 @@
namespace kaldi {
// this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library
// we are writing them as literals because we don't want to include here matrix/kaldi-blas.h,
// which puts many symbols into global scope (like "real") via the header f2c.h
// we are writing them as literals because we don't want to include here
// matrix/kaldi-blas.h,
// which puts many symbols into global scope (like "real") via the header f2c.h
typedef enum {
kTrans = 112, // = CblasTrans
kNoTrans = 111 // = CblasNoTrans
kTrans = 112, // = CblasTrans
kNoTrans = 111 // = CblasNoTrans
} MatrixTransposeType;
typedef enum {
kSetZero,
kUndefined,
kCopyData
} MatrixResizeType;
typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType;
typedef enum {
kDefaultStride,
kStrideEqualNumCols,
kDefaultStride,
kStrideEqualNumCols,
} MatrixStrideType;
typedef enum {
kTakeLower,
kTakeUpper,
kTakeMean,
kTakeMeanAndCheck
kTakeLower,
kTakeUpper,
kTakeMean,
kTakeMeanAndCheck
} SpCopyType;
template<typename Real> class VectorBase;
template<typename Real> class Vector;
template<typename Real> class SubVector;
template<typename Real> class MatrixBase;
template<typename Real> class SubMatrix;
template<typename Real> class Matrix;
template <typename Real>
class VectorBase;
template <typename Real>
class Vector;
template <typename Real>
class SubVector;
template <typename Real>
class MatrixBase;
template <typename Real>
class SubMatrix;
template <typename Real>
class Matrix;
/// This class provides a way for switching between double and float types.
template<typename T> class OtherReal { }; // useful in reading+writing routines
// to switch double and float.
template <typename T>
class OtherReal {}; // useful in reading+writing routines
// to switch double and float.
/// A specialized class for switching from float to double.
template<> class OtherReal<float> {
public:
typedef double Real;
template <>
class OtherReal<float> {
public:
typedef double Real;
};
/// A specialized class for switching from double to float.
template<> class OtherReal<double> {
public:
typedef float Real;
template <>
class OtherReal<double> {
public:
typedef float Real;
};
......@@ -81,12 +87,10 @@ typedef int32 SignedMatrixIndexT;
typedef uint32 UnsignedMatrixIndexT;
// If you want to use size_t for the index type, do as follows instead:
//typedef size_t MatrixIndexT;
//typedef ssize_t SignedMatrixIndexT;
//typedef size_t UnsignedMatrixIndexT;
// typedef size_t MatrixIndexT;
// typedef ssize_t SignedMatrixIndexT;
// typedef size_t UnsignedMatrixIndexT;
}
#endif // KALDI_MATRIX_MATRIX_COMMON_H_
project(kaldi)

include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}
)

# Built for every configuration.
add_subdirectory(base)
add_subdirectory(util)

# Lattice, FST, decoder and language-model code (plus their command-line
# tools) are only built for ASR. Each directory is added exactly once; a
# second add_subdirectory() for the same directory is a configure error.
if(WITH_ASR)
    add_subdirectory(lat)
    add_subdirectory(fstext)
    add_subdirectory(decoder)
    add_subdirectory(lm)

    add_subdirectory(fstbin)
    add_subdirectory(lmbin)
endif()
......@@ -44,7 +44,19 @@ typedef float BaseFloat;
#ifndef COMPILE_WITHOUT_OPENFST
#ifdef WITH_ASR
#include <fst/types.h>
#else
using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;
using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
#endif
namespace kaldi {
using ::int16;
......
# Standalone-build setup, kept disabled for reference.
# NOTE(review): FASTDEPLOY_INCS / FASTDEPLOY_LIBS are presumably provided by
# the parent project's fastdeploy module when this is built in-tree - confirm.
# set(CMAKE_CXX_STANDARD 11)
# # Path of the downloaded and extracted FastDeploy library
# set(FASTDEPLOY_INSTALL_DIR "fdlib/fastdeploy-linux-x64-1.0.4" CACHE STRING force)
# if(NOT EXISTS ${FASTDEPLOY_INSTALL_DIR})
# message(FATAL_ERROR "Please using cmake -B build -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}")
# endif()
# include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# # Add the FastDeploy header search paths
# include_directories(${FASTDEPLOY_INCS})

# Command-line demo that runs the Silero VAD model on a wav file.
add_executable(infer_onnx_silero_vad
    ${CMAKE_CURRENT_SOURCE_DIR}/infer_onnx_silero_vad.cc
    wav.h
    vad.cc
    vad.h
)

# Link against the FastDeploy libraries. PRIVATE: an executable has no
# consumers, and the keyword form avoids the legacy plain signature.
target_link_libraries(infer_onnx_silero_vad PRIVATE ${FASTDEPLOY_LIBS})
English | [简体中文](README_CN.md)
# Silero VAD Deployment Example
This directory provides an example, `infer_onnx_silero_vad`, that quickly deploys the Silero VAD model on CPU/GPU.
Before deployment, two steps require confirmation.
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Choose the version that suits your environment from the `FastDeploy Precompiled Library` page mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
- The above command works for Linux or MacOS. Refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **custom_option**(RuntimeOption): Backend inference configuration. Defaults to `fastdeploy::RuntimeOption()`, i.e. the default configuration
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): sampling rate
> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size
> * **threshold**(float): Result probability judgment threshold
> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence
> * **speech_pad_ms**(int): Used to calculate the end time of the speech
### init function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Load audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Used to run model inference.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain the inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold
> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly
> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly
> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly
**The output result format is**`std::vector<std::map<std::string, float>>`
> Output a list, each element is a speech fragment
>
> Each clip can use 'start' to get the start time and 'end' to get the end time
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must be consistent with that set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)
[English](README.md) | 简体中文
# Silero VAD 部署示例
本目录下提供`infer_onnx_silero_vad`示例,用于快速完成 Silero VAD 模型在 CPU/GPU 上的部署。
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
以Linux上 VAD 推理为例,在本目录执行如下命令即可完成编译测试。
```bash
mkdir build
cd build
# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# 下载 VAD 模型文件和测试音频,解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# 推理
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:
- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md)
## VAD C++ 接口
### Vad 类
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**参数**
> * **model_file**(str): 模型文件路径
> * **custom_option**(RuntimeOption): 后端推理配置,默认为 `fastdeploy::RuntimeOption()`,即采用默认配置
### setAudioCofig 函数
**必须在`init`函数前调用**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**参数**
> * **sr**(int): 采样率
> * **frame_ms**(int): 每次检测帧长,用于计算检测窗口大小
> * **threshold**(float): 结果概率判断阈值
> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值
> * **speech_pad_ms**(int): 用于计算 speech 结束时刻
### init 函数
用于初始化音频相关参数
```c++
void Vad::init();
```
### loadAudio 函数
加载音频
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**参数**
> * **wavPath**(str): 音频文件路径
### Predict 函数
用于开始模型推理
```c++
bool Vad::Predict();
```
### getResult 函数
**用于获取推理结果**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**参数**
> * **removeThreshold**(float): 丢弃结果片段阈值;部分识别结果太短则根据此阈值丢弃
> * **expandHeadThreshold**(float): 结果片段开始时刻偏移;识别到的开始时刻可能过于贴近发声部分,因此据此前移开始时刻
> * **expandTailThreshold**(float): 结果片段结束时刻偏移;识别到的结束时刻可能过于贴近发声部分,因此据此后移结束时刻
> * **mergeThreshold**(float): 有的结果片段十分靠近,可以合并成一个,据此合并发声片段
**输出结果格式为**`std::vector<std::map<std::string, float>>`
> 输出一个列表,每个元素是一个讲话片段
>
> 每个片段可以用 'start' 获取到开始时刻,用 'end' 获取到结束时刻
### 提示
1. `setAudioCofig`函数必须在`init`函数前调用
2. 输入的音频文件的采样率必须与代码中设置的保持一致
- [模型介绍](../)
- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md)
#include "vad.h"
// Command-line demo for the Silero VAD model.
//
// Reads a wav file, feeds it to the model one window at a time, prints the
// state reported after each window, and finally prints the detected speech
// segments.
//
// Arguments: argv[1] = model file path, argv[2] = audio file path.
// Returns 0 on success and -1 on bad usage, on a sample-rate mismatch, or
// when inference fails.
int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio "
                     "run_option, "
                     "e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav"
                  << std::endl;
        return -1;
    }

    std::string model_file = argv[1];
    std::string audio_file = argv[2];

    int sr = 16000;
    Vad vad(model_file);
    // custom config, but must be set before init
    vad.SetConfig(sr, 32, 0.45f, 200, 0, 0);
    vad.Init();

    wav::WavReader wav_reader = wav::WavReader(audio_file);
    // Checked at runtime rather than with assert(), which is compiled out
    // under NDEBUG and would let a mismatched file through silently.
    if (wav_reader.sample_rate() != sr) {
        std::cerr << "Sample rate of " << audio_file << " is "
                  << wav_reader.sample_rate() << ", expected " << sr << "."
                  << std::endl;
        return -1;
    }

    // Scale the samples by 1/32768. The float divisor guarantees a
    // floating-point division regardless of the reader's sample type.
    auto num_samples = wav_reader.num_samples();
    std::vector<float> inputWav;
    inputWav.resize(num_samples);
    for (int i = 0; i < num_samples; i++) {
        inputWav[i] = wav_reader.data()[i] / 32768.0f;
    }

    // Walk the signal in windows of the size requested by the model; the
    // last window may be shorter.
    int window_size_samples = vad.WindowSizeSamples();
    for (int64_t j = 0; j < num_samples; j += window_size_samples) {
        auto start = j;
        auto end = start + window_size_samples >= num_samples
                       ? num_samples
                       : start + window_size_samples;
        auto current_chunk_size = end - start;

        std::vector<float> chunk(inputWav.begin() + start,
                                 inputWav.begin() + end);
        assert(chunk.size() == current_chunk_size);

        if (!vad.ForwardChunk(chunk)) {
            std::cerr << "Failed to inference while using model:"
                      << vad.ModelName() << "." << std::endl;
            // Previously `return false`, i.e. exit status 0 (success).
            return -1;
        }

        Vad::State s = vad.Postprocess();
        std::cout << s << " ";
    }
    std::cout << std::endl;

    std::vector<std::map<std::string, float>> result = vad.GetResult();
    for (auto& res : result) {
        std::cout << "speak start: " << res["start"]
                  << " s, end: " << res["end"] << " s | ";
    }
    // Backspaces overwrite the trailing "| " separator on a terminal.
    std::cout << "\b\b " << std::endl;

    vad.Reset();

    return 0;
}
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册