Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
78e29c8e
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
78e29c8e
编写于
2月 20, 2023
作者:
Mars懵
提交者:
GitHub
2月 20, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cls engine (#2923)
上级
2f8aad95
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
966 addition
and
1 deletion
+966
-1
runtime/CMakeLists.txt
runtime/CMakeLists.txt
+1
-1
runtime/cmake/fastdeploy.cmake
runtime/cmake/fastdeploy.cmake
+39
-0
runtime/engine/CMakeLists.txt
runtime/engine/CMakeLists.txt
+1
-0
runtime/engine/cls/CMakeLists.txt
runtime/engine/cls/CMakeLists.txt
+7
-0
runtime/engine/cls/nnet/CMakeLists.txt
runtime/engine/cls/nnet/CMakeLists.txt
+8
-0
runtime/engine/cls/nnet/panns_interface.cc
runtime/engine/cls/nnet/panns_interface.cc
+78
-0
runtime/engine/cls/nnet/panns_interface.h
runtime/engine/cls/nnet/panns_interface.h
+27
-0
runtime/engine/cls/nnet/panns_nnet.cc
runtime/engine/cls/nnet/panns_nnet.cc
+228
-0
runtime/engine/cls/nnet/panns_nnet.h
runtime/engine/cls/nnet/panns_nnet.h
+74
-0
runtime/engine/cls/nnet/panns_nnet_main.cc
runtime/engine/cls/nnet/panns_nnet_main.cc
+49
-0
runtime/engine/common/base/config.h
runtime/engine/common/base/config.h
+338
-0
runtime/engine/common/utils/CMakeLists.txt
runtime/engine/common/utils/CMakeLists.txt
+1
-0
runtime/engine/common/utils/audio_process.cc
runtime/engine/common/utils/audio_process.cc
+83
-0
runtime/engine/common/utils/audio_process.h
runtime/engine/common/utils/audio_process.h
+32
-0
未找到文件。
runtime/CMakeLists.txt
浏览文件 @
78e29c8e
...
...
@@ -139,7 +139,7 @@ out=':'.join([libs_dir, fluid_dir]); print(out); \
OUTPUT_VARIABLE PADDLE_LIB_DIRS
)
message
(
STATUS PADDLE_LIB_DIRS=
${
PADDLE_LIB_DIRS
}
)
add_compile_options
(
-fPIC
)
###############################################################################
# Add local library
###############################################################################
...
...
runtime/cmake/fastdeploy.cmake
0 → 100644
浏览文件 @
78e29c8e
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Target platform selector. The default is the x86_64 server build.
set(ARCH "mserver_x86_64" CACHE STRING "Target Architecture:
android_arm, android_armv7, android_armv8, android_x86, android_x86_64,
mserver_x86_64, ubuntu_x86_64, ios_armv7, ios_armv7s, ios_armv8, ios_x86_64, ios_x86,
windows_x86")

set(CMAKE_VERBOSE_MAKEFILE ON)

# Prebuilt FastDeploy SDKs are downloaded to and unpacked in this directory.
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)

# Downloads <url> into FASTDEPLOY_DIR, unpacks <archive> and renames the
# unpacked directory <unpacked_name> to <install_name>.
# Nothing is done when the archive is already present.
# execute_process() replaces the deprecated exec_program(); the same shell
# chain is run, but its exit status is now reported instead of being dropped.
function(_fastdeploy_fetch url archive unpacked_name install_name)
    if(EXISTS ${FASTDEPLOY_DIR}/${archive})
        return()
    endif()
    execute_process(
        COMMAND sh -c "mkdir -p ${FASTDEPLOY_DIR} && wget ${url} -P ${FASTDEPLOY_DIR} && tar xzvf ${FASTDEPLOY_DIR}/${archive} -C ${FASTDEPLOY_DIR} && mv ${FASTDEPLOY_DIR}/${unpacked_name} ${FASTDEPLOY_DIR}/${install_name}"
        RESULT_VARIABLE _fd_rc)
    if(NOT _fd_rc EQUAL 0)
        # Only a warning: the package for the other platform may be
        # irrelevant to the current build.
        message(WARNING "Fetching FastDeploy package ${archive} failed (exit code: ${_fd_rc})")
    endif()
endfunction()

# Linux x86_64 SDK -> ${FASTDEPLOY_DIR}/linux-x64
_fastdeploy_fetch(
    https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz
    fastdeploy-linux-x64-1.0.2.tgz
    fastdeploy-linux-x64-1.0.2
    linux-x64)

# Android SDK -> ${FASTDEPLOY_DIR}/android-armv7v8
_fastdeploy_fetch(
    https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz
    fastdeploy-android-1.0.0-shared.tgz
    fastdeploy-android-1.0.0-shared
    android-armv7v8)
# Select the FastDeploy SDK, the inference backend macro and the compiler
# flags for the requested target architecture.
if(ARCH STREQUAL "mserver_x86_64")
    set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/linux-x64)
    add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
    # add_definitions("-DUSE_ORT_BACKEND")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
elseif(ARCH STREQUAL "android_armv7")
    set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8)
    # The macro name must match the `#ifdef USE_PADDLE_LITE_BACKEND` test in
    # panns_nnet.cc; the previous spelling "BAKEND" never enabled that branch.
    add_definitions("-DUSE_PADDLE_LITE_BACKEND")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
else()
    # Without this branch FASTDEPLOY_INSTALL_DIR stays empty and the include()
    # below fails with a misleading "/FastDeploy.cmake not found" error.
    message(FATAL_ERROR "Unsupported ARCH '${ARCH}': expected mserver_x86_64 or android_armv7")
endif()

# FastDeploy.cmake is shipped inside the SDK; this file's consumers use the
# FASTDEPLOY_INCS / FASTDEPLOY_LIBS variables after including it.
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
\ No newline at end of file
runtime/engine/CMakeLists.txt
浏览文件 @
78e29c8e
...
...
@@ -10,3 +10,4 @@ add_subdirectory(asr)
add_subdirectory
(
common
)
add_subdirectory
(
kaldi
)
add_subdirectory
(
codelab
)
add_subdirectory
(
cls
)
\ No newline at end of file
runtime/engine/cls/CMakeLists.txt
0 → 100644
浏览文件 @
78e29c8e
# Build configuration of the audio classification ("cls") engine.
project(cls)

# Loads the fastdeploy CMake module by name.
# NOTE(review): presumably resolved through CMAKE_MODULE_PATH pointing at
# runtime/cmake - confirm.
include(fastdeploy)

# Optional debugging switches tested by panns_nnet.cc:
#   TEST_DEBUG dumps the waveform, features and logits to cls.* files,
#   PRINT_TIME prints the time spent in each processing stage.
# add_definitions("-DTEST_DEBUG")
# add_definitions("-DPRINT_TIME")

add_subdirectory(nnet)
\ No newline at end of file
runtime/engine/cls/nnet/CMakeLists.txt
0 → 100644
浏览文件 @
78e29c8e
# Shared library "cls": the PANNs network wrapper plus its C-style interface.
set(srcs panns_nnet.cc panns_interface.cc)
add_library(cls SHARED ${srcs})
# Links FastDeploy and the in-tree kaldi/frontend/utils targets; the first
# item passes -static-libstdc++ and -Wl,-Bsymbolic to the link step.
target_link_libraries(cls -static-libstdc++;-Wl,-Bsymbolic ${FASTDEPLOY_LIBS} kaldi-matrix kaldi-base frontend utils)

# Command line binary built from panns_nnet_main.cc on top of the library.
set(bin_name panns_nnet_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name} -static-libstdc++;-Wl,-Bsymbolic cls gflags glog)
\ No newline at end of file
runtime/engine/cls/nnet/panns_interface.cc
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cls/nnet/panns_interface.h"
#include "cls/nnet/panns_nnet.h"
#include "common/base/config.h"
namespace
ppspeech
{
void
*
ClsCreateInstance
(
const
char
*
conf_path
)
{
Config
conf
(
conf_path
);
// cls init
ppspeech
::
ClsNnetConf
cls_nnet_conf
;
cls_nnet_conf
.
wav_normal_
=
conf
.
Read
(
"wav_normal"
,
true
);
cls_nnet_conf
.
wav_normal_type_
=
conf
.
Read
(
"wav_normal_type"
,
std
::
string
(
"linear"
));
cls_nnet_conf
.
wav_norm_mul_factor_
=
conf
.
Read
(
"wav_norm_mul_factor"
,
1.0
);
cls_nnet_conf
.
model_file_path_
=
conf
.
Read
(
"model_path"
,
std
::
string
(
""
));
cls_nnet_conf
.
param_file_path_
=
conf
.
Read
(
"param_path"
,
std
::
string
(
""
));
cls_nnet_conf
.
dict_file_path_
=
conf
.
Read
(
"dict_path"
,
std
::
string
(
""
));
cls_nnet_conf
.
num_cpu_thread_
=
conf
.
Read
(
"num_cpu_thread"
,
12
);
cls_nnet_conf
.
samp_freq
=
conf
.
Read
(
"samp_freq"
,
32000
);
cls_nnet_conf
.
frame_length_ms
=
conf
.
Read
(
"frame_length_ms"
,
32
);
cls_nnet_conf
.
frame_shift_ms
=
conf
.
Read
(
"frame_shift_ms"
,
10
);
cls_nnet_conf
.
num_bins
=
conf
.
Read
(
"num_bins"
,
64
);
cls_nnet_conf
.
low_freq
=
conf
.
Read
(
"low_freq"
,
50
);
cls_nnet_conf
.
high_freq
=
conf
.
Read
(
"high_freq"
,
14000
);
cls_nnet_conf
.
dither
=
conf
.
Read
(
"dither"
,
0.0
);
ppspeech
::
ClsNnet
*
cls_model
=
new
ppspeech
::
ClsNnet
();
int
ret
=
cls_model
->
Init
(
cls_nnet_conf
);
return
static_cast
<
void
*>
(
cls_model
);
}
// Releases an instance obtained from ClsCreateInstance.
// A null handle is accepted and ignored. Always returns 0.
int ClsDestroyInstance(void* instance) {
    // `delete` on a null pointer is a no-op, so no explicit null test is
    // needed to match the guarded delete this replaces.
    delete static_cast<ppspeech::ClsNnet*>(instance);
    return 0;
}
// Classifies the wav file at wav_path and writes the top-k result string
// into `result` (at most result_max_len bytes including the terminator).
//
// Returns the status of ClsNnet::Forward (0 on success), or -1 when the
// handle, wav_path or result buffer is null.
int ClsFeedForward(void* instance,
                   const char* wav_path,
                   int topk,
                   char* result,
                   int result_max_len) {
    ppspeech::ClsNnet* cls_model = static_cast<ppspeech::ClsNnet*>(instance);
    if (cls_model == NULL) {
        printf("instance is null\n");
        return -1;
    }
    if (wav_path == NULL || result == NULL) {
        printf("wav_path or result is null\n");
        return -1;
    }
    // Propagate the model status instead of unconditionally reporting success.
    return cls_model->Forward(wav_path, topk, result, result_max_len);
}
int
ClsReset
(
void
*
instance
)
{
ppspeech
::
ClsNnet
*
cls_model
=
static_cast
<
ppspeech
::
ClsNnet
*>
(
instance
);
if
(
cls_model
==
NULL
)
{
printf
(
"instance is null
\n
"
);
return
-
1
;
}
cls_model
->
Reset
();
return
0
;
}
}
// namespace ppspeech
\ No newline at end of file
runtime/engine/cls/nnet/panns_interface.h
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace
ppspeech
{
void
*
ClsCreateInstance
(
const
char
*
conf_path
);
int
ClsDestroyInstance
(
void
*
instance
);
int
ClsFeedForward
(
void
*
instance
,
const
char
*
wav_path
,
int
topk
,
char
*
result
,
int
result_max_len
);
int
ClsReset
(
void
*
instance
);
}
// namespace ppspeech
\ No newline at end of file
runtime/engine/cls/nnet/panns_nnet.cc
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cls/nnet/panns_nnet.h"
#ifdef PRINT_TIME
#include "kaldi/base/timer.h"
#endif
namespace
ppspeech
{
// Creates an uninitialised network wrapper; Init() must be called before
// Forward(). The runtime pointer starts out empty.
ClsNnet::ClsNnet() {
    // wav_reader_ = NULL;
    runtime_.reset();
}
// Drops the result text accumulated in ss_ by earlier Forward() calls.
void ClsNnet::Reset() {
    // wav_reader_->Clear();
    const std::string empty_text;
    ss_.str(empty_text);
}
int
ClsNnet
::
Init
(
const
ClsNnetConf
&
conf
)
{
conf_
=
conf
;
// init fbank opts
fbank_opts_
.
frame_opts
.
samp_freq
=
conf
.
samp_freq
;
fbank_opts_
.
frame_opts
.
frame_length_ms
=
conf
.
frame_length_ms
;
fbank_opts_
.
frame_opts
.
frame_shift_ms
=
conf
.
frame_shift_ms
;
fbank_opts_
.
mel_opts
.
num_bins
=
conf
.
num_bins
;
fbank_opts_
.
mel_opts
.
low_freq
=
conf
.
low_freq
;
fbank_opts_
.
mel_opts
.
high_freq
=
conf
.
high_freq
;
fbank_opts_
.
frame_opts
.
dither
=
conf
.
dither
;
fbank_opts_
.
use_log_fbank
=
false
;
// init dict
if
(
conf
.
dict_file_path_
!=
""
)
{
ReadFileToVector
(
conf
.
dict_file_path_
,
&
dict_
);
}
// init model
fastdeploy
::
RuntimeOption
runtime_option
;
#ifdef USE_ORT_BACKEND
runtime_option
.
SetModelPath
(
conf
.
model_file_path_
,
""
,
fastdeploy
::
ModelFormat
::
ONNX
);
// onnx
runtime_option
.
UseOrtBackend
();
// onnx
#endif
#ifdef USE_PADDLE_LITE_BACKEND
runtime_option
.
SetModelPath
(
conf
.
model_file_path_
,
conf
.
param_file_path_
,
fastdeploy
::
ModelFormat
::
PADDLE
);
runtime_option
.
UseLiteBackend
();
#endif
#ifdef USE_PADDLE_INFERENCE_BACKEND
runtime_option
.
SetModelPath
(
conf
.
model_file_path_
,
conf
.
param_file_path_
,
fastdeploy
::
ModelFormat
::
PADDLE
);
runtime_option
.
UsePaddleInferBackend
();
#endif
runtime_option
.
SetCpuThreadNum
(
conf
.
num_cpu_thread_
);
runtime_option
.
DeletePaddleBackendPass
(
"simplify_with_basic_ops_pass"
);
runtime_
=
std
::
unique_ptr
<
fastdeploy
::
Runtime
>
(
new
fastdeploy
::
Runtime
());
if
(
!
runtime_
->
Init
(
runtime_option
))
{
std
::
cerr
<<
"--- Init FastDeploy Runitme Failed! "
<<
"
\n
--- Model: "
<<
conf
.
model_file_path_
<<
std
::
endl
;
return
-
1
;
}
else
{
std
::
cout
<<
"--- Init FastDeploy Runitme Done! "
<<
"
\n
--- Model: "
<<
conf
.
model_file_path_
<<
std
::
endl
;
}
Reset
();
return
0
;
}
int
ClsNnet
::
Forward
(
const
char
*
wav_path
,
int
topk
,
char
*
result
,
int
result_max_len
)
{
#ifdef PRINT_TIME
kaldi
::
Timer
timer
;
timer
.
Reset
();
#endif
// read wav
std
::
ifstream
infile
(
wav_path
,
std
::
ifstream
::
in
);
kaldi
::
WaveData
wave_data
;
wave_data
.
Read
(
infile
);
int32
this_channel
=
0
;
kaldi
::
Matrix
<
float
>
wavform_kaldi
=
wave_data
.
Data
();
// only get channel 0
int
wavform_len
=
wavform_kaldi
.
NumCols
();
std
::
vector
<
float
>
wavform
(
wavform_kaldi
.
Data
(),
wavform_kaldi
.
Data
()
+
wavform_len
);
WaveformFloatNormal
(
&
wavform
);
WaveformNormal
(
&
wavform
,
conf_
.
wav_normal_
,
conf_
.
wav_normal_type_
,
conf_
.
wav_norm_mul_factor_
);
#ifdef TEST_DEBUG
{
std
::
ofstream
fp
(
"cls.wavform"
,
std
::
ios
::
out
);
for
(
int
i
=
0
;
i
<
wavform
.
size
();
++
i
)
{
fp
<<
std
::
setprecision
(
18
)
<<
wavform
[
i
]
<<
" "
;
}
fp
<<
"
\n
"
;
}
#endif
#ifdef PRINT_TIME
printf
(
"wav read consume: %fs
\n
"
,
timer
.
Elapsed
());
#endif
#ifdef PRINT_TIME
timer
.
Reset
();
#endif
std
::
vector
<
float
>
feats
;
std
::
unique_ptr
<
ppspeech
::
FrontendInterface
>
data_source
(
new
ppspeech
::
DataCache
());
ppspeech
::
Fbank
fbank
(
fbank_opts_
,
std
::
move
(
data_source
));
fbank
.
Accept
(
wavform
);
fbank
.
SetFinished
();
fbank
.
Read
(
&
feats
);
int
feat_dim
=
fbank_opts_
.
mel_opts
.
num_bins
;
int
num_frames
=
feats
.
size
()
/
feat_dim
;
for
(
int
i
=
0
;
i
<
num_frames
;
++
i
)
{
for
(
int
j
=
0
;
j
<
feat_dim
;
++
j
)
{
feats
[
i
*
feat_dim
+
j
]
=
PowerTodb
(
feats
[
i
*
feat_dim
+
j
]);
}
}
#ifdef TEST_DEBUG
{
std
::
ofstream
fp
(
"cls.feat"
,
std
::
ios
::
out
);
for
(
int
i
=
0
;
i
<
num_frames
;
++
i
)
{
for
(
int
j
=
0
;
j
<
feat_dim
;
++
j
)
{
fp
<<
std
::
setprecision
(
18
)
<<
feats
[
i
*
feat_dim
+
j
]
<<
" "
;
}
fp
<<
"
\n
"
;
}
}
#endif
#ifdef PRINT_TIME
printf
(
"extract fbank consume: %fs
\n
"
,
timer
.
Elapsed
());
#endif
// infer
std
::
vector
<
float
>
model_out
;
#ifdef PRINT_TIME
timer
.
Reset
();
#endif
ModelForward
(
feats
.
data
(),
num_frames
,
feat_dim
,
&
model_out
);
#ifdef PRINT_TIME
printf
(
"fast deploy infer consume: %fs
\n
"
,
timer
.
Elapsed
());
#endif
#ifdef TEST_DEBUG
{
std
::
ofstream
fp
(
"cls.logits"
,
std
::
ios
::
out
);
for
(
int
i
=
0
;
i
<
model_out
.
size
();
++
i
)
{
fp
<<
std
::
setprecision
(
18
)
<<
model_out
[
i
]
<<
"
\n
"
;
}
}
#endif
// construct result str
ss_
<<
"{"
;
GetTopkResult
(
topk
,
model_out
);
ss_
<<
"}"
;
if
(
result_max_len
<=
ss_
.
str
().
size
())
{
printf
(
"result_max_len is short than result len
\n
"
);
}
snprintf
(
result
,
result_max_len
,
"%s"
,
ss_
.
str
().
c_str
());
return
0
;
}
int
ClsNnet
::
ModelForward
(
float
*
features
,
const
int
num_frames
,
const
int
feat_dim
,
std
::
vector
<
float
>*
model_out
)
{
// init input tensor shape
fastdeploy
::
TensorInfo
info
=
runtime_
->
GetInputInfo
(
0
);
info
.
shape
=
{
1
,
num_frames
,
feat_dim
};
std
::
vector
<
fastdeploy
::
FDTensor
>
input_tensors
(
1
);
std
::
vector
<
fastdeploy
::
FDTensor
>
output_tensors
(
1
);
input_tensors
[
0
].
SetExternalData
({
1
,
num_frames
,
feat_dim
},
fastdeploy
::
FDDataType
::
FP32
,
static_cast
<
void
*>
(
features
));
// get input name
input_tensors
[
0
].
name
=
info
.
name
;
runtime_
->
Infer
(
input_tensors
,
&
output_tensors
);
// output_tensors[0].PrintInfo();
std
::
vector
<
int64_t
>
output_shape
=
output_tensors
[
0
].
Shape
();
model_out
->
resize
(
output_shape
[
0
]
*
output_shape
[
1
]);
memcpy
(
static_cast
<
void
*>
(
model_out
->
data
()),
output_tensors
[
0
].
Data
(),
output_shape
[
0
]
*
output_shape
[
1
]
*
sizeof
(
float
));
return
0
;
}
// Appends the k best classes of model_out to ss_ as comma separated
// `"label":"score"` pairs (the caller adds the surrounding braces).
//
// k is clamped to the number of model outputs. A class index without an
// entry in dict_ (e.g. when no dictionary was configured) is written as the
// numeric index instead of reading past the end of dict_.
// Always returns 0.
int ClsNnet::GetTopkResult(int k, const std::vector<float>& model_out) {
    const int available = static_cast<int>(model_out.size());
    const int top_n = (k < available) ? k : available;
    if (top_n <= 0) {
        return 0;
    }

    std::vector<float> values;
    std::vector<int> indics;
    TopK(model_out, top_n, &values, &indics);

    // Trust the sizes actually returned by TopK rather than the request.
    size_t emitted = (values.size() < indics.size()) ? values.size()
                                                     : indics.size();
    if (emitted > static_cast<size_t>(top_n)) {
        emitted = static_cast<size_t>(top_n);
    }
    for (size_t i = 0; i < emitted; ++i) {
        if (i != 0) {
            ss_ << ",";
        }
        const int label_id = indics[i];
        ss_ << "\"";
        if (label_id >= 0 && static_cast<size_t>(label_id) < dict_.size()) {
            ss_ << dict_[label_id];
        } else {
            ss_ << label_id;
        }
        ss_ << "\":\"" << values[i] << "\"";
    }
    return 0;
}
}
// namespace ppspeech
\ No newline at end of file
runtime/engine/cls/nnet/panns_nnet.h
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "common/frontend/data_cache.h"
#include "common/frontend/fbank.h"
#include "common/frontend/feature-fbank.h"
#include "common/frontend/frontend_itf.h"
#include "common/frontend/wave-reader.h"
#include "common/utils/audio_process.h"
#include "common/utils/file_utils.h"
#include "fastdeploy/runtime.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
namespace
ppspeech
{
// Configuration of the PANNs classifier.
// Every field has a default (the same values ClsCreateInstance uses for keys
// missing from the config file), so a default constructed object never holds
// indeterminate values.
struct ClsNnetConf {
    // wav
    bool wav_normal_ = true;                   // normalise the waveform
    std::string wav_normal_type_ = "linear";   // "linear" or "gaussian"
    float wav_norm_mul_factor_ = 1.0f;         // gain applied after normalising
    // model
    std::string model_file_path_;
    std::string param_file_path_;
    std::string dict_file_path_;               // label list; may stay empty
    int num_cpu_thread_ = 12;
    // fbank
    float samp_freq = 32000.0f;
    float frame_length_ms = 32.0f;
    float frame_shift_ms = 10.0f;
    int num_bins = 64;
    float low_freq = 50.0f;
    float high_freq = 14000.0f;
    float dither = 0.0f;
};
// Wrapper around a FastDeploy runtime that classifies wav files:
// it extracts fbank features, runs the model and formats the top-k classes.
class ClsNnet {
  public:
    ClsNnet();
    // Stores conf, loads the dictionary and starts the runtime.
    // Returns 0 on success, -1 when the runtime fails to initialise.
    int Init(const ClsNnetConf& conf);
    // Classifies the wav at wav_path and copies the topk result text into
    // `result` (truncated to result_max_len bytes).
    int Forward(const char* wav_path,
                int topk,
                char* result,
                int result_max_len);
    // Clears the result text accumulated by Forward().
    void Reset();

  private:
    // Runs inference on num_frames x feat_dim features and copies the first
    // output tensor into model_out.
    int ModelForward(float* features,
                     const int num_frames,
                     const int feat_dim,
                     std::vector<float>* model_out);
    // NOTE(review): declared only; no definition is visible in panns_nnet.cc.
    int ModelForwardStream(std::vector<float>* feats);
    // Appends the k best `"label":"score"` pairs to ss_.
    int GetTopkResult(int k, const std::vector<float>& model_out);

    ClsNnetConf conf_;                             // copy of the Init() argument
    knf::FbankOptions fbank_opts_;                 // filled from conf_ in Init()
    std::unique_ptr<fastdeploy::Runtime> runtime_; // empty until Init()
    std::vector<std::string> dict_;                // class index -> label
    std::stringstream ss_;                         // result text being built
};
}
// namespace ppspeech
\ No newline at end of file
runtime/engine/cls/nnet/panns_nnet_main.cc
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <string>
#include "base/flags.h"
#include "cls/nnet/panns_interface.h"
// Command line flags; main() requires all three to be non-empty.
// Path of the `key = value` configuration file given to ClsCreateInstance.
DEFINE_string(conf_path, "", "config path");
// Text file read line by line; each line is passed as a wav path.
DEFINE_string(scp_path, "", "wav scp path");
// Number of best classes to print; parsed with atoi in main().
DEFINE_string(topk, "", "print topk results");
// Classifies every wav listed (one path per line) in --scp_path with the
// model described by --conf_path and prints "<path> <result>" per file.
//
// Exit code: 0 when every file was classified, 1 when the instance could not
// be created, the list could not be opened, or any file failed.
int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    CHECK_GT(FLAGS_conf_path.size(), 0);
    CHECK_GT(FLAGS_scp_path.size(), 0);
    CHECK_GT(FLAGS_topk.size(), 0);

    // atoi yields 0 for non-numeric text, which the check below rejects.
    const int topk = std::atoi(FLAGS_topk.c_str());
    CHECK_GT(topk, 0);

    void* instance = ppspeech::ClsCreateInstance(FLAGS_conf_path.c_str());
    if (instance == NULL) {
        fprintf(stderr, "create cls instance failed\n");
        return 1;
    }

    // read wav list
    std::ifstream ifs(FLAGS_scp_path);
    if (!ifs.is_open()) {
        fprintf(stderr, "open scp failed: %s\n", FLAGS_scp_path.c_str());
        ppspeech::ClsDestroyInstance(instance);
        return 1;
    }

    int num_failed = 0;
    std::string line = "";
    while (getline(ifs, line)) {
        if (line.empty()) {
            continue;  // tolerate blank lines in the list
        }
        char result[1024] = {0};
        const int ret = ppspeech::ClsFeedForward(
            instance, line.c_str(), topk, result, 1024);
        if (ret != 0) {
            ++num_failed;
            fprintf(stderr, "classify failed: %s\n", line.c_str());
        }
        printf("%s %s\n", line.c_str(), result);
        // Forward appends to an internal buffer; clear it per utterance.
        ppspeech::ClsReset(instance);
    }

    ppspeech::ClsDestroyInstance(instance);
    return num_failed == 0 ? 0 : 1;
}
runtime/engine/common/base/config.h
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) code is from
// https://blog.csdn.net/huixingshao/article/details/45969887.
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
using
namespace
std
;
#pragma once
#pragma region ParseIniFile
/*
* \brief Generic configuration Class
*
*/
// Minimal `key <delimiter> value` configuration store.
// Keys and values are kept as strings and converted on access through the
// stream operators of the requested type.
class Config {
    // Data
  protected:
    std::string m_Delimiter;  //!< separator between key and value
    std::string m_Comment;    //!< separator between value and comments
    std::map<std::string, std::string>
        m_Contents;  //!< extracted keys and values

    using mapi = std::map<std::string, std::string>::iterator;
    using mapci = std::map<std::string, std::string>::const_iterator;

    // Methods
  public:
    // Loads `filename`; throws File_not_found when it cannot be opened.
    Config(std::string filename,
           std::string delimiter = "=",
           std::string comment = "#");
    // Creates an empty configuration.
    Config();

    // Returns the value stored under in_key converted to T;
    // throws Key_not_found when the key is missing. Call as Read<T>(key).
    template <class T>
    T Read(const std::string& in_key) const;
    // Same lookup, but returns in_value when the key is missing.
    template <class T>
    T Read(const std::string& in_key, const T& in_value) const;
    // Stores the converted value in *out_var; returns whether the key exists.
    template <class T>
    bool ReadInto(T* out_var, const std::string& in_key) const;
    template <class T>
    bool ReadInto(T* out_var,
                  const std::string& in_key,
                  const T& in_value) const;

    bool FileExist(std::string filename);
    void ReadFile(std::string filename,
                  std::string delimiter = "=",
                  std::string comment = "#");

    // Check whether key exists in configuration
    bool KeyExists(const std::string& in_key) const;

    // Modify keys and values
    template <class T>
    void Add(const std::string& in_key, const T& in_value);
    void Remove(const std::string& in_key);

    // Check or change configuration syntax
    std::string GetDelimiter() const { return m_Delimiter; }
    std::string GetComment() const { return m_Comment; }
    // Both setters return the previous separator.
    std::string SetDelimiter(const std::string& in_s) {
        const std::string previous = m_Delimiter;
        m_Delimiter = in_s;
        return previous;
    }
    std::string SetComment(const std::string& in_s) {
        const std::string previous = m_Comment;
        m_Comment = in_s;
        return previous;
    }

    // Write or read configuration
    friend std::ostream& operator<<(std::ostream& os, const Config& cf);
    friend std::istream& operator>>(std::istream& is, Config& cf);

  protected:
    template <class T>
    static std::string T_as_string(const T& t);
    template <class T>
    static T string_as_T(const std::string& s);
    static void Trim(std::string* inout_s);

    // Exception types
  public:
    // Thrown when a configuration file cannot be opened.
    struct File_not_found {
        std::string filename;
        explicit File_not_found(const std::string& filename_ = std::string())
            : filename(filename_) {}
    };
    // Thrown only by the T Read(key) variant of Read().
    struct Key_not_found {
        std::string key;
        explicit Key_not_found(const std::string& key_ = std::string())
            : key(key_) {}
    };
};
/* static */
// Formats `t` through its stream insertion operator.
// Type T must support operator<<.
template <class T>
std::string Config::T_as_string(const T& t) {
    std::ostringstream formatted;
    formatted << t;
    return formatted.str();
}
/* static */
// Parses `s` into a T through its stream extraction operator.
// Type T must support operator>> and be default constructible.
// The result is value-initialised first, so a string that cannot be parsed
// (for example an empty one) yields T() instead of an indeterminate value.
template <class T>
T Config::string_as_T(const std::string& s) {
    T t{};
    std::istringstream ist(s);
    ist >> t;
    return t;
}
/* static */
// String specialisation: the stored text is returned unchanged, so values
// containing internal whitespace are not cut at the first blank.
template <>
inline std::string Config::string_as_T<std::string>(const std::string& s) {
    return s;
}
/* static */
// Bool specialisation (case-insensitive).
// "false", "f", "no", "n", "0" and "none" are false;
// "true", "t", "yes", "y", "1", "-1" and anything else are true.
template <>
inline bool Config::string_as_T<bool>(const std::string& s) {
    // make string all caps
    std::string sup = s;
    for (std::string::iterator p = sup.begin(); p != sup.end(); ++p) {
        // toupper is only defined for unsigned char values (or EOF), so a
        // plain char with the high bit set must be converted first.
        *p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
    }

    static const char* const kFalseWords[] = {
        "FALSE", "F", "NO", "N", "0", "NONE"};
    for (const char* word : kFalseWords) {
        if (sup == word) {
            return false;
        }
    }
    return true;
}
template
<
class
T
>
T
Config
::
Read
(
const
std
::
string
&
key
)
const
{
// Read the value corresponding to key
mapci
p
=
m_Contents
.
find
(
key
);
if
(
p
==
m_Contents
.
end
())
throw
Key_not_found
(
key
);
return
string_as_T
<
T
>
(
p
->
second
);
}
template
<
class
T
>
T
Config
::
Read
(
const
std
::
string
&
key
,
const
T
&
value
)
const
{
// Return the value corresponding to key or given default value
// if key is not found
mapci
p
=
m_Contents
.
find
(
key
);
if
(
p
==
m_Contents
.
end
())
{
printf
(
"%s = %s(default)
\n
"
,
key
.
c_str
(),
T_as_string
(
value
).
c_str
());
return
value
;
}
else
{
printf
(
"%s = %s
\n
"
,
key
.
c_str
(),
T_as_string
(
p
->
second
).
c_str
());
return
string_as_T
<
T
>
(
p
->
second
);
}
}
template
<
class
T
>
bool
Config
::
ReadInto
(
T
*
var
,
const
std
::
string
&
key
)
const
{
// Get the value corresponding to key and store in var
// Return true if key is found
// Otherwise leave var untouched
mapci
p
=
m_Contents
.
find
(
key
);
bool
found
=
(
p
!=
m_Contents
.
end
());
if
(
found
)
*
var
=
string_as_T
<
T
>
(
p
->
second
);
return
found
;
}
// Looks up `key` and stores the converted value in *var; when the key is
// missing *var is set to `value` instead.
// Returns true if the key was found.
template <class T>
bool Config::ReadInto(T* var, const std::string& key, const T& value) const {
    mapci p = m_Contents.find(key);
    bool found = (p != m_Contents.end());
    if (found) {
        *var = string_as_T<T>(p->second);
    } else {
        // The default goes to the pointee; assigning to `var` itself tried to
        // store a T in a T* and did not compile once instantiated.
        *var = value;
    }
    return found;
}
// Stores `value` (formatted with operator<<) under `in_key`, replacing any
// existing entry. Both the key and the formatted value are trimmed first.
template <class T>
void Config::Add(const std::string& in_key, const T& value) {
    std::string text = T_as_string(value);
    std::string name = in_key;
    Trim(&name);
    Trim(&text);
    m_Contents[name] = text;
}
// Constructs a Config and loads its keys and values from `filename`.
// Throws File_not_found when the file cannot be opened.
// `inline` because this definition lives in a header: without it every
// translation unit including the header emits its own copy and linking fails.
inline Config::Config(std::string filename,
                      std::string delimiter,
                      std::string comment)
    : m_Delimiter(delimiter), m_Comment(comment) {
    std::ifstream in(filename.c_str());
    if (!in) {
        throw File_not_found(filename);
    }
    in >> (*this);
}
// Constructs an empty Config with "=" as delimiter and "#" as comment mark.
// `inline` because this definition lives in a header (avoids duplicate
// symbols when the header is included by several translation units).
inline Config::Config()
    : m_Delimiter(std::string(1, '=')), m_Comment(std::string(1, '#')) {}
bool
Config
::
KeyExists
(
const
string
&
key
)
const
{
// Indicate whether key is found
mapci
p
=
m_Contents
.
find
(
key
);
return
(
p
!=
m_Contents
.
end
());
}
/* static */
// Removes leading and trailing whitespace from *inout_s in place.
// A string consisting only of whitespace becomes empty.
// `inline` because this definition lives in a header (avoids duplicate
// symbols when the header is included by several translation units).
inline void Config::Trim(std::string* inout_s) {
    // The blank must be part of the set: files written by operator<< use
    // "key = value", so keys and values are surrounded by spaces.
    static const char whitespace[] = " \n\t\v\r\f";
    inout_s->erase(0, inout_s->find_first_not_of(whitespace));
    // find_last_not_of returns npos for an (now) empty string; npos + 1U
    // wraps to 0, so the erase below is then a no-op.
    inout_s->erase(inout_s->find_last_not_of(whitespace) + 1U);
}
// Writes every entry of `cf` to `os` as "key <delimiter> value", one per
// line, in the key order of the underlying map.
// `inline` because this definition lives in a header (avoids duplicate
// symbols when the header is included by several translation units).
inline std::ostream& operator<<(std::ostream& os, const Config& cf) {
    for (Config::mapci p = cf.m_Contents.begin(); p != cf.m_Contents.end();
         ++p) {
        os << p->first << " " << cf.m_Delimiter << " ";
        os << p->second << std::endl;
    }
    return os;
}
// Removes `key` and its value; does nothing when the key is not present.
// `inline` because this definition lives in a header (avoids duplicate
// symbols when the header is included by several translation units).
inline void Config::Remove(const std::string& key) {
    // Erase by key: erasing the iterator returned by find() is undefined
    // behaviour when the key is missing and find() returns end().
    m_Contents.erase(key);
}
std
::
istream
&
operator
>>
(
std
::
istream
&
is
,
Config
&
cf
)
{
// Load a Config from is
// Read in keys and values, keeping internal whitespace
typedef
string
::
size_type
pos
;
const
string
&
delim
=
cf
.
m_Delimiter
;
// separator
const
string
&
comm
=
cf
.
m_Comment
;
// comment
const
pos
skip
=
delim
.
length
();
// length of separator
string
nextline
=
""
;
// might need to read ahead to see where value ends
while
(
is
||
nextline
.
length
()
>
0
)
{
// Read an entire line at a time
string
line
;
if
(
nextline
.
length
()
>
0
)
{
line
=
nextline
;
// we read ahead; use it now
nextline
=
""
;
}
else
{
std
::
getline
(
is
,
line
);
}
// Ignore comments
line
=
line
.
substr
(
0
,
line
.
find
(
comm
));
// Parse the line if it contains a delimiter
pos
delimPos
=
line
.
find
(
delim
);
if
(
delimPos
<
string
::
npos
)
{
// Extract the key
string
key
=
line
.
substr
(
0
,
delimPos
);
line
.
replace
(
0
,
delimPos
+
skip
,
""
);
// See if value continues on the next line
// Stop at blank line, next line with a key, end of stream,
// or end of file sentry
bool
terminate
=
false
;
while
(
!
terminate
&&
is
)
{
std
::
getline
(
is
,
nextline
);
terminate
=
true
;
string
nlcopy
=
nextline
;
Config
::
Trim
(
&
nlcopy
);
if
(
nlcopy
==
""
)
continue
;
nextline
=
nextline
.
substr
(
0
,
nextline
.
find
(
comm
));
if
(
nextline
.
find
(
delim
)
!=
string
::
npos
)
continue
;
nlcopy
=
nextline
;
Config
::
Trim
(
&
nlcopy
);
if
(
nlcopy
!=
""
)
line
+=
"
\n
"
;
line
+=
nextline
;
terminate
=
false
;
}
// Store key and value
Config
::
Trim
(
&
key
);
Config
::
Trim
(
&
line
);
cf
.
m_Contents
[
key
]
=
line
;
// overwrites if key is repeated
}
}
return
is
;
}
// Returns true when `filename` can be opened for reading.
// `inline` because this definition lives in a header (avoids duplicate
// symbols when the header is included by several translation units).
inline bool Config::FileExist(std::string filename) {
    std::ifstream in(filename.c_str());
    return !in.fail();
}
void
Config
::
ReadFile
(
string
filename
,
string
delimiter
,
string
comment
)
{
m_Delimiter
=
delimiter
;
m_Comment
=
comment
;
std
::
ifstream
in
(
filename
.
c_str
());
if
(
!
in
)
throw
File_not_found
(
filename
);
in
>>
(
*
this
);
}
#pragma endregion ParseIniFIle
runtime/engine/common/utils/CMakeLists.txt
浏览文件 @
78e29c8e
...
...
@@ -3,6 +3,7 @@ add_library(utils
file_utils.cc
math.cc
strings.cc
audio_process.cc
)
...
...
runtime/engine/common/utils/audio_process.cc
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "utils/audio_process.h"
namespace
ppspeech
{
// Scales every sample by 1/32768 in place, mapping 16-bit PCM amplitudes to
// the range [-1, 1). Always returns 0.
int WaveformFloatNormal(std::vector<float>* waveform) {
    for (float& sample : *waveform) {
        sample = sample / 32768.0;
    }
    return 0;
}
// Normalises a waveform in place.
//
// waveform:            samples to normalise.
// wav_normal:          when false the samples are left untouched.
// wav_normal_type:     "linear"   - divide by the peak absolute amplitude;
//                      "gaussian" - subtract the mean and divide by the
//                                   sample standard deviation.
// wav_norm_mul_factor: gain applied to the normalised samples.
//
// Returns 0 on success (including an empty waveform) and -1 for a null
// waveform or an unsupported normalisation type.
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor) {
    if (wav_normal == false) {
        return 0;
    }
    if (waveform == nullptr) {
        printf("waveform is null\n");
        return -1;
    }

    if (wav_normal_type == "linear") {
        // Absolute values are never negative, so 0 is the natural seed.
        float amax = 0.0f;
        for (float sample : *waveform) {
            amax = std::max(amax, std::abs(sample));
        }
        // 1e-8 keeps the division defined for an all-zero signal.
        const float factor = 1.0 / (amax + 1e-8);
        for (float& sample : *waveform) {
            sample = sample * factor * wav_norm_mul_factor;
        }
    } else if (wav_normal_type == "gaussian") {
        if (waveform->empty()) {
            return 0;  // nothing to normalise; avoids 0/0 for the mean
        }
        const double count = static_cast<double>(waveform->size());
        const double sum =
            std::accumulate(waveform->begin(), waveform->end(), 0.0);
        const double mean = sum / count;  // mean

        double accum = 0.0;
        for (float sample : *waveform) {
            const double diff = sample - mean;
            accum += diff * diff;
        }
        // Sample standard deviation (n - 1 denominator). A single sample has
        // no spread; dividing by n - 1 == 0 there would produce NaN.
        double stdev = (waveform->size() > 1) ? sqrt(accum / (count - 1.0))
                                              : 0.0;
        stdev = std::max(stdev, 1e-8);

        for (float& sample : *waveform) {
            sample = wav_norm_mul_factor * (sample - mean) / stdev;
        }
    } else {
        printf("don't support\n");
        return -1;
    }
    return 0;
}
// Converts a power value to decibels relative to ref_value:
//   10 * log10(max(amin, in)) - 10 * log10(max(ref_value, amin))
//
// amin and ref_value must be strictly positive; otherwise a message is
// printed and -1 is returned. top_db is accepted but not used here.
float PowerTodb(float in, float ref_value, float amin, float top_db) {
    const bool amin_ok = !(amin <= 0);
    if (!amin_ok) {
        printf("amin must be strictly positive\n");
        return -1;
    }
    const bool ref_ok = !(ref_value <= 0);
    if (!ref_ok) {
        printf("ref_value must be strictly positive\n");
        return -1;
    }

    // The signal term is rounded to float before the reference term is
    // subtracted.
    float db = 10.0 * log10(std::max(amin, in));
    db = db - 10.0 * log10(std::max(ref_value, amin));
    return db;
}
}
// namespace ppspeech
\ No newline at end of file
runtime/engine/common/utils/audio_process.h
0 → 100644
浏览文件 @
78e29c8e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Include guard: the default arguments of PowerTodb below may only be given
// once per translation unit, so a second inclusion must be a no-op.
#pragma once

#include <vector>
#include <string>
#include <algorithm>
#include <numeric>
#include <iomanip>
#include <math.h>

namespace ppspeech {

// Divides every sample by 32768 in place. Returns 0.
int WaveformFloatNormal(std::vector<float>* waveform);

// Normalises the waveform in place when wav_normal is true.
// wav_normal_type selects "linear" (peak) or "gaussian" (mean/stddev)
// normalisation; the result is multiplied by wav_norm_mul_factor.
// Returns 0 on success and -1 for an unsupported type.
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor);

// Converts a power value to decibels relative to ref_value, flooring the
// input at amin. Returns -1 when amin or ref_value is not strictly positive.
// top_db is not used by the implementation in audio_process.cc.
float PowerTodb(float in,
                float ref_value = 1.0,
                float amin = 1e-10,
                float top_db = 80.0);

}  // namespace ppspeech
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录