Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
143ab136
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
143ab136
编写于
3月 31, 2022
作者:
Y
Yang Zhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add decoder_test_main
上级
6f0b3a15
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
209 addition
and
7 deletion
+209
-7
speechx/examples/decoder/CMakeLists.txt
speechx/examples/decoder/CMakeLists.txt
+9
-0
speechx/examples/decoder/decoder_test_main.cc
speechx/examples/decoder/decoder_test_main.cc
+69
-0
speechx/examples/decoder/offline_decoder_main.cc
speechx/examples/decoder/offline_decoder_main.cc
+1
-0
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
...hx/examples/decoder/offline_decoder_sliding_chunk_main.cc
+119
-0
speechx/speechx/decoder/ctc_beam_search_decoder.cc
speechx/speechx/decoder/ctc_beam_search_decoder.cc
+4
-2
speechx/speechx/decoder/ctc_beam_search_decoder.h
speechx/speechx/decoder/ctc_beam_search_decoder.h
+2
-2
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+5
-3
未找到文件。
speechx/examples/decoder/CMakeLists.txt
浏览文件 @
143ab136
cmake_minimum_required
(
VERSION 3.14 FATAL_ERROR
)
cmake_minimum_required
(
VERSION 3.14 FATAL_ERROR
)
add_executable
(
offline_decoder_sliding_chunk_main
${
CMAKE_CURRENT_SOURCE_DIR
}
/offline_decoder_sliding_chunk_main.cc
)
target_include_directories
(
offline_decoder_sliding_chunk_main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
offline_decoder_sliding_chunk_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
${
DEPS
}
)
add_executable
(
offline_decoder_main
${
CMAKE_CURRENT_SOURCE_DIR
}
/offline_decoder_main.cc
)
add_executable
(
offline_decoder_main
${
CMAKE_CURRENT_SOURCE_DIR
}
/offline_decoder_main.cc
)
target_include_directories
(
offline_decoder_main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_include_directories
(
offline_decoder_main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
offline_decoder_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
${
DEPS
}
)
target_link_libraries
(
offline_decoder_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
${
DEPS
}
)
add_executable
(
decoder_test_main
${
CMAKE_CURRENT_SOURCE_DIR
}
/decoder_test_main.cc
)
target_include_directories
(
decoder_test_main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
decoder_test_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
${
DEPS
}
)
speechx/examples/decoder/decoder_test_main.cc
0 → 100644
浏览文件 @
143ab136
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO: refactor, replace with gtest
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
// Command-line flags of the decoder test binary.
//
// --nnet_prob_respecifier: handed to the kaldi::SequentialBaseFloatMatrixReader
// that main() iterates over; every matrix read from it is fed to the decoder
// as a likelihood matrix. Defaults to the empty string.
DEFINE_string
(
nnet_prob_respecifier
,
""
,
"test nnet prob rspecifier"
);
// --dict_file: copied into CTCBeamSearchOptions::dict_file by main().
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
// --lm_path: copied into CTCBeamSearchOptions::lm_path by main().
DEFINE_string
(
lm_path
,
"lm.klm"
,
"language model"
);
// Names made available unqualified to the rest of this file.
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
// Decodes pre-computed likelihood matrices with the CTC beam search decoder
// and logs the best path found for every utterance.
//
// Flags read: --nnet_prob_respecifier (input matrices), --dict_file and
// --lm_path (forwarded to CTCBeamSearchOptions).
//
// Returns 0 when at least one utterance was processed, 1 otherwise.
int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    kaldi::SequentialBaseFloatMatrixReader likelihood_reader(
        FLAGS_nnet_prob_respecifier);
    std::string dict_file = FLAGS_dict_file;
    std::string lm_path = FLAGS_lm_path;

    // num_err is only reported in the summary line; nothing in this function
    // increments it.
    int32 num_done = 0, num_err = 0;

    ppspeech::CTCBeamSearchOptions opts;
    opts.dict_file = dict_file;
    opts.lm_path = lm_path;
    ppspeech::CTCBeamSearch decoder(opts);

    // The decodable is built with both constructor arguments null; the
    // likelihoods are injected directly through Acceptlikelihood() below.
    std::shared_ptr<ppspeech::Decodable> decodable(
        new ppspeech::Decodable(nullptr, nullptr));

    decoder.InitDecoder();

    for (; !likelihood_reader.Done(); likelihood_reader.Next()) {
        const std::string utt = likelihood_reader.Key();
        // Bind by reference instead of copying the whole matrix: it is only
        // read here, and the reader is not advanced until the loop increment.
        const kaldi::Matrix<BaseFloat>& likelihood = likelihood_reader.Value();

        decodable->Acceptlikelihood(likelihood);
        decoder.AdvanceDecode(decodable);

        const std::string result = decoder.GetFinalBestPath();
        KALDI_LOG << " the result of " << utt << " is " << result;

        // Clear per-utterance state before the next matrix is decoded.
        decodable->Reset();
        decoder.Reset();
        ++num_done;
    }

    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}
speechx/examples/decoder/offline_decoder_main.cc
浏览文件 @
143ab136
...
@@ -52,6 +52,7 @@ int main(int argc, char* argv[]) {
...
@@ -52,6 +52,7 @@ int main(int argc, char* argv[]) {
ppspeech
::
CTCBeamSearch
decoder
(
opts
);
ppspeech
::
CTCBeamSearch
decoder
(
opts
);
ppspeech
::
ModelOptions
model_opts
;
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
cache_shape
=
"5-1-1024,5-1-1024"
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
params_path
=
model_params
;
model_opts
.
params_path
=
model_params
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
...
...
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
0 → 100644
浏览文件 @
143ab136
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO: refactor, replace with gtest
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "frontend/raw_audio.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
// Command-line flags of the sliding-chunk offline decoder binary.
//
// --feature_respecifier: handed to the kaldi::SequentialBaseFloatMatrixReader
// that main() iterates over; each matrix read is treated as the feature matrix
// of one utterance. Defaults to the empty string.
DEFINE_string
(
feature_respecifier
,
""
,
"test feature rspecifier"
);
// --model_path: copied into ModelOptions::model_path by main().
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
// --param_path: copied into ModelOptions::params_path by main().
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
// --dict_file: copied into CTCBeamSearchOptions::dict_file by main().
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
// --lm_path: copied into CTCBeamSearchOptions::lm_path by main().
DEFINE_string
(
lm_path
,
"lm.klm"
,
"language model"
);
// Names made available unqualified to the rest of this file.
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
FLAGS_feature_respecifier
);
std
::
string
model_graph
=
FLAGS_model_path
;
std
::
string
model_params
=
FLAGS_param_path
;
std
::
string
dict_file
=
FLAGS_dict_file
;
std
::
string
lm_path
=
FLAGS_lm_path
;
int32
num_done
=
0
,
num_err
=
0
;
ppspeech
::
CTCBeamSearchOptions
opts
;
opts
.
dict_file
=
dict_file
;
opts
.
lm_path
=
lm_path
;
ppspeech
::
CTCBeamSearch
decoder
(
opts
);
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
params_path
=
model_params
;
model_opts
.
cache_shape
=
"5-1-1024,5-1-1024"
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
new
ppspeech
::
PaddleNnet
(
model_opts
));
std
::
shared_ptr
<
ppspeech
::
RawDataCache
>
raw_data
(
new
ppspeech
::
RawDataCache
());
std
::
shared_ptr
<
ppspeech
::
Decodable
>
decodable
(
new
ppspeech
::
Decodable
(
nnet
,
raw_data
));
int32
chunk_size
=
7
;
int32
chunk_stride
=
4
;
int32
receptive_field_length
=
7
;
decoder
.
InitDecoder
();
for
(;
!
feature_reader
.
Done
();
feature_reader
.
Next
())
{
string
utt
=
feature_reader
.
Key
();
kaldi
::
Matrix
<
BaseFloat
>
feature
=
feature_reader
.
Value
();
raw_data
->
SetDim
(
feature
.
NumCols
());
int32
row_idx
=
0
;
int32
padding_len
=
0
;
int32
ori_feature_len
=
feature
.
NumRows
();
if
(
(
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
!=
0
)
{
padding_len
=
chunk_stride
-
(
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
;
feature
.
Resize
(
feature
.
NumRows
()
+
padding_len
,
feature
.
NumCols
(),
kaldi
::
kCopyData
);
}
int32
num_chunks
=
(
feature
.
NumRows
()
-
chunk_size
)
/
chunk_stride
+
1
;
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunks
;
++
chunk_idx
)
{
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
feature_chunk
(
chunk_size
*
feature
.
NumCols
());
int32
feature_chunk_size
=
0
;
if
(
ori_feature_len
>
chunk_idx
*
chunk_stride
)
{
feature_chunk_size
=
std
::
min
(
ori_feature_len
-
chunk_idx
*
chunk_stride
,
chunk_size
);
}
if
(
feature_chunk_size
<
receptive_field_length
)
break
;
int32
start
=
chunk_idx
*
chunk_stride
;
int32
end
=
start
+
chunk_size
;
for
(
int
row_id
=
0
;
row_id
<
chunk_size
;
++
row_id
)
{
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
tmp
(
feature
,
start
);
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
f_chunk_tmp
(
feature_chunk
.
Data
()
+
row_id
*
feature
.
NumCols
(),
feature
.
NumCols
());
f_chunk_tmp
.
CopyFromVec
(
tmp
);
++
start
;
}
raw_data
->
Accept
(
feature_chunk
);
if
(
chunk_idx
==
num_chunks
-
1
)
{
raw_data
->
SetFinished
();
}
decoder
.
AdvanceDecode
(
decodable
);
}
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
decodable
->
Reset
();
decoder
.
Reset
();
++
num_done
;
}
KALDI_LOG
<<
"Done "
<<
num_done
<<
" utterances, "
<<
num_err
<<
" with errors."
;
return
(
num_done
!=
0
?
0
:
1
);
}
speechx/speechx/decoder/ctc_beam_search_decoder.cc
浏览文件 @
143ab136
...
@@ -38,8 +38,10 @@ CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts)
...
@@ -38,8 +38,10 @@ CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts)
<<
vocabulary_
.
size
();
<<
vocabulary_
.
size
();
LOG
(
INFO
)
<<
"language model path: "
<<
opts_
.
lm_path
;
LOG
(
INFO
)
<<
"language model path: "
<<
opts_
.
lm_path
;
init_ext_scorer_
=
std
::
make_shared
<
Scorer
>
(
if
(
opts_
.
lm_path
!=
""
)
{
opts_
.
alpha
,
opts_
.
beta
,
opts_
.
lm_path
,
vocabulary_
);
init_ext_scorer_
=
std
::
make_shared
<
Scorer
>
(
opts_
.
alpha
,
opts_
.
beta
,
opts_
.
lm_path
,
vocabulary_
);
}
blank_id_
=
0
;
blank_id_
=
0
;
auto
it
=
std
::
find
(
vocabulary_
.
begin
(),
vocabulary_
.
end
(),
" "
);
auto
it
=
std
::
find
(
vocabulary_
.
begin
(),
vocabulary_
.
end
(),
" "
);
...
...
speechx/speechx/decoder/ctc_beam_search_decoder.h
浏览文件 @
143ab136
...
@@ -33,13 +33,13 @@ struct CTCBeamSearchOptions {
...
@@ -33,13 +33,13 @@ struct CTCBeamSearchOptions {
int
num_proc_bsearch
;
int
num_proc_bsearch
;
CTCBeamSearchOptions
()
CTCBeamSearchOptions
()
:
dict_file
(
"vocab.txt"
),
:
dict_file
(
"vocab.txt"
),
lm_path
(
"
lm.klm
"
),
lm_path
(
""
),
alpha
(
1.9
f
),
alpha
(
1.9
f
),
beta
(
5.0
),
beta
(
5.0
),
beam_size
(
300
),
beam_size
(
300
),
cutoff_prob
(
0.99
f
),
cutoff_prob
(
0.99
f
),
cutoff_top_n
(
40
),
cutoff_top_n
(
40
),
num_proc_bsearch
(
0
)
{}
num_proc_bsearch
(
1
0
)
{}
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
opts
->
Register
(
"dict"
,
&
dict_file
,
"dict file "
);
opts
->
Register
(
"dict"
,
&
dict_file
,
"dict file "
);
...
...
speechx/speechx/nnet/decodable.cc
浏览文件 @
143ab136
...
@@ -26,6 +26,7 @@ Decodable::Decodable(const std::shared_ptr<NnetInterface>& nnet,
...
@@ -26,6 +26,7 @@ Decodable::Decodable(const std::shared_ptr<NnetInterface>& nnet,
:
frontend_
(
frontend
),
nnet_
(
nnet
),
frame_offset_
(
0
),
frames_ready_
(
0
)
{}
:
frontend_
(
frontend
),
nnet_
(
nnet
),
frame_offset_
(
0
),
frames_ready_
(
0
)
{}
void
Decodable
::
Acceptlikelihood
(
const
Matrix
<
BaseFloat
>&
likelihood
)
{
void
Decodable
::
Acceptlikelihood
(
const
Matrix
<
BaseFloat
>&
likelihood
)
{
nnet_cache_
=
likelihood
;
frames_ready_
+=
likelihood
.
NumRows
();
frames_ready_
+=
likelihood
.
NumRows
();
}
}
...
@@ -53,7 +54,7 @@ bool Decodable::EnsureFrameHaveComputed(int32 frame) {
...
@@ -53,7 +54,7 @@ bool Decodable::EnsureFrameHaveComputed(int32 frame) {
bool
Decodable
::
AdvanceChunk
()
{
bool
Decodable
::
AdvanceChunk
()
{
Vector
<
BaseFloat
>
features
;
Vector
<
BaseFloat
>
features
;
if
(
frontend_
->
Read
(
&
features
)
==
false
)
{
if
(
frontend_
==
NULL
||
frontend_
->
Read
(
&
features
)
==
false
)
{
return
false
;
return
false
;
}
}
int32
nnet_dim
=
0
;
int32
nnet_dim
=
0
;
...
@@ -77,10 +78,11 @@ bool Decodable::FrameLogLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
...
@@ -77,10 +78,11 @@ bool Decodable::FrameLogLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
}
}
void
Decodable
::
Reset
()
{
void
Decodable
::
Reset
()
{
frontend_
->
Reset
();
if
(
frontend_
!=
nullptr
)
frontend_
->
Reset
();
nnet_
->
Reset
();
if
(
nnet_
!=
nullptr
)
nnet_
->
Reset
();
frame_offset_
=
0
;
frame_offset_
=
0
;
frames_ready_
=
0
;
frames_ready_
=
0
;
nnet_cache_
.
Resize
(
0
,
0
);
}
}
}
// namespace ppspeech
}
// namespace ppspeech
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录