Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
99b3632d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
99b3632d
编写于
10月 18, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
separate recognizer; NnetBase as base class
上级
fddcd36f
变更
17
显示空白变更内容
内联
并排
Showing
17 changed files
with
78 additions
and
40 deletions
+78
-40
speechx/speechx/CMakeLists.txt
speechx/speechx/CMakeLists.txt
+6
-0
speechx/speechx/decoder/CMakeLists.txt
speechx/speechx/decoder/CMakeLists.txt
+4
-10
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+1
-1
speechx/speechx/nnet/decodable.h
speechx/speechx/nnet/decodable.h
+3
-3
speechx/speechx/nnet/ds2_nnet.h
speechx/speechx/nnet/ds2_nnet.h
+1
-1
speechx/speechx/nnet/nnet_itf.h
speechx/speechx/nnet/nnet_itf.h
+6
-4
speechx/speechx/nnet/u2_nnet.cc
speechx/speechx/nnet/u2_nnet.cc
+1
-1
speechx/speechx/nnet/u2_nnet.h
speechx/speechx/nnet/u2_nnet.h
+3
-3
speechx/speechx/protocol/websocket/CMakeLists.txt
speechx/speechx/protocol/websocket/CMakeLists.txt
+1
-1
speechx/speechx/protocol/websocket/websocket_server.h
speechx/speechx/protocol/websocket/websocket_server.h
+1
-1
speechx/speechx/recognizer/CMakeLists.txt
speechx/speechx/recognizer/CMakeLists.txt
+45
-0
speechx/speechx/recognizer/recognizer.cc
speechx/speechx/recognizer/recognizer.cc
+1
-1
speechx/speechx/recognizer/recognizer.h
speechx/speechx/recognizer/recognizer.h
+0
-0
speechx/speechx/recognizer/recognizer_main.cc
speechx/speechx/recognizer/recognizer_main.cc
+2
-11
speechx/speechx/recognizer/u2_recognizer.cc
speechx/speechx/recognizer/u2_recognizer.cc
+2
-2
speechx/speechx/recognizer/u2_recognizer.h
speechx/speechx/recognizer/u2_recognizer.h
+0
-0
speechx/speechx/recognizer/u2_recognizer_main.cc
speechx/speechx/recognizer/u2_recognizer_main.cc
+1
-1
未找到文件。
speechx/speechx/CMakeLists.txt
浏览文件 @
99b3632d
...
...
@@ -32,6 +32,12 @@ ${CMAKE_CURRENT_SOURCE_DIR}/decoder
)
add_subdirectory
(
decoder
)
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/recognizer
)
add_subdirectory
(
recognizer
)
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/protocol
...
...
speechx/speechx/decoder/CMakeLists.txt
浏览文件 @
99b3632d
project
(
decoder
)
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR/ctc_decoders
}
)
set
(
decoder_src
)
set
(
srcs
)
if
(
USING_DS2
)
list
(
APPEND
decoder_src
list
(
APPEND
srcs
ctc_decoders/decoder_utils.cpp
ctc_decoders/path_trie.cpp
ctc_decoders/scorer.cpp
ctc_beam_search_decoder.cc
ctc_tlg_decoder.cc
recognizer.cc
)
endif
()
if
(
USING_U2
)
list
(
APPEND
decoder_src
list
(
APPEND
srcs
ctc_prefix_beam_search_decoder.cc
u2_recognizer.cc
)
endif
()
add_library
(
decoder STATIC
${
decoder_src
}
)
add_library
(
decoder STATIC
${
srcs
}
)
target_link_libraries
(
decoder PUBLIC kenlm utils fst frontend nnet kaldi-decoder absl::strings
)
# test
...
...
@@ -30,7 +26,6 @@ if (USING_DS2)
set
(
BINS
ctc_beam_search_decoder_main
nnet_logprob_decoder_main
recognizer_main
ctc_tlg_decoder_main
)
...
...
@@ -45,7 +40,6 @@ endif()
if
(
USING_U2
)
set
(
TEST_BINS
ctc_prefix_beam_search_decoder_main
u2_recognizer_main
)
foreach
(
bin_name IN LISTS TEST_BINS
)
...
...
speechx/speechx/nnet/decodable.cc
浏览文件 @
99b3632d
...
...
@@ -21,7 +21,7 @@ using kaldi::Matrix;
using
kaldi
::
Vector
;
using
std
::
vector
;
Decodable
::
Decodable
(
const
std
::
shared_ptr
<
Nnet
Interfac
e
>&
nnet
,
Decodable
::
Decodable
(
const
std
::
shared_ptr
<
Nnet
Bas
e
>&
nnet
,
const
std
::
shared_ptr
<
FrontendInterface
>&
frontend
,
kaldi
::
BaseFloat
acoustic_scale
)
:
frontend_
(
frontend
),
...
...
speechx/speechx/nnet/decodable.h
浏览文件 @
99b3632d
...
...
@@ -24,7 +24,7 @@ struct DecodableOpts;
class
Decodable
:
public
kaldi
::
DecodableInterface
{
public:
explicit
Decodable
(
const
std
::
shared_ptr
<
Nnet
Interfac
e
>&
nnet
,
explicit
Decodable
(
const
std
::
shared_ptr
<
Nnet
Bas
e
>&
nnet
,
const
std
::
shared_ptr
<
FrontendInterface
>&
frontend
,
kaldi
::
BaseFloat
acoustic_scale
=
1.0
);
...
...
@@ -63,14 +63,14 @@ class Decodable : public kaldi::DecodableInterface {
int32
TokenId2NnetId
(
int32
token_id
);
std
::
shared_ptr
<
Nnet
Interfac
e
>
Nnet
()
{
return
nnet_
;
}
std
::
shared_ptr
<
Nnet
Bas
e
>
Nnet
()
{
return
nnet_
;
}
// for offline test
void
Acceptlikelihood
(
const
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>&
likelihood
);
private:
std
::
shared_ptr
<
FrontendInterface
>
frontend_
;
std
::
shared_ptr
<
Nnet
Interfac
e
>
nnet_
;
std
::
shared_ptr
<
Nnet
Bas
e
>
nnet_
;
// nnet outputs' cache
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
nnet_out_cache_
;
...
...
speechx/speechx/nnet/ds2_nnet.h
浏览文件 @
99b3632d
...
...
@@ -48,7 +48,7 @@ class Tensor {
std
::
vector
<
T
>
_data
;
};
class
PaddleNnet
:
public
Nnet
Interfac
e
{
class
PaddleNnet
:
public
Nnet
Bas
e
{
public:
PaddleNnet
(
const
ModelOptions
&
opts
);
...
...
speechx/speechx/nnet/nnet_itf.h
浏览文件 @
99b3632d
...
...
@@ -11,8 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/basic_types.h"
...
...
@@ -105,11 +103,15 @@ class NnetInterface {
// true, nnet output is logprob; otherwise is prob,
virtual
bool
IsLogProb
()
=
0
;
int
SubsamplingRate
()
const
{
return
subsampling_rate_
;
}
// using to get encoder outs. e.g. seq2seq with Attention model.
virtual
void
EncoderOuts
(
std
::
vector
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>*
encoder_out
)
const
=
0
;
};
class
NnetBase
:
public
NnetInterface
{
public:
int
SubsamplingRate
()
const
{
return
subsampling_rate_
;
}
protected:
int
subsampling_rate_
{
1
};
...
...
speechx/speechx/nnet/u2_nnet.cc
浏览文件 @
99b3632d
...
...
@@ -193,7 +193,7 @@ U2Nnet::U2Nnet(const U2Nnet& other) {
// ignore inner states
}
std
::
shared_ptr
<
Nnet
Interfac
e
>
U2Nnet
::
Copy
()
const
{
std
::
shared_ptr
<
Nnet
Bas
e
>
U2Nnet
::
Copy
()
const
{
auto
asr_model
=
std
::
make_shared
<
U2Nnet
>
(
*
this
);
// reset inner state for new decoding
asr_model
->
Reset
();
...
...
speechx/speechx/nnet/u2_nnet.h
浏览文件 @
99b3632d
...
...
@@ -24,7 +24,7 @@
namespace
ppspeech
{
class
U2NnetBase
:
public
Nnet
Interfac
e
{
class
U2NnetBase
:
public
Nnet
Bas
e
{
public:
virtual
int
context
()
const
{
return
right_context_
+
1
;
}
virtual
int
right_context
()
const
{
return
right_context_
;
}
...
...
@@ -41,7 +41,7 @@ class U2NnetBase : public NnetInterface {
// start: false, it is the start chunk of one sentence, else true
virtual
int
num_frames_for_chunk
(
bool
start
)
const
;
virtual
std
::
shared_ptr
<
Nnet
Interfac
e
>
Copy
()
const
=
0
;
virtual
std
::
shared_ptr
<
Nnet
Bas
e
>
Copy
()
const
=
0
;
virtual
void
ForwardEncoderChunk
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
chunk_feats
,
...
...
@@ -99,7 +99,7 @@ class U2Nnet : public U2NnetBase {
std
::
shared_ptr
<
paddle
::
jit
::
Layer
>
model
()
const
{
return
model_
;
}
std
::
shared_ptr
<
Nnet
Interfac
e
>
Copy
()
const
override
;
std
::
shared_ptr
<
Nnet
Bas
e
>
Copy
()
const
override
;
void
ForwardEncoderChunkImpl
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
chunk_feats
,
...
...
speechx/speechx/protocol/websocket/CMakeLists.txt
浏览文件 @
99b3632d
...
...
@@ -2,7 +2,7 @@ add_library(websocket STATIC
websocket_server.cc
websocket_client.cc
)
target_link_libraries
(
websocket PUBLIC frontend
decoder nnet
)
target_link_libraries
(
websocket PUBLIC frontend
nnet decoder recognizer
)
add_executable
(
websocket_server_main
${
CMAKE_CURRENT_SOURCE_DIR
}
/websocket_server_main.cc
)
target_include_directories
(
websocket_server_main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
...
...
speechx/speechx/protocol/websocket/websocket_server.h
浏览文件 @
99b3632d
...
...
@@ -19,7 +19,7 @@
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
#include "boost/beast/websocket.hpp"
#include "
decod
er/recognizer.h"
#include "
recogniz
er/recognizer.h"
#include "frontend/audio/feature_pipeline.h"
namespace
beast
=
boost
::
beast
;
// from <boost/beast.hpp>
...
...
speechx/speechx/recognizer/CMakeLists.txt
0 → 100644
浏览文件 @
99b3632d
set
(
srcs
)
if
(
USING_DS2
)
list
(
APPEND srcs
recognizer.cc
)
endif
()
if
(
USING_U2
)
list
(
APPEND srcs
u2_recognizer.cc
)
endif
()
add_library
(
recognizer STATIC
${
srcs
}
)
target_link_libraries
(
recognizer PUBLIC decoder
)
# test
if
(
USING_DS2
)
set
(
BINS recognizer_main
)
foreach
(
bin_name IN LISTS BINS
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
PUBLIC recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
${
DEPS
}
)
endforeach
()
endif
()
if
(
USING_U2
)
set
(
TEST_BINS
u2_recognizer_main
)
foreach
(
bin_name IN LISTS TEST_BINS
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
)
target_compile_options
(
${
bin_name
}
PRIVATE
${
PADDLE_COMPILE_FLAGS
}
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
pybind11_INCLUDE_DIRS
}
${
PROJECT_SOURCE_DIR
}
)
target_link_libraries
(
${
bin_name
}
${
PYTHON_LIBRARIES
}
${
PADDLE_LINK_FLAGS
}
)
endforeach
()
endif
()
speechx/speechx/decoder/recognizer.cc
→
speechx/speechx/recognizer/recognizer.cc
浏览文件 @
99b3632d
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "
decod
er/recognizer.h"
#include "
recogniz
er/recognizer.h"
namespace
ppspeech
{
...
...
speechx/speechx/decoder/recognizer.h
→
speechx/speechx/recognizer/recognizer.h
浏览文件 @
99b3632d
文件已移动
speechx/speechx/decoder/recognizer_main.cc
→
speechx/speechx/recognizer/recognizer_main.cc
浏览文件 @
99b3632d
...
...
@@ -13,7 +13,7 @@
// limitations under the License.
#include "decoder/param.h"
#include "
decod
er/recognizer.h"
#include "
recogniz
er/recognizer.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/table-types.h"
...
...
@@ -22,15 +22,6 @@ DEFINE_string(result_wspecifier, "", "test result wspecifier");
DEFINE_double
(
streaming_chunk
,
0.36
,
"streaming feature chunk size"
);
DEFINE_int32
(
sample_rate
,
16000
,
"sample rate"
);
ppspeech
::
RecognizerResource
InitRecognizerResoure
()
{
ppspeech
::
RecognizerResource
resource
;
resource
.
acoustic_scale
=
FLAGS_acoustic_scale
;
resource
.
feature_pipeline_opts
=
ppspeech
::
FeaturePipelineOptions
::
InitFromFlags
();
resource
.
model_opts
=
ppspeech
::
ModelOptions
::
InitFromFlags
();
resource
.
tlg_opts
=
ppspeech
::
TLGDecoderOptions
::
InitFromFlags
();
return
resource
;
}
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
SetUsageMessage
(
"Usage:"
);
...
...
@@ -39,7 +30,7 @@ int main(int argc, char* argv[]) {
google
::
InstallFailureSignalHandler
();
FLAGS_logtostderr
=
1
;
ppspeech
::
RecognizerResource
resource
=
InitRecognizerResoure
();
ppspeech
::
RecognizerResource
resource
=
ppspeech
::
RecognizerResource
::
InitFromFlags
();
ppspeech
::
Recognizer
recognizer
(
resource
);
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
...
...
speechx/speechx/decoder/u2_recognizer.cc
→
speechx/speechx/recognizer/u2_recognizer.cc
浏览文件 @
99b3632d
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "
decod
er/u2_recognizer.h"
#include "
recogniz
er/u2_recognizer.h"
#include "nnet/u2_nnet.h"
...
...
@@ -30,7 +30,7 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
const
FeaturePipelineOptions
&
feature_opts
=
resource
.
feature_pipeline_opts
;
feature_pipeline_
.
reset
(
new
FeaturePipeline
(
feature_opts
));
std
::
shared_ptr
<
Nnet
Interfac
e
>
nnet
(
new
U2Nnet
(
resource
.
model_opts
));
std
::
shared_ptr
<
Nnet
Bas
e
>
nnet
(
new
U2Nnet
(
resource
.
model_opts
));
BaseFloat
am_scale
=
resource
.
acoustic_scale
;
decodable_
.
reset
(
new
Decodable
(
nnet
,
feature_pipeline_
,
am_scale
));
...
...
speechx/speechx/decoder/u2_recognizer.h
→
speechx/speechx/recognizer/u2_recognizer.h
浏览文件 @
99b3632d
文件已移动
speechx/speechx/decoder/u2_recognizer_main.cc
→
speechx/speechx/recognizer/u2_recognizer_main.cc
浏览文件 @
99b3632d
...
...
@@ -13,7 +13,7 @@
// limitations under the License.
#include "decoder/param.h"
#include "
decod
er/u2_recognizer.h"
#include "
recogniz
er/u2_recognizer.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/table-types.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录