Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
56a0a024
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
56a0a024
编写于
10月 17, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format code
上级
616fc459
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
53 addition
and
55 deletion
+53
-55
speechx/speechx/base/basic_types.h
speechx/speechx/base/basic_types.h
+2
-2
speechx/speechx/base/common.h
speechx/speechx/base/common.h
+0
-1
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h
+0
-1
speechx/speechx/decoder/ctc_tlg_decoder.h
speechx/speechx/decoder/ctc_tlg_decoder.h
+6
-5
speechx/speechx/decoder/param.h
speechx/speechx/decoder/param.h
+1
-2
speechx/speechx/decoder/recognizer.h
speechx/speechx/decoder/recognizer.h
+6
-6
speechx/speechx/decoder/u2_recognizer.h
speechx/speechx/decoder/u2_recognizer.h
+13
-13
speechx/speechx/frontend/audio/data_cache.h
speechx/speechx/frontend/audio/data_cache.h
+1
-1
speechx/speechx/frontend/audio/feature_pipeline.h
speechx/speechx/frontend/audio/feature_pipeline.h
+13
-9
speechx/speechx/frontend/audio/mfcc.h
speechx/speechx/frontend/audio/mfcc.h
+0
-1
speechx/speechx/nnet/ds2_nnet.h
speechx/speechx/nnet/ds2_nnet.h
+1
-0
speechx/speechx/nnet/nnet_itf.h
speechx/speechx/nnet/nnet_itf.h
+8
-8
speechx/speechx/nnet/u2_nnet.h
speechx/speechx/nnet/u2_nnet.h
+0
-1
speechx/speechx/protocol/websocket/websocket_client.h
speechx/speechx/protocol/websocket/websocket_client.h
+1
-2
speechx/speechx/protocol/websocket/websocket_server.h
speechx/speechx/protocol/websocket/websocket_server.h
+0
-2
speechx/speechx/utils/file_utils.h
speechx/speechx/utils/file_utils.h
+1
-1
未找到文件。
speechx/speechx/base/basic_types.h
浏览文件 @
56a0a024
...
...
@@ -14,10 +14,10 @@
#pragma once
#include "kaldi/base/kaldi-types.h"
#include <limits>
#include "kaldi/base/kaldi-types.h"
typedef
float
BaseFloat
;
typedef
double
double64
;
...
...
speechx/speechx/base/common.h
浏览文件 @
56a0a024
...
...
@@ -47,6 +47,5 @@
#include "base/flags.h"
#include "base/log.h"
#include "base/macros.h"
#include "utils/file_utils.h"
#include "utils/math.h"
\ No newline at end of file
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h
浏览文件 @
56a0a024
...
...
@@ -17,7 +17,6 @@
#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_prefix_beam_search_score.h"
#include "decoder/decoder_itf.h"
#include "fst/symbol-table.h"
namespace
ppspeech
{
...
...
speechx/speechx/decoder/ctc_tlg_decoder.h
浏览文件 @
56a0a024
...
...
@@ -16,7 +16,6 @@
#include "base/common.h"
#include "decoder/decoder_itf.h"
#include "kaldi/decoder/lattice-faster-online-decoder.h"
#include "util/parse-options.h"
...
...
@@ -35,7 +34,7 @@ struct TLGDecoderOptions {
std
::
string
word_symbol_table
{};
std
::
string
fst_path
{};
static
TLGDecoderOptions
InitFromFlags
(){
static
TLGDecoderOptions
InitFromFlags
()
{
TLGDecoderOptions
decoder_opts
;
decoder_opts
.
word_symbol_table
=
FLAGS_word_symbol_table
;
decoder_opts
.
fst_path
=
FLAGS_graph_path
;
...
...
@@ -45,9 +44,11 @@ struct TLGDecoderOptions {
decoder_opts
.
opts
.
max_active
=
FLAGS_max_active
;
decoder_opts
.
opts
.
beam
=
FLAGS_beam
;
decoder_opts
.
opts
.
lattice_beam
=
FLAGS_lattice_beam
;
LOG
(
INFO
)
<<
"LatticeFasterDecoder max active: "
<<
decoder_opts
.
opts
.
max_active
;
LOG
(
INFO
)
<<
"LatticeFasterDecoder beam: "
<<
decoder_opts
.
opts
.
beam
;
LOG
(
INFO
)
<<
"LatticeFasterDecoder lattice_beam: "
<<
decoder_opts
.
opts
.
lattice_beam
;
LOG
(
INFO
)
<<
"LatticeFasterDecoder max active: "
<<
decoder_opts
.
opts
.
max_active
;
LOG
(
INFO
)
<<
"LatticeFasterDecoder beam: "
<<
decoder_opts
.
opts
.
beam
;
LOG
(
INFO
)
<<
"LatticeFasterDecoder lattice_beam: "
<<
decoder_opts
.
opts
.
lattice_beam
;
return
decoder_opts
;
}
...
...
speechx/speechx/decoder/param.h
浏览文件 @
56a0a024
...
...
@@ -30,7 +30,7 @@ DEFINE_int32(receptive_field_length,
7
,
"receptive field of two CNN(kernel=3) downsampling module."
);
DEFINE_int32
(
subsampling_rate
,
4
,
4
,
"two CNN(kernel=3) module downsampling rate."
);
DEFINE_int32
(
nnet_decoder_chunk
,
1
,
"paddle nnet forward chunk"
);
...
...
@@ -62,7 +62,6 @@ DEFINE_double(beam, 15.0, "decoder beam");
DEFINE_double
(
lattice_beam
,
7
.
5
,
"decoder beam"
);
// DecodeOptions flags
// DEFINE_int32(chunk_size, -1, "decoding chunk size");
DEFINE_int32
(
num_left_chunks
,
-
1
,
"left chunks in decoding"
);
...
...
speechx/speechx/decoder/recognizer.h
浏览文件 @
56a0a024
...
...
@@ -32,15 +32,15 @@ struct RecognizerResource {
ModelOptions
model_opts
{};
TLGDecoderOptions
tlg_opts
{};
// CTCBeamSearchOptions beam_search_opts;
static
RecognizerResource
InitFromFlags
(){
static
RecognizerResource
InitFromFlags
()
{
RecognizerResource
resource
;
resource
.
acoustic_scale
=
FLAGS_acoustic_scale
;
resource
.
feature_pipeline_opts
=
FeaturePipelineOptions
::
InitFromFlags
();
resource
.
feature_pipeline_opts
=
FeaturePipelineOptions
::
InitFromFlags
();
resource
.
model_opts
=
ModelOptions
::
InitFromFlags
();
resource
.
tlg_opts
=
TLGDecoderOptions
::
InitFromFlags
();
return
resource
;
resource
.
tlg_opts
=
TLGDecoderOptions
::
InitFromFlags
();
return
resource
;
}
};
...
...
speechx/speechx/decoder/u2_recognizer.h
浏览文件 @
56a0a024
...
...
@@ -21,10 +21,9 @@
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "decoder/decoder_itf.h"
#include "frontend/audio/feature_pipeline.h"
#include "nnet/decodable.h"
#include "fst/fstlib.h"
#include "fst/symbol-table.h"
#include "nnet/decodable.h"
DECLARE_int32
(
nnet_decoder_chunk
);
DECLARE_int32
(
num_left_chunks
);
...
...
@@ -63,9 +62,9 @@ struct DecodeOptions {
// CtcEndpointConfig ctc_endpoint_opts;
CTCBeamSearchOptions
ctc_prefix_search_opts
{};
static
DecodeOptions
InitFromFlags
(){
static
DecodeOptions
InitFromFlags
()
{
DecodeOptions
decoder_opts
;
decoder_opts
.
chunk_size
=
FLAGS_nnet_decoder_chunk
;
decoder_opts
.
chunk_size
=
FLAGS_nnet_decoder_chunk
;
decoder_opts
.
num_left_chunks
=
FLAGS_num_left_chunks
;
decoder_opts
.
ctc_weight
=
FLAGS_ctc_weight
;
decoder_opts
.
rescoring_weight
=
FLAGS_rescoring_weight
;
...
...
@@ -86,15 +85,16 @@ struct U2RecognizerResource {
DecodeOptions
decoder_opts
{};
static
U2RecognizerResource
InitFromFlags
()
{
U2RecognizerResource
resource
;
resource
.
vocab_path
=
FLAGS_vocab_path
;
resource
.
acoustic_scale
=
FLAGS_acoustic_scale
;
resource
.
feature_pipeline_opts
=
ppspeech
::
FeaturePipelineOptions
::
InitFromFlags
();
resource
.
model_opts
=
ppspeech
::
ModelOptions
::
InitFromFlags
();
resource
.
decoder_opts
=
ppspeech
::
DecodeOptions
::
InitFromFlags
();
return
resource
;
}
U2RecognizerResource
resource
;
resource
.
vocab_path
=
FLAGS_vocab_path
;
resource
.
acoustic_scale
=
FLAGS_acoustic_scale
;
resource
.
feature_pipeline_opts
=
ppspeech
::
FeaturePipelineOptions
::
InitFromFlags
();
resource
.
model_opts
=
ppspeech
::
ModelOptions
::
InitFromFlags
();
resource
.
decoder_opts
=
ppspeech
::
DecodeOptions
::
InitFromFlags
();
return
resource
;
}
};
...
...
speechx/speechx/frontend/audio/data_cache.h
浏览文件 @
56a0a024
...
...
@@ -56,4 +56,4 @@ class DataCache : public FrontendInterface {
DISALLOW_COPY_AND_ASSIGN
(
DataCache
);
};
}
\ No newline at end of file
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/audio/feature_pipeline.h
浏览文件 @
56a0a024
...
...
@@ -46,17 +46,17 @@ struct FeaturePipelineOptions {
FeatureCacheOptions
feature_cache_opts
{};
AssemblerOptions
assembler_opts
{};
static
FeaturePipelineOptions
InitFromFlags
(){
static
FeaturePipelineOptions
InitFromFlags
()
{
FeaturePipelineOptions
opts
;
opts
.
cmvn_file
=
FLAGS_cmvn_file
;
LOG
(
INFO
)
<<
"cmvn file: "
<<
opts
.
cmvn_file
;
LOG
(
INFO
)
<<
"cmvn file: "
<<
opts
.
cmvn_file
;
// frame options
kaldi
::
FrameExtractionOptions
frame_opts
;
frame_opts
.
dither
=
0.0
;
LOG
(
INFO
)
<<
"dither: "
<<
frame_opts
.
dither
;
LOG
(
INFO
)
<<
"dither: "
<<
frame_opts
.
dither
;
frame_opts
.
frame_shift_ms
=
10
;
LOG
(
INFO
)
<<
"frame shift ms: "
<<
frame_opts
.
frame_shift_ms
;
LOG
(
INFO
)
<<
"frame shift ms: "
<<
frame_opts
.
frame_shift_ms
;
opts
.
use_fbank
=
FLAGS_use_fbank
;
LOG
(
INFO
)
<<
"feature type: "
<<
(
opts
.
use_fbank
?
"fbank"
:
"linear"
);
if
(
opts
.
use_fbank
)
{
...
...
@@ -76,15 +76,19 @@ struct FeaturePipelineOptions {
opts
.
linear_spectrogram_opts
.
frame_opts
=
frame_opts
;
}
LOG
(
INFO
)
<<
"frame length ms: "
<<
frame_opts
.
frame_length_ms
;
LOG
(
INFO
)
<<
"frame length ms: "
<<
frame_opts
.
frame_length_ms
;
// assembler opts
opts
.
assembler_opts
.
subsampling_rate
=
FLAGS_subsampling_rate
;
LOG
(
INFO
)
<<
"subsampling rate: "
<<
opts
.
assembler_opts
.
subsampling_rate
;
opts
.
assembler_opts
.
receptive_filed_length
=
FLAGS_receptive_field_length
;
LOG
(
INFO
)
<<
"nnet receptive filed length: "
<<
opts
.
assembler_opts
.
receptive_filed_length
;
LOG
(
INFO
)
<<
"subsampling rate: "
<<
opts
.
assembler_opts
.
subsampling_rate
;
opts
.
assembler_opts
.
receptive_filed_length
=
FLAGS_receptive_field_length
;
LOG
(
INFO
)
<<
"nnet receptive filed length: "
<<
opts
.
assembler_opts
.
receptive_filed_length
;
opts
.
assembler_opts
.
nnet_decoder_chunk
=
FLAGS_nnet_decoder_chunk
;
LOG
(
INFO
)
<<
"nnet chunk size: "
<<
opts
.
assembler_opts
.
nnet_decoder_chunk
;
LOG
(
INFO
)
<<
"nnet chunk size: "
<<
opts
.
assembler_opts
.
nnet_decoder_chunk
;
return
opts
;
}
};
...
...
speechx/speechx/frontend/audio/mfcc.h
浏览文件 @
56a0a024
...
...
@@ -14,7 +14,6 @@
#pragma once
#include "kaldi/feat/feature-mfcc.h"
#include "kaldi/feat/feature-mfcc.h"
#include "kaldi/matrix/kaldi-vector.h"
...
...
speechx/speechx/nnet/ds2_nnet.h
浏览文件 @
56a0a024
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <numeric>
#include "base/common.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "nnet/nnet_itf.h"
...
...
speechx/speechx/nnet/nnet_itf.h
浏览文件 @
56a0a024
...
...
@@ -48,25 +48,25 @@ struct ModelOptions {
bool
enable_fc_padding
{
false
};
bool
enable_profile
{
false
};
static
ModelOptions
InitFromFlags
(){
static
ModelOptions
InitFromFlags
()
{
ModelOptions
opts
;
opts
.
subsample_rate
=
FLAGS_subsampling_rate
;
LOG
(
INFO
)
<<
"subsampling rate: "
<<
opts
.
subsample_rate
;
LOG
(
INFO
)
<<
"subsampling rate: "
<<
opts
.
subsample_rate
;
opts
.
model_path
=
FLAGS_model_path
;
LOG
(
INFO
)
<<
"model path: "
<<
opts
.
model_path
;
LOG
(
INFO
)
<<
"model path: "
<<
opts
.
model_path
;
opts
.
param_path
=
FLAGS_param_path
;
LOG
(
INFO
)
<<
"param path: "
<<
opts
.
param_path
;
LOG
(
INFO
)
<<
"param path: "
<<
opts
.
param_path
;
LOG
(
INFO
)
<<
"DS2 param: "
;
opts
.
cache_names
=
FLAGS_model_cache_names
;
LOG
(
INFO
)
<<
" cache names: "
<<
opts
.
cache_names
;
LOG
(
INFO
)
<<
" cache names: "
<<
opts
.
cache_names
;
opts
.
cache_shape
=
FLAGS_model_cache_shapes
;
LOG
(
INFO
)
<<
" cache shape: "
<<
opts
.
cache_shape
;
LOG
(
INFO
)
<<
" cache shape: "
<<
opts
.
cache_shape
;
opts
.
input_names
=
FLAGS_model_input_names
;
LOG
(
INFO
)
<<
" input names: "
<<
opts
.
input_names
;
LOG
(
INFO
)
<<
" input names: "
<<
opts
.
input_names
;
opts
.
output_names
=
FLAGS_model_output_names
;
LOG
(
INFO
)
<<
" output names: "
<<
opts
.
output_names
;
LOG
(
INFO
)
<<
" output names: "
<<
opts
.
output_names
;
return
opts
;
}
};
...
...
speechx/speechx/nnet/u2_nnet.h
浏览文件 @
56a0a024
...
...
@@ -16,7 +16,6 @@
#include "base/common.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "nnet/nnet_itf.h"
#include "paddle/extension.h"
#include "paddle/jit/all.h"
...
...
speechx/speechx/protocol/websocket/websocket_client.h
浏览文件 @
56a0a024
...
...
@@ -13,7 +13,6 @@
// limitations under the License.
#include "base/common.h"
#include "boost/asio/connect.hpp"
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
...
...
@@ -54,4 +53,4 @@ class WebSocketClient {
websocket
::
stream
<
tcp
::
socket
>
ws_
{
ioc_
};
std
::
unique_ptr
<
std
::
thread
>
t_
{
nullptr
};
};
}
\ No newline at end of file
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/protocol/websocket/websocket_server.h
浏览文件 @
56a0a024
...
...
@@ -15,12 +15,10 @@
#pragma once
#include "base/common.h"
#include "boost/asio/connect.hpp"
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
#include "boost/beast/websocket.hpp"
#include "decoder/recognizer.h"
#include "frontend/audio/feature_pipeline.h"
...
...
speechx/speechx/utils/file_utils.h
浏览文件 @
56a0a024
...
...
@@ -20,4 +20,4 @@ bool ReadFileToVector(const std::string& filename,
std
::
vector
<
std
::
string
>*
data
);
std
::
string
ReadFile2String
(
const
std
::
string
&
path
);
}
}
// namespace ppspeech
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录