Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
6abc5d9f
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6abc5d9f
编写于
3月 17, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format
上级
854b63b5
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
112 addition
and
100 deletion
+112
-100
paddleaudio/setup.py
paddleaudio/setup.py
+1
-0
paddlespeech/cli/utils.py
paddlespeech/cli/utils.py
+1
-1
paddlespeech/server/bin/paddlespeech_server.py
paddlespeech/server/bin/paddlespeech_server.py
+1
-1
speechx/examples/README.md
speechx/examples/README.md
+1
-1
speechx/examples/feat/linear_spectrogram_main.cc
speechx/examples/feat/linear_spectrogram_main.cc
+2
-2
speechx/speechx/decoder/ctc_beam_search_decoder.cc
speechx/speechx/decoder/ctc_beam_search_decoder.cc
+3
-3
speechx/speechx/frontend/feature_cache.cc
speechx/speechx/frontend/feature_cache.cc
+1
-2
speechx/speechx/frontend/feature_cache.h
speechx/speechx/frontend/feature_cache.h
+1
-2
speechx/speechx/frontend/feature_extractor_controller.h
speechx/speechx/frontend/feature_extractor_controller.h
+0
-1
speechx/speechx/frontend/feature_extractor_controller_impl.h
speechx/speechx/frontend/feature_extractor_controller_impl.h
+0
-1
speechx/speechx/frontend/feature_extractor_interface.h
speechx/speechx/frontend/feature_extractor_interface.h
+2
-3
speechx/speechx/frontend/linear_spectrogram.cc
speechx/speechx/frontend/linear_spectrogram.cc
+5
-4
speechx/speechx/frontend/linear_spectrogram.h
speechx/speechx/frontend/linear_spectrogram.h
+2
-5
speechx/speechx/frontend/normalizer.cc
speechx/speechx/frontend/normalizer.cc
+4
-5
speechx/speechx/frontend/normalizer.h
speechx/speechx/frontend/normalizer.h
+4
-10
speechx/speechx/frontend/raw_audio.cc
speechx/speechx/frontend/raw_audio.cc
+3
-2
speechx/speechx/frontend/raw_audio.h
speechx/speechx/frontend/raw_audio.h
+4
-6
speechx/speechx/nnet/decodable-itf.h
speechx/speechx/nnet/decodable-itf.h
+74
-45
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+2
-5
utils/generate_infer_yaml.py
utils/generate_infer_yaml.py
+1
-1
未找到文件。
paddleaudio/setup.py
浏览文件 @
6abc5d9f
...
@@ -61,6 +61,7 @@ def remove_version_py(filename='paddleaudio/__init__.py'):
...
@@ -61,6 +61,7 @@ def remove_version_py(filename='paddleaudio/__init__.py'):
if
"__version__"
not
in
line
:
if
"__version__"
not
in
line
:
f
.
write
(
line
)
f
.
write
(
line
)
remove_version_py
()
remove_version_py
()
write_version_py
()
write_version_py
()
...
...
paddlespeech/cli/utils.py
浏览文件 @
6abc5d9f
...
@@ -192,7 +192,7 @@ class ConfigCache:
...
@@ -192,7 +192,7 @@ class ConfigCache:
try
:
try
:
cfg
=
yaml
.
load
(
file
,
Loader
=
yaml
.
FullLoader
)
cfg
=
yaml
.
load
(
file
,
Loader
=
yaml
.
FullLoader
)
self
.
_data
.
update
(
cfg
)
self
.
_data
.
update
(
cfg
)
except
:
except
Exception
as
e
:
self
.
flush
()
self
.
flush
()
@
property
@
property
...
...
paddlespeech/server/bin/paddlespeech_server.py
浏览文件 @
6abc5d9f
...
@@ -174,7 +174,7 @@ class ServerStatsExecutor():
...
@@ -174,7 +174,7 @@ class ServerStatsExecutor():
"Failed to get the table of TTS pretrained models supported in the service."
"Failed to get the table of TTS pretrained models supported in the service."
)
)
return
False
return
False
elif
self
.
task
==
'cls'
:
elif
self
.
task
==
'cls'
:
try
:
try
:
from
paddlespeech.cli.cls.infer
import
pretrained_models
from
paddlespeech.cli.cls.infer
import
pretrained_models
...
...
speechx/examples/README.md
浏览文件 @
6abc5d9f
...
@@ -13,4 +13,4 @@ Example to play `decoder`:
...
@@ -13,4 +13,4 @@ Example to play `decoder`:
```
```
pushd decoder
pushd decoder
bash run.sh
bash run.sh
```
```
\ No newline at end of file
speechx/examples/feat/linear_spectrogram_main.cc
浏览文件 @
6abc5d9f
...
@@ -164,8 +164,8 @@ int main(int argc, char* argv[]) {
...
@@ -164,8 +164,8 @@ int main(int argc, char* argv[]) {
// test feature linear_spectorgram: wave --> decibel_normalizer --> hanning
// test feature linear_spectorgram: wave --> decibel_normalizer --> hanning
// window -->linear_spectrogram --> cmvn
// window -->linear_spectrogram --> cmvn
int32
num_done
=
0
,
num_err
=
0
;
int32
num_done
=
0
,
num_err
=
0
;
//std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
//
std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
//
ppspeech::RawDataCache());
//
ppspeech::RawDataCache());
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
data_source
(
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
data_source
(
new
ppspeech
::
RawAudioCache
());
new
ppspeech
::
RawAudioCache
());
...
...
speechx/speechx/decoder/ctc_beam_search_decoder.cc
浏览文件 @
6abc5d9f
...
@@ -52,14 +52,14 @@ CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts)
...
@@ -52,14 +52,14 @@ CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts)
}
}
void
CTCBeamSearch
::
Reset
()
{
void
CTCBeamSearch
::
Reset
()
{
//num_frame_decoded_ = 0;
//
num_frame_decoded_ = 0;
//ResetPrefixes();
//
ResetPrefixes();
InitDecoder
();
InitDecoder
();
}
}
void
CTCBeamSearch
::
InitDecoder
()
{
void
CTCBeamSearch
::
InitDecoder
()
{
num_frame_decoded_
=
0
;
num_frame_decoded_
=
0
;
//ResetPrefixes();
//
ResetPrefixes();
prefixes_
.
clear
();
prefixes_
.
clear
();
root_
=
std
::
make_shared
<
PathTrie
>
();
root_
=
std
::
make_shared
<
PathTrie
>
();
...
...
speechx/speechx/frontend/feature_cache.cc
浏览文件 @
6abc5d9f
...
@@ -29,8 +29,7 @@ FeatureCache::FeatureCache(
...
@@ -29,8 +29,7 @@ FeatureCache::FeatureCache(
base_extractor_
=
std
::
move
(
base_extractor
);
base_extractor_
=
std
::
move
(
base_extractor
);
}
}
void
FeatureCache
::
Accept
(
void
FeatureCache
::
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
{
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
{
base_extractor_
->
Accept
(
inputs
);
base_extractor_
->
Accept
(
inputs
);
// feed current data
// feed current data
bool
result
=
false
;
bool
result
=
false
;
...
...
speechx/speechx/frontend/feature_cache.h
浏览文件 @
6abc5d9f
...
@@ -24,8 +24,7 @@ class FeatureCache : public FeatureExtractorInterface {
...
@@ -24,8 +24,7 @@ class FeatureCache : public FeatureExtractorInterface {
explicit
FeatureCache
(
explicit
FeatureCache
(
int32
max_size
=
kint16max
,
int32
max_size
=
kint16max
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
=
NULL
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
=
NULL
);
virtual
void
Accept
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
// feats dim = num_frames * feature_dim
// feats dim = num_frames * feature_dim
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// feature cache only cache feature which from base extractor
// feature cache only cache feature which from base extractor
...
...
speechx/speechx/frontend/feature_extractor_controller.h
浏览文件 @
6abc5d9f
...
@@ -11,4 +11,3 @@
...
@@ -11,4 +11,3 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
speechx/speechx/frontend/feature_extractor_controller_impl.h
浏览文件 @
6abc5d9f
...
@@ -11,4 +11,3 @@
...
@@ -11,4 +11,3 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
speechx/speechx/frontend/feature_extractor_interface.h
浏览文件 @
6abc5d9f
...
@@ -21,10 +21,9 @@ namespace ppspeech {
...
@@ -21,10 +21,9 @@ namespace ppspeech {
class
FeatureExtractorInterface
{
class
FeatureExtractorInterface
{
public:
public:
// accept input data, accept feature or raw waves which decided
// accept input data, accept feature or raw waves which decided
// by the base_extractor
// by the base_extractor
virtual
void
Accept
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
=
0
;
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
=
0
;
// get the processed result
// get the processed result
// the length of output = feature_row * feature_dim,
// the length of output = feature_row * feature_dim,
// the Matrix is squashed into Vector
// the Matrix is squashed into Vector
...
...
speechx/speechx/frontend/linear_spectrogram.cc
浏览文件 @
6abc5d9f
...
@@ -57,8 +57,9 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
...
@@ -57,8 +57,9 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
vector
<
BaseFloat
>
input_feats_vec
(
input_feats
.
Dim
());
vector
<
BaseFloat
>
input_feats_vec
(
input_feats
.
Dim
());
std
::
memcpy
(
input_feats_vec
.
data
(),
input_feats
.
Data
(),
std
::
memcpy
(
input_feats_vec
.
data
(),
input_feats
.
Dim
()
*
sizeof
(
BaseFloat
));
input_feats
.
Data
(),
input_feats
.
Dim
()
*
sizeof
(
BaseFloat
));
vector
<
vector
<
BaseFloat
>>
result
;
vector
<
vector
<
BaseFloat
>>
result
;
Compute
(
input_feats_vec
,
result
);
Compute
(
input_feats_vec
,
result
);
int32
feat_size
=
0
;
int32
feat_size
=
0
;
...
@@ -86,10 +87,10 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
...
@@ -86,10 +87,10 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
vector
<
BaseFloat
>*
img
)
const
{
vector
<
BaseFloat
>*
img
)
const
{
Vector
<
BaseFloat
>
v_tmp
;
Vector
<
BaseFloat
>
v_tmp
;
v_tmp
.
Resize
(
v
->
size
());
v_tmp
.
Resize
(
v
->
size
());
std
::
memcpy
(
v_tmp
.
Data
(),
v
->
data
(),
sizeof
(
BaseFloat
)
*
(
v
->
size
()));
std
::
memcpy
(
v_tmp
.
Data
(),
v
->
data
(),
sizeof
(
BaseFloat
)
*
(
v
->
size
()));
RealFft
(
&
v_tmp
,
true
);
RealFft
(
&
v_tmp
,
true
);
v
->
resize
(
v_tmp
.
Dim
());
v
->
resize
(
v_tmp
.
Dim
());
std
::
memcpy
(
v
->
data
(),
v_tmp
.
Data
(),
sizeof
(
BaseFloat
)
*
(
v
->
size
()));
std
::
memcpy
(
v
->
data
(),
v_tmp
.
Data
(),
sizeof
(
BaseFloat
)
*
(
v
->
size
()));
real
->
push_back
(
v
->
at
(
0
));
real
->
push_back
(
v
->
at
(
0
));
img
->
push_back
(
0
);
img
->
push_back
(
0
);
...
...
speechx/speechx/frontend/linear_spectrogram.h
浏览文件 @
6abc5d9f
...
@@ -38,16 +38,13 @@ class LinearSpectrogram : public FeatureExtractorInterface {
...
@@ -38,16 +38,13 @@ class LinearSpectrogram : public FeatureExtractorInterface {
explicit
LinearSpectrogram
(
explicit
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
const
LinearSpectrogramOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
Accept
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// the dim_ is the dim of single frame feature
// the dim_ is the dim of single frame feature
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
void
Reset
()
{
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
}
base_extractor_
->
Reset
();
}
private:
private:
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
...
...
speechx/speechx/frontend/normalizer.cc
浏览文件 @
6abc5d9f
...
@@ -34,14 +34,12 @@ DecibelNormalizer::DecibelNormalizer(
...
@@ -34,14 +34,12 @@ DecibelNormalizer::DecibelNormalizer(
dim_
=
1
;
dim_
=
1
;
}
}
void
DecibelNormalizer
::
Accept
(
void
DecibelNormalizer
::
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
waves
)
{
const
kaldi
::
VectorBase
<
BaseFloat
>&
waves
)
{
base_extractor_
->
Accept
(
waves
);
base_extractor_
->
Accept
(
waves
);
}
}
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
waves
)
{
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
waves
)
{
if
(
base_extractor_
->
Read
(
waves
)
==
false
||
if
(
base_extractor_
->
Read
(
waves
)
==
false
||
waves
->
Dim
()
==
0
)
{
waves
->
Dim
()
==
0
)
{
return
false
;
return
false
;
}
}
Compute
(
waves
);
Compute
(
waves
);
...
@@ -88,7 +86,8 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
...
@@ -88,7 +86,8 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
}
}
std
::
memcpy
(
waves
->
Data
(),
samples
.
data
(),
sizeof
(
BaseFloat
)
*
samples
.
size
());
std
::
memcpy
(
waves
->
Data
(),
samples
.
data
(),
sizeof
(
BaseFloat
)
*
samples
.
size
());
return
true
;
return
true
;
}
}
...
...
speechx/speechx/frontend/normalizer.h
浏览文件 @
6abc5d9f
...
@@ -45,16 +45,13 @@ class DecibelNormalizer : public FeatureExtractorInterface {
...
@@ -45,16 +45,13 @@ class DecibelNormalizer : public FeatureExtractorInterface {
explicit
DecibelNormalizer
(
explicit
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
const
DecibelNormalizerOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
Accept
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
waves
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
waves
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
waves
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
waves
);
// noramlize audio, the dim is 1.
// noramlize audio, the dim is 1.
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
void
Reset
()
{
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
}
base_extractor_
->
Reset
();
}
private:
private:
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
waves
)
const
;
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
waves
)
const
;
...
@@ -69,8 +66,7 @@ class CMVN : public FeatureExtractorInterface {
...
@@ -69,8 +66,7 @@ class CMVN : public FeatureExtractorInterface {
public:
public:
explicit
CMVN
(
std
::
string
cmvn_file
,
explicit
CMVN
(
std
::
string
cmvn_file
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
Accept
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
// the length of feats = feature_row * feature_dim,
// the length of feats = feature_row * feature_dim,
// the Matrix is squashed into Vector
// the Matrix is squashed into Vector
...
@@ -79,9 +75,7 @@ class CMVN : public FeatureExtractorInterface {
...
@@ -79,9 +75,7 @@ class CMVN : public FeatureExtractorInterface {
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
void
Reset
()
{
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
}
base_extractor_
->
Reset
();
}
private:
private:
void
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feats
)
const
;
void
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feats
)
const
;
...
...
speechx/speechx/frontend/raw_audio.cc
浏览文件 @
6abc5d9f
...
@@ -32,7 +32,7 @@ void RawAudioCache::Accept(const VectorBase<BaseFloat>& waves) {
...
@@ -32,7 +32,7 @@ void RawAudioCache::Accept(const VectorBase<BaseFloat>& waves) {
ready_feed_condition_
.
wait
(
lock
);
ready_feed_condition_
.
wait
(
lock
);
}
}
for
(
size_t
idx
=
0
;
idx
<
waves
.
Dim
();
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
waves
.
Dim
();
++
idx
)
{
int32
buffer_idx
=
(
idx
+
start_
)
%
ring_buffer_
.
size
();
int32
buffer_idx
=
(
idx
+
start_
)
%
ring_buffer_
.
size
();
ring_buffer_
[
buffer_idx
]
=
waves
(
idx
);
ring_buffer_
[
buffer_idx
]
=
waves
(
idx
);
}
}
data_length_
+=
waves
.
Dim
();
data_length_
+=
waves
.
Dim
();
...
@@ -44,7 +44,8 @@ bool RawAudioCache::Read(Vector<BaseFloat>* waves) {
...
@@ -44,7 +44,8 @@ bool RawAudioCache::Read(Vector<BaseFloat>* waves) {
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
chunk_size
>
data_length_
)
{
while
(
chunk_size
>
data_length_
)
{
// when audio is empty and no more data feed
// when audio is empty and no more data feed
// ready_read_condition will block in dead lock. so replace with timeout_
// ready_read_condition will block in dead lock. so replace with
// timeout_
// ready_read_condition_.wait(lock);
// ready_read_condition_.wait(lock);
int32
elapsed
=
static_cast
<
int32
>
(
timer
.
Elapsed
()
*
1000
);
int32
elapsed
=
static_cast
<
int32
>
(
timer
.
Elapsed
()
*
1000
);
if
(
elapsed
>
timeout_
)
{
if
(
elapsed
>
timeout_
)
{
...
...
speechx/speechx/frontend/raw_audio.h
浏览文件 @
6abc5d9f
...
@@ -35,9 +35,9 @@ class RawAudioCache : public FeatureExtractorInterface {
...
@@ -35,9 +35,9 @@ class RawAudioCache : public FeatureExtractorInterface {
}
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
virtual
void
Reset
()
{
virtual
void
Reset
()
{
start_
=
0
;
start_
=
0
;
data_length_
=
0
;
data_length_
=
0
;
finished_
=
false
;
finished_
=
false
;
}
}
private:
private:
...
@@ -72,9 +72,7 @@ class RawDataCache : public FeatureExtractorInterface {
...
@@ -72,9 +72,7 @@ class RawDataCache : public FeatureExtractorInterface {
virtual
void
SetFinished
()
{
finished_
=
true
;
}
virtual
void
SetFinished
()
{
finished_
=
true
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
void
SetDim
(
int32
dim
)
{
dim_
=
dim
;
}
void
SetDim
(
int32
dim
)
{
dim_
=
dim
;
}
virtual
void
Reset
()
{
virtual
void
Reset
()
{
finished_
=
true
;
}
finished_
=
true
;
}
private:
private:
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
data_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
data_
;
...
...
speechx/speechx/nnet/decodable-itf.h
浏览文件 @
6abc5d9f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// itf/decodable-itf.h
// itf/decodable-itf.h
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
...
@@ -42,8 +56,10 @@ namespace kaldi {
...
@@ -42,8 +56,10 @@ namespace kaldi {
For online decoding, where the features are coming in in real time, it is
For online decoding, where the features are coming in in real time, it is
important to understand the IsLastFrame() and NumFramesReady() functions.
important to understand the IsLastFrame() and NumFramesReady() functions.
There are two ways these are used: the old online-decoding code, in ../online/,
There are two ways these are used: the old online-decoding code, in
and the new online-decoding code, in ../online2/. In the old online-decoding
../online/,
and the new online-decoding code, in ../online2/. In the old
online-decoding
code, the decoder would do:
code, the decoder would do:
\code{.cc}
\code{.cc}
for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
...
@@ -52,13 +68,16 @@ namespace kaldi {
...
@@ -52,13 +68,16 @@ namespace kaldi {
\endcode
\endcode
and the call to IsLastFrame would block if the features had not arrived yet.
and the call to IsLastFrame would block if the features had not arrived yet.
The decodable object would have to know when to terminate the decoding. This
The decodable object would have to know when to terminate the decoding. This
online-decoding mode is still supported, it is what happens when you call, for
online-decoding mode is still supported, it is what happens when you call,
for
example, LatticeFasterDecoder::Decode().
example, LatticeFasterDecoder::Decode().
We realized that this "blocking" mode of decoding is not very convenient
We realized that this "blocking" mode of decoding is not very convenient
because it forces the program to be multi-threaded and makes it complex to
because it forces the program to be multi-threaded and makes it complex to
control endpointing. In the "new" decoding code, you don't call (for example)
control endpointing. In the "new" decoding code, you don't call (for
LatticeFasterDecoder::Decode(), you call LatticeFasterDecoder::InitDecoding(),
example)
LatticeFasterDecoder::Decode(), you call
LatticeFasterDecoder::InitDecoding(),
and then each time you get more features, you provide them to the decodable
and then each time you get more features, you provide them to the decodable
object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does
object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does
something like this:
something like this:
...
@@ -68,7 +87,8 @@ namespace kaldi {
...
@@ -68,7 +87,8 @@ namespace kaldi {
}
}
\endcode
\endcode
So the decodable object never has IsLastFrame() called. For decoding where
So the decodable object never has IsLastFrame() called. For decoding where
you are starting with a matrix of features, the NumFramesReady() function will
you are starting with a matrix of features, the NumFramesReady() function
will
always just return the number of frames in the file, and IsLastFrame() will
always just return the number of frames in the file, and IsLastFrame() will
return true for the last frame.
return true for the last frame.
...
@@ -80,45 +100,54 @@ namespace kaldi {
...
@@ -80,45 +100,54 @@ namespace kaldi {
frame of the file once we've decided to terminate decoding.
frame of the file once we've decided to terminate decoding.
*/
*/
class
DecodableInterface
{
class
DecodableInterface
{
public:
public:
/// Returns the log likelihood, which will be negated in the decoder.
/// Returns the log likelihood, which will be negated in the decoder.
/// The "frame" starts from zero. You should verify that NumFramesReady() > frame
/// The "frame" starts from zero. You should verify that NumFramesReady() >
/// before calling this.
/// frame
virtual
BaseFloat
LogLikelihood
(
int32
frame
,
int32
index
)
=
0
;
/// before calling this.
virtual
BaseFloat
LogLikelihood
(
int32
frame
,
int32
index
)
=
0
;
/// Returns true if this is the last frame. Frames are zero-based, so the
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
/// Returns true if this is the last frame. Frames are zero-based, so the
/// is empty (which is a case that I'm not sure all the code will handle, so
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
/// be careful). Caution: the behavior of this function in an online setting
/// is empty (which is a case that I'm not sure all the code will handle, so
/// is being changed somewhat. In future it may return false in cases where
/// be careful). Caution: the behavior of this function in an online
/// we haven't yet decided to terminate decoding, but later true if we decide
/// setting
/// to terminate decoding. The plan in future is to rely more on
/// is being changed somewhat. In future it may return false in cases where
/// NumFramesReady(), and in future, IsLastFrame() would always return false
/// we haven't yet decided to terminate decoding, but later true if we
/// in an online-decoding setting, and would only return true in a
/// decide
/// decoding-from-matrix setting where we want to allow the last delta or LDA
/// to terminate decoding. The plan in future is to rely more on
/// features to be flushed out for compatibility with the baseline setup.
/// NumFramesReady(), and in future, IsLastFrame() would always return false
virtual
bool
IsLastFrame
(
int32
frame
)
const
=
0
;
/// in an online-decoding setting, and would only return true in a
/// decoding-from-matrix setting where we want to allow the last delta or
/// The call NumFramesReady() will return the number of frames currently available
/// LDA
/// for this decodable object. This is for use in setups where you don't want the
/// features to be flushed out for compatibility with the baseline setup.
/// decoder to block while waiting for input. This is newly added as of Jan 2014,
virtual
bool
IsLastFrame
(
int32
frame
)
const
=
0
;
/// and I hope, going forward, to rely on this mechanism more than IsLastFrame to
/// know when to stop decoding.
/// The call NumFramesReady() will return the number of frames currently
virtual
int32
NumFramesReady
()
const
{
/// available
KALDI_ERR
<<
"NumFramesReady() not implemented for this decodable type."
;
/// for this decodable object. This is for use in setups where you don't
return
-
1
;
/// want the
}
/// decoder to block while waiting for input. This is newly added as of Jan
/// 2014,
/// Returns the number of states in the acoustic model
/// and I hope, going forward, to rely on this mechanism more than
/// (they will be indexed one-based, i.e. from 1 to NumIndices();
/// IsLastFrame to
/// this is for compatibility with OpenFst).
/// know when to stop decoding.
virtual
int32
NumIndices
()
const
=
0
;
virtual
int32
NumFramesReady
()
const
{
KALDI_ERR
virtual
bool
FrameLogLikelihood
(
int32
frame
,
<<
"NumFramesReady() not implemented for this decodable type."
;
std
::
vector
<
kaldi
::
BaseFloat
>*
likelihood
)
=
0
;
return
-
1
;
}
virtual
~
DecodableInterface
()
{}
/// Returns the number of states in the acoustic model
/// (they will be indexed one-based, i.e. from 1 to NumIndices();
/// this is for compatibility with OpenFst).
virtual
int32
NumIndices
()
const
=
0
;
virtual
bool
FrameLogLikelihood
(
int32
frame
,
std
::
vector
<
kaldi
::
BaseFloat
>*
likelihood
)
=
0
;
virtual
~
DecodableInterface
()
{}
};
};
/// @}
/// @}
}
// namespace Kaldi
}
// namespace Kaldi
...
...
speechx/speechx/nnet/decodable.cc
浏览文件 @
6abc5d9f
...
@@ -23,10 +23,7 @@ using kaldi::Vector;
...
@@ -23,10 +23,7 @@ using kaldi::Vector;
Decodable
::
Decodable
(
const
std
::
shared_ptr
<
NnetInterface
>&
nnet
,
Decodable
::
Decodable
(
const
std
::
shared_ptr
<
NnetInterface
>&
nnet
,
const
std
::
shared_ptr
<
FeatureExtractorInterface
>&
frontend
)
const
std
::
shared_ptr
<
FeatureExtractorInterface
>&
frontend
)
:
frontend_
(
frontend
),
:
frontend_
(
frontend
),
nnet_
(
nnet
),
frame_offset_
(
0
),
frames_ready_
(
0
)
{}
nnet_
(
nnet
),
frame_offset_
(
0
),
frames_ready_
(
0
)
{}
void
Decodable
::
Acceptlikelihood
(
const
Matrix
<
BaseFloat
>&
likelihood
)
{
void
Decodable
::
Acceptlikelihood
(
const
Matrix
<
BaseFloat
>&
likelihood
)
{
frames_ready_
+=
likelihood
.
NumRows
();
frames_ready_
+=
likelihood
.
NumRows
();
...
@@ -83,7 +80,7 @@ void Decodable::Reset() {
...
@@ -83,7 +80,7 @@ void Decodable::Reset() {
frontend_
->
Reset
();
frontend_
->
Reset
();
nnet_
->
Reset
();
nnet_
->
Reset
();
frame_offset_
=
0
;
frame_offset_
=
0
;
frames_ready_
=
0
;
frames_ready_
=
0
;
}
}
}
// namespace ppspeech
}
// namespace ppspeech
\ No newline at end of file
utils/generate_infer_yaml.py
浏览文件 @
6abc5d9f
...
@@ -148,7 +148,7 @@ def merge_configs(
...
@@ -148,7 +148,7 @@ def merge_configs(
for
item
in
remove_train_list
:
for
item
in
remove_train_list
:
try
:
try
:
remove_config_part
(
config
,
[
item
])
remove_config_part
(
config
,
[
item
])
except
:
except
Exception
as
e
:
print
(
item
+
" "
+
"can not be removed"
)
print
(
item
+
" "
+
"can not be removed"
)
# Save the config
# Save the config
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录