Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
00a185b1
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
00a185b1
编写于
5月 30, 2022
作者:
Y
Yang Zhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add dispenser in frontend
上级
775c4bef
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
157 addition
and
44 deletion
+157
-44
speechx/speechx/decoder/param.h
speechx/speechx/decoder/param.h
+2
-2
speechx/speechx/frontend/audio/CMakeLists.txt
speechx/speechx/frontend/audio/CMakeLists.txt
+2
-1
speechx/speechx/frontend/audio/compute_fbank_main.cc
speechx/speechx/frontend/audio/compute_fbank_main.cc
+0
-4
speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc
...speechx/frontend/audio/compute_linear_spectrogram_main.cc
+0
-4
speechx/speechx/frontend/audio/dispenser.cc
speechx/speechx/frontend/audio/dispenser.cc
+72
-0
speechx/speechx/frontend/audio/dispenser.h
speechx/speechx/frontend/audio/dispenser.h
+67
-0
speechx/speechx/frontend/audio/feature_cache.cc
speechx/speechx/frontend/audio/feature_cache.cc
+4
-26
speechx/speechx/frontend/audio/feature_cache.h
speechx/speechx/frontend/audio/feature_cache.h
+1
-5
speechx/speechx/frontend/audio/feature_pipeline.cc
speechx/speechx/frontend/audio/feature_pipeline.cc
+4
-1
speechx/speechx/frontend/audio/feature_pipeline.h
speechx/speechx/frontend/audio/feature_pipeline.h
+5
-1
未找到文件。
speechx/speechx/decoder/param.h
浏览文件 @
00a185b1
...
...
@@ -81,8 +81,8 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
frame_opts
.
preemph_coeff
=
0.0
;
opts
.
linear_spectrogram_opts
.
frame_opts
=
frame_opts
;
}
opts
.
feature_cache
_opts
.
frame_chunk_size
=
FLAGS_receptive_field_length
;
opts
.
feature_cache
_opts
.
frame_chunk_stride
=
FLAGS_downsampling_rate
;
opts
.
dispenser
_opts
.
frame_chunk_size
=
FLAGS_receptive_field_length
;
opts
.
dispenser
_opts
.
frame_chunk_stride
=
FLAGS_downsampling_rate
;
return
opts
;
}
...
...
speechx/speechx/frontend/audio/CMakeLists.txt
浏览文件 @
00a185b1
...
...
@@ -8,6 +8,7 @@ add_library(frontend STATIC
feature_cache.cc
feature_pipeline.cc
fbank.cc
dispenser.cc
)
target_link_libraries
(
frontend PUBLIC kaldi-matrix kaldi-feat-common kaldi-fbank
)
...
...
speechx/speechx/frontend/audio/compute_fbank_main.cc
浏览文件 @
00a185b1
...
...
@@ -64,10 +64,6 @@ int main(int argc, char* argv[]) {
ppspeech
::
FeatureCacheOptions
feat_cache_opts
;
// the feature cache output feature chunk by chunk.
// frame_chunk_size : num frame of a chunk.
// frame_chunk_stride: chunk sliding window stride.
feat_cache_opts
.
frame_chunk_stride
=
1
;
feat_cache_opts
.
frame_chunk_size
=
1
;
ppspeech
::
FeatureCache
feature_cache
(
feat_cache_opts
,
std
::
move
(
cmvn
));
LOG
(
INFO
)
<<
"fbank: "
<<
true
;
LOG
(
INFO
)
<<
"feat dim: "
<<
feature_cache
.
Dim
();
...
...
speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc
浏览文件 @
00a185b1
...
...
@@ -66,10 +66,6 @@ int main(int argc, char* argv[]) {
ppspeech
::
FeatureCacheOptions
feat_cache_opts
;
// the feature cache output feature chunk by chunk.
// frame_chunk_size : num frame of a chunk.
// frame_chunk_stride: chunk sliding window stride.
feat_cache_opts
.
frame_chunk_stride
=
1
;
feat_cache_opts
.
frame_chunk_size
=
1
;
ppspeech
::
FeatureCache
feature_cache
(
feat_cache_opts
,
std
::
move
(
cmvn
));
LOG
(
INFO
)
<<
"feat dim: "
<<
feature_cache
.
Dim
();
...
...
speechx/speechx/frontend/audio/dispenser.cc
0 → 100644
浏览文件 @
00a185b1
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/audio/dispenser.h"
namespace
ppspeech
{
using
kaldi
::
Vector
;
using
kaldi
::
VectorBase
;
using
kaldi
::
BaseFloat
;
using
std
::
unique_ptr
;
// Builds a dispenser on top of `base_extractor` and takes ownership of it.
// The chunk window (opts.frame_chunk_size) and the hop between consecutive
// chunks (opts.frame_chunk_stride) are copied from `opts`.
// `base_extractor` must be non-null: it is dereferenced immediately to cache
// the feature dimension.
Dispenser::Dispenser(DispenserOptions opts,
                     unique_ptr<FrontendInterface> base_extractor)
    : frame_chunk_size_(opts.frame_chunk_size),
      frame_chunk_stride_(opts.frame_chunk_stride),
      base_extractor_(std::move(base_extractor)) {
    // dim_ is declared ahead of base_extractor_ in the class, so it is filled
    // in here, after the owned extractor is in place, instead of in the
    // initializer list.
    dim_ = base_extractor_->Dim();
}
// Hands `inputs` straight to the wrapped extractor; this method itself does
// not buffer or transform anything.
void Dispenser::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
    FrontendInterface& upstream = *base_extractor_;
    upstream.Accept(inputs);
}
// Pops one feature chunk into `feats`.
// `feats` is resized to dim_ * frame_chunk_size_ before any frame is pulled,
// so it has that size even when the call returns false.
// Returns whatever Compute() reports.
bool Dispenser::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
    const int32 chunk_len = dim_ * frame_chunk_size_;
    feats->Resize(chunk_len);
    return Compute(feats);
}
// Assembles one chunk of frame_chunk_size_ queue entries into `feats`.
//
// Step 1: pull from the wrapped extractor, one Read() per iteration, until
//         feature_cache_ holds frame_chunk_size_ entries. If a Read() fails or
//         yields an empty vector, return false; entries queued so far stay in
//         feature_cache_.
// Step 2: copy the queued entries, in order, into consecutive slices of
//         `feats`, re-queueing the trailing
//         (frame_chunk_size_ - frame_chunk_stride_) entries so the next chunk
//         starts with them.
//
// `feats` must already be sized by the caller (Read() does this).
// NOTE(review): assumes every Read() from the wrapped extractor yields exactly
// one frame of base_extractor_->Dim() values -- confirm against the extractor.
bool Dispenser::Compute(Vector<BaseFloat>* feats) {
    bool ok = false;

    // Step 1: top the queue up to a full window.
    while (feature_cache_.size() < frame_chunk_size_) {
        Vector<BaseFloat> frame;
        ok = base_extractor_->Read(&frame);
        if (!ok || frame.Dim() == 0) {
            return false;
        }
        feature_cache_.push(frame);
    }

    // Step 2: drain the window into `feats`, keeping the overlap for next time.
    const int32 overlap = frame_chunk_size_ - frame_chunk_stride_;
    const int32 frame_dim = base_extractor_->Dim();
    for (int32 idx = 0; idx < frame_chunk_size_; ++idx) {
        Vector<BaseFloat>& head = feature_cache_.front();
        feats->Range(idx * frame_dim, frame_dim).CopyFromVec(head);

        // Entries in the last `overlap` positions are appended again before
        // being popped; `head` refers to the front entry, so the push has to
        // come first.
        const bool reuse = (frame_chunk_size_ - idx) <= overlap;
        if (reuse) {
            feature_cache_.push(head);
        }
        feature_cache_.pop();
    }
    return ok;
}
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/audio/dispenser.h
0 → 100644
浏览文件 @
00a185b1
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/common.h"
#include "frontend/audio/frontend_itf.h"
namespace
ppspeech
{
// Chunking parameters consumed by Dispenser. Both fields default to 1.
struct DispenserOptions {
    int32 frame_chunk_size = 1;    // number of frames per chunk (window)
    int32 frame_chunk_stride = 1;  // hop between consecutive chunks (stride)

    DispenserOptions() {}
};
// Frontend stage that wraps another FrontendInterface and serves its output
// as chunks of frame_chunk_size_ frames, advancing frame_chunk_stride_ frames
// between consecutive chunks. Frames shared by neighbouring chunks are kept in
// feature_cache_ between Read() calls.
class Dispenser : public FrontendInterface {
  public:
    // Takes ownership of `base_extractor`. The default argument is kept for
    // source compatibility, but the constructor dereferences the pointer, so
    // callers must pass a non-null extractor.
    explicit Dispenser(
        DispenserOptions opts,
        std::unique_ptr<FrontendInterface> base_extractor = nullptr);

    // Feed feats or waves; forwarded to the wrapped extractor.
    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);

    // Reads one chunk. feats size = num_frames * feat_dim.
    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);

    // feat dim, as reported by the wrapped extractor at construction time.
    virtual size_t Dim() const { return dim_; }

    virtual void SetFinished() { base_extractor_->SetFinished(); }

    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

    // Resets this stage and the wrapped extractor. The buffered overlap frames
    // are dropped as well; otherwise frames left over from the previous stream
    // would be emitted at the start of the next one.
    virtual void Reset() {
        std::queue<kaldi::Vector<kaldi::BaseFloat>>().swap(feature_cache_);
        base_extractor_->Reset();
    }

  private:
    // Fills `feats` (already sized by Read()) with one chunk; returns false
    // when the wrapped extractor cannot supply enough frames.
    bool Compute(kaldi::Vector<kaldi::BaseFloat>* feats);

    int32 dim_;
    int32 frame_chunk_size_;    // window
    int32 frame_chunk_stride_;  // stride
    // Frames read from the wrapped extractor and not yet fully consumed.
    std::queue<kaldi::Vector<kaldi::BaseFloat>> feature_cache_;
    std::unique_ptr<FrontendInterface> base_extractor_;
    DISALLOW_COPY_AND_ASSIGN(Dispenser);
};
}
// namespace ppspeech
speechx/speechx/frontend/audio/feature_cache.cc
浏览文件 @
00a185b1
...
...
@@ -26,8 +26,6 @@ using std::unique_ptr;
FeatureCache
::
FeatureCache
(
FeatureCacheOptions
opts
,
unique_ptr
<
FrontendInterface
>
base_extractor
)
{
max_size_
=
opts
.
max_size
;
frame_chunk_stride_
=
opts
.
frame_chunk_stride
;
frame_chunk_size_
=
opts
.
frame_chunk_size
;
timeout_
=
opts
.
timeout
;
// ms
base_extractor_
=
std
::
move
(
base_extractor
);
dim_
=
base_extractor_
->
Dim
();
...
...
@@ -74,24 +72,11 @@ bool FeatureCache::Compute() {
bool
result
=
base_extractor_
->
Read
(
&
feature
);
if
(
result
==
false
||
feature
.
Dim
()
==
0
)
return
false
;
// join with remained
int32
joint_len
=
feature
.
Dim
()
+
remained_feature_
.
Dim
();
Vector
<
BaseFloat
>
joint_feature
(
joint_len
);
joint_feature
.
Range
(
0
,
remained_feature_
.
Dim
())
.
CopyFromVec
(
remained_feature_
);
joint_feature
.
Range
(
remained_feature_
.
Dim
(),
feature
.
Dim
())
.
CopyFromVec
(
feature
);
// one by one, or stride with window
// controlled by frame_chunk_stride_ and frame_chunk_size_
int32
num_chunk
=
((
joint_len
/
dim_
)
-
frame_chunk_size_
)
/
frame_chunk_stride_
+
1
;
int32
num_chunk
=
feature
.
Dim
()
/
dim_
;
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunk
;
++
chunk_idx
)
{
int32
start
=
chunk_idx
*
frame_chunk_stride_
*
dim_
;
Vector
<
BaseFloat
>
feature_chunk
(
frame_chunk_size_
*
dim_
);
SubVector
<
BaseFloat
>
tmp
(
joint_feature
.
Data
()
+
start
,
frame_chunk_size_
*
dim_
);
int32
start
=
chunk_idx
*
dim_
;
Vector
<
BaseFloat
>
feature_chunk
(
dim_
);
SubVector
<
BaseFloat
>
tmp
(
feature
.
Data
()
+
start
,
dim_
);
feature_chunk
.
CopyFromVec
(
tmp
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
...
...
@@ -104,13 +89,6 @@ bool FeatureCache::Compute() {
cache_
.
push
(
feature_chunk
);
ready_read_condition_
.
notify_one
();
}
// cache remained feats
int32
remained_feature_len
=
joint_len
-
num_chunk
*
frame_chunk_stride_
*
dim_
;
remained_feature_
.
Resize
(
remained_feature_len
);
remained_feature_
.
CopyFromVec
(
joint_feature
.
Range
(
frame_chunk_stride_
*
num_chunk
*
dim_
,
remained_feature_len
));
return
result
;
}
...
...
speechx/speechx/frontend/audio/feature_cache.h
浏览文件 @
00a185b1
...
...
@@ -21,13 +21,9 @@ namespace ppspeech {
struct
FeatureCacheOptions
{
int32
max_size
;
int32
frame_chunk_size
;
int32
frame_chunk_stride
;
int32
timeout
;
// ms
FeatureCacheOptions
()
:
max_size
(
kint16max
),
frame_chunk_size
(
1
),
frame_chunk_stride
(
1
),
timeout
(
1
)
{}
};
...
...
@@ -80,7 +76,7 @@ class FeatureCache : public FrontendInterface {
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_read_condition_
;
// DISALLOW_COPY_AND_ASSGI
N(FeatureCache);
DISALLOW_COPY_AND_ASSIG
N
(
FeatureCache
);
};
}
// namespace ppspeech
speechx/speechx/frontend/audio/feature_pipeline.cc
浏览文件 @
00a185b1
...
...
@@ -35,8 +35,11 @@ FeaturePipeline::FeaturePipeline(const FeaturePipelineOptions& opts) {
unique_ptr
<
FrontendInterface
>
cmvn
(
new
ppspeech
::
CMVN
(
opts
.
cmvn_file
,
std
::
move
(
base_feature
)));
base_extractor_
.
reset
(
unique_ptr
<
FrontendInterface
>
cache
(
new
ppspeech
::
FeatureCache
(
opts
.
feature_cache_opts
,
std
::
move
(
cmvn
)));
base_extractor_
.
reset
(
new
ppspeech
::
Dispenser
(
opts
.
dispenser_opts
,
std
::
move
(
cache
)));
}
}
// ppspeech
\ No newline at end of file
speechx/speechx/frontend/audio/feature_pipeline.h
浏览文件 @
00a185b1
...
...
@@ -23,6 +23,7 @@
#include "frontend/audio/frontend_itf.h"
#include "frontend/audio/linear_spectrogram.h"
#include "frontend/audio/normalizer.h"
#include "frontend/audio/dispenser.h"
namespace
ppspeech
{
...
...
@@ -33,13 +34,16 @@ struct FeaturePipelineOptions {
LinearSpectrogramOptions
linear_spectrogram_opts
;
FbankOptions
fbank_opts
;
FeatureCacheOptions
feature_cache_opts
;
DispenserOptions
dispenser_opts
;
FeaturePipelineOptions
()
:
cmvn_file
(
""
),
to_float32
(
false
),
// true, only for linear feature
use_fbank
(
true
),
linear_spectrogram_opts
(),
fbank_opts
(),
feature_cache_opts
()
{}
feature_cache_opts
(),
dispenser_opts
()
{}
};
class
FeaturePipeline
:
public
FrontendInterface
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录