Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
760e5d44
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
760e5d44
编写于
3月 31, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor cache
上级
3572cacf
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
101 addition
and
65 deletion
+101
-65
speechx/examples/decoder/offline_decoder_main.cc
speechx/examples/decoder/offline_decoder_main.cc
+2
-3
speechx/examples/feat/linear_spectrogram_main.cc
speechx/examples/feat/linear_spectrogram_main.cc
+4
-3
speechx/speechx/frontend/CMakeLists.txt
speechx/speechx/frontend/CMakeLists.txt
+2
-2
speechx/speechx/frontend/audio_cache.cc
speechx/speechx/frontend/audio_cache.cc
+24
-19
speechx/speechx/frontend/audio_cache.h
speechx/speechx/frontend/audio_cache.h
+61
-0
speechx/speechx/frontend/data_cache.h
speechx/speechx/frontend/data_cache.h
+8
-38
未找到文件。
speechx/examples/decoder/offline_decoder_main.cc
浏览文件 @
760e5d44
...
...
@@ -17,7 +17,7 @@
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "frontend/
raw_audio
.h"
#include "frontend/
data_cache
.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
...
...
@@ -60,8 +60,7 @@ int main(int argc, char* argv[]) {
model_opts
.
params_path
=
model_params
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
new
ppspeech
::
PaddleNnet
(
model_opts
));
std
::
shared_ptr
<
ppspeech
::
RawDataCache
>
raw_data
(
new
ppspeech
::
RawDataCache
());
std
::
shared_ptr
<
ppspeech
::
DataCache
>
raw_data
(
new
ppspeech
::
DataCache
());
std
::
shared_ptr
<
ppspeech
::
Decodable
>
decodable
(
new
ppspeech
::
Decodable
(
nnet
,
raw_data
));
LOG
(
INFO
)
<<
"Init decodeable."
;
...
...
speechx/examples/feat/linear_spectrogram_main.cc
浏览文件 @
760e5d44
...
...
@@ -17,10 +17,11 @@
#include "frontend/linear_spectrogram.h"
#include "base/flags.h"
#include "base/log.h"
#include "frontend/audio_cache.h"
#include "frontend/data_cache.h"
#include "frontend/feature_cache.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/normalizer.h"
#include "frontend/raw_audio.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
...
...
@@ -170,9 +171,9 @@ int main(int argc, char* argv[]) {
// window -->linear_spectrogram --> global cmvn -> feat cache
// std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
// ppspeech::
Raw
DataCache());
// ppspeech::DataCache());
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
data_source
(
new
ppspeech
::
Raw
AudioCache
());
new
ppspeech
::
AudioCache
());
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
db_norm
(
...
...
speechx/speechx/frontend/CMakeLists.txt
浏览文件 @
760e5d44
...
...
@@ -3,7 +3,7 @@ project(frontend)
add_library
(
frontend STATIC
normalizer.cc
linear_spectrogram.cc
raw_audio
.cc
audio_cache
.cc
feature_cache.cc
)
...
...
speechx/speechx/frontend/
raw_audio
.cc
→
speechx/speechx/frontend/
audio_cache
.cc
浏览文件 @
760e5d44
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/
raw_audio
.h"
#include "frontend/
audio_cache
.h"
#include "kaldi/base/timer.h"
namespace
ppspeech
{
...
...
@@ -21,38 +21,43 @@ using kaldi::BaseFloat;
using
kaldi
::
VectorBase
;
using
kaldi
::
Vector
;
RawAudioCache
::
RawAudioCache
(
int
buffer_size
)
:
finished_
(
false
),
data_length_
(
0
),
start_
(
0
),
timeout_
(
1
)
{
ring_buffer_
.
resize
(
buffer_size
);
AudioCache
::
AudioCache
(
int
buffer_size
)
:
finished_
(
false
),
capacity_
(
buffer_size
),
size_
(
0
),
offset_
(
0
),
timeout_
(
1
)
{
ring_buffer_
.
resize
(
capacity_
);
}
void
Raw
AudioCache
::
Accept
(
const
VectorBase
<
BaseFloat
>&
waves
)
{
void
AudioCache
::
Accept
(
const
VectorBase
<
BaseFloat
>&
waves
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
data_length
_
+
waves
.
Dim
()
>
ring_buffer_
.
size
())
{
while
(
size
_
+
waves
.
Dim
()
>
ring_buffer_
.
size
())
{
ready_feed_condition_
.
wait
(
lock
);
}
for
(
size_t
idx
=
0
;
idx
<
waves
.
Dim
();
++
idx
)
{
int32
buffer_idx
=
(
idx
+
star
t_
)
%
ring_buffer_
.
size
();
int32
buffer_idx
=
(
idx
+
offse
t_
)
%
ring_buffer_
.
size
();
ring_buffer_
[
buffer_idx
]
=
waves
(
idx
);
}
data_length
_
+=
waves
.
Dim
();
size
_
+=
waves
.
Dim
();
}
bool
Raw
AudioCache
::
Read
(
Vector
<
BaseFloat
>*
waves
)
{
bool
AudioCache
::
Read
(
Vector
<
BaseFloat
>*
waves
)
{
size_t
chunk_size
=
waves
->
Dim
();
kaldi
::
Timer
timer
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
chunk_size
>
data_length
_
)
{
while
(
chunk_size
>
size
_
)
{
// when audio is empty and no more data feed
// ready_read_condition will block in dead lock
. so replace with
// timeout_
// ready_read_condition will block in dead lock
,
//
so replace with
timeout_
// ready_read_condition_.wait(lock);
int32
elapsed
=
static_cast
<
int32
>
(
timer
.
Elapsed
()
*
1000
);
if
(
elapsed
>
timeout_
)
{
if
(
finished_
==
true
)
{
// read last chunk data
if
(
finished_
==
true
)
{
// read last chunk data
break
;
}
if
(
chunk_size
>
data_length
_
)
{
if
(
chunk_size
>
size
_
)
{
return
false
;
}
}
...
...
@@ -60,17 +65,17 @@ bool RawAudioCache::Read(Vector<BaseFloat>* waves) {
}
// read last chunk data
if
(
chunk_size
>
data_length
_
)
{
chunk_size
=
data_length
_
;
if
(
chunk_size
>
size
_
)
{
chunk_size
=
size
_
;
waves
->
Resize
(
chunk_size
);
}
for
(
size_t
idx
=
0
;
idx
<
chunk_size
;
++
idx
)
{
int
buff_idx
=
(
star
t_
+
idx
)
%
ring_buffer_
.
size
();
int
buff_idx
=
(
offse
t_
+
idx
)
%
ring_buffer_
.
size
();
waves
->
Data
()[
idx
]
=
ring_buffer_
[
buff_idx
];
}
data_length
_
-=
chunk_size
;
start_
=
(
star
t_
+
chunk_size
)
%
ring_buffer_
.
size
();
size
_
-=
chunk_size
;
offset_
=
(
offse
t_
+
chunk_size
)
%
ring_buffer_
.
size
();
ready_feed_condition_
.
notify_one
();
return
true
;
}
...
...
speechx/speechx/frontend/audio_cache.h
0 → 100644
浏览文件 @
760e5d44
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/common.h"
#include "frontend/feature_extractor_interface.h"
namespace
ppspeech
{
// waves cache
// Cache of raw audio samples (waves) backed by a fixed-size ring buffer.
//
// All mutable state (offset_, size_, finished_, ring_buffer_) is guarded by
// mutex_. Every accessor defined in this header takes the lock, so the
// finished flag and the buffer bookkeeping are never read or written
// unsynchronized.
class AudioCache : public FeatureExtractorInterface {
  public:
    // buffer_size: capacity of the ring buffer, in samples.
    explicit AudioCache(int buffer_size = kint16max);

    // Feeds samples into the cache.
    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);

    // Fetches samples from the cache into `waves`; returns false when the
    // requested amount is not available.
    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);

    // the audio dim is 1, one sample
    virtual size_t Dim() const { return 1; }

    // Marks the end of the audio stream.
    virtual void SetFinished() {
        std::lock_guard<std::mutex> lock(mutex_);
        finished_ = true;
    }

    // Returns true once SetFinished() has been called (and Reset() has not).
    // Takes the lock because finished_ is written under mutex_ elsewhere.
    virtual bool IsFinished() const {
        std::lock_guard<std::mutex> lock(mutex_);
        return finished_;
    }

    // Drops all cached samples and clears the finished flag.
    virtual void Reset() {
        {
            std::lock_guard<std::mutex> lock(mutex_);
            offset_ = 0;
            size_ = 0;
            finished_ = false;
        }
        // The buffer is empty now: wake any producer that was waiting for
        // free space so it does not stay blocked after the reset.
        ready_feed_condition_.notify_all();
    }

  private:
    std::vector<kaldi::BaseFloat> ring_buffer_;
    size_t offset_;    // offset in ring_buffer_
    size_t size_;      // samples in ring_buffer_ now
    size_t capacity_;  // capacity of ring_buffer_
    bool finished_;    // reach audio end
    mutable std::mutex mutex_;
    std::condition_variable ready_feed_condition_;
    kaldi::int32 timeout_;  // millisecond

    DISALLOW_COPY_AND_ASSIGN(AudioCache);
};
}
// namespace ppspeech
speechx/speechx/frontend/
raw_audio
.h
→
speechx/speechx/frontend/
data_cache
.h
浏览文件 @
760e5d44
...
...
@@ -15,51 +15,22 @@
#pragma once
#include "base/common.h"
#include "frontend/feature_extractor_interface.h"
#pragma once
namespace
ppspeech
{
class
RawAudioCache
:
public
FeatureExtractorInterface
{
// A data source for testing different frontend modules.
// It accepts waves or feats.
class
DataCache
:
public
FeatureExtractorInterface
{
public:
explicit
RawAudioCache
(
int
buffer_size
=
kint16max
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
waves
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
waves
);
// the audio dim is 1
virtual
size_t
Dim
()
const
{
return
1
;
}
virtual
void
SetFinished
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
finished_
=
true
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
virtual
void
Reset
()
{
start_
=
0
;
data_length_
=
0
;
finished_
=
false
;
}
private:
std
::
vector
<
kaldi
::
BaseFloat
>
ring_buffer_
;
size_t
start_
;
size_t
data_length_
;
bool
finished_
;
mutable
std
::
mutex
mutex_
;
std
::
condition_variable
ready_feed_condition_
;
kaldi
::
int32
timeout_
;
DISALLOW_COPY_AND_ASSIGN
(
RawAudioCache
);
};
explicit
DataCache
()
{
finished_
=
false
;
}
// it is a datasource for testing different frontend module.
// it accepts waves or feats.
class
RawDataCache
:
public
FeatureExtractorInterface
{
public:
explicit
RawDataCache
()
{
finished_
=
false
;
}
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
{
data_
=
inputs
;
}
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
if
(
data_
.
Dim
()
==
0
)
{
return
false
;
...
...
@@ -80,7 +51,6 @@ class RawDataCache : public FeatureExtractorInterface {
bool
finished_
;
int32
dim_
;
DISALLOW_COPY_AND_ASSIGN
(
Raw
DataCache
);
DISALLOW_COPY_AND_ASSIGN
(
DataCache
);
};
}
// namespace ppspeech
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录