Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
5ae63919
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5ae63919
编写于
8月 17, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix dataloader
上级
981cecf7
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
108 additions
and
20 deletions
+108
-20
deepspeech/io/batchfy.py
deepspeech/io/batchfy.py
+2
-2
deepspeech/io/collator.py
deepspeech/io/collator.py
+7
-8
deepspeech/io/dataloader.py
deepspeech/io/dataloader.py
+9
-9
deepspeech/io/dataset.py
deepspeech/io/dataset.py
+0
-1
deepspeech/io/utility.py
deepspeech/io/utility.py
+90
-0
未找到文件。
deepspeech/io/batchfy.py
浏览文件 @
5ae63919
...
...
@@ -421,7 +421,7 @@ def make_batchset(
key
=
lambda
data
:
int
(
data
[
1
][
batch_sort_key
][
batch_sort_axis
][
"shape"
][
0
]),
reverse
=
not
shortest_first
,
)
logger
.
info
(
"# utts: "
+
str
(
len
(
sorted_data
)))
if
count
==
"seq"
:
batches
=
batchfy_by_seq
(
sorted_data
,
...
...
@@ -466,4 +466,4 @@ def make_batchset(
logger
.
info
(
"# minibatches: "
+
str
(
len
(
batches
)))
# batch: List[List[Tuple[str, dict]]]
return
batches
\ No newline at end of file
return
batches
deepspeech/io/collator.py
浏览文件 @
5ae63919
...
...
@@ -23,7 +23,7 @@ from deepspeech.frontend.featurizer.speech_featurizer import SpeechFeaturizer
from
deepspeech.frontend.normalizer
import
FeatureNormalizer
from
deepspeech.frontend.speech
import
SpeechSegment
from
deepspeech.frontend.utility
import
IGNORE_ID
from
deepspeech.io.utility
import
pad_
sequence
from
deepspeech.io.utility
import
pad_
list
from
deepspeech.utils.log
import
Log
__all__
=
[
"SpeechCollator"
]
...
...
@@ -286,13 +286,12 @@ class SpeechCollator():
texts
.
append
(
tokens
)
text_lens
.
append
(
tokens
.
shape
[
0
])
padded_audios
=
pad_sequence
(
audios
,
padding_value
=
0.0
).
astype
(
np
.
float32
)
#[B, T, D]
audio_lens
=
np
.
array
(
audio_lens
).
astype
(
np
.
int64
)
padded_texts
=
pad_sequence
(
texts
,
padding_value
=
IGNORE_ID
).
astype
(
np
.
int64
)
text_lens
=
np
.
array
(
text_lens
).
astype
(
np
.
int64
)
return
utts
,
padded_audios
,
audio_lens
,
padded_texts
,
text_lens
#[B, T, D]
xs_pad
=
pad_list
(
audios
,
0.0
).
astype
(
np
.
float32
)
ilens
=
np
.
array
(
audio_lens
).
astype
(
np
.
int64
)
ys_pad
=
pad_list
(
texts
,
IGNORE_ID
).
astype
(
np
.
int64
)
olens
=
np
.
array
(
text_lens
).
astype
(
np
.
int64
)
return
utts
,
xs_pad
,
ilens
,
ys_pad
,
olens
@
property
def
manifest
(
self
):
...
...
deepspeech/io/dataloader.py
浏览文件 @
5ae63919
...
...
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
from
paddle.io
import
DataLoader
from
deepspeech.frontend.utility
import
read_manifest
...
...
@@ -30,11 +31,11 @@ class CustomConverter():
Args:
subsampling_factor (int): The subsampling factor.
dtype (
paddle
.dtype): Data type to convert.
dtype (
np
.dtype): Data type to convert.
"""
def
__init__
(
self
,
subsampling_factor
=
1
,
dtype
=
paddle
.
float32
):
def
__init__
(
self
,
subsampling_factor
=
1
,
dtype
=
np
.
float32
):
"""Construct a CustomConverter object."""
self
.
subsampling_factor
=
subsampling_factor
self
.
ignore_id
=
-
1
...
...
@@ -52,7 +53,7 @@ class CustomConverter():
"""
# batch should be located in list
assert
len
(
batch
)
==
1
xs
,
y
s
=
batch
[
0
]
(
xs
,
ys
),
utt
s
=
batch
[
0
]
# perform subsampling
if
self
.
subsampling_factor
>
1
:
...
...
@@ -74,15 +75,14 @@ class CustomConverter():
else
:
xs_pad
=
pad_list
(
xs
,
0
).
astype
(
self
.
dtype
)
ilens
=
paddle
.
to_tensor
(
ilens
)
# NOTE: this is for multi-output (e.g., speech translation)
ys_pad
=
pad_list
(
[
np
.
array
(
y
[
0
][:])
if
isinstance
(
y
,
tuple
)
else
y
for
y
in
ys
],
self
.
ignore_id
)
olens
=
np
.
array
([
y
.
shape
[
0
]
for
y
in
ys
])
return
xs_pad
,
ilens
,
ys_pad
,
olens
olens
=
np
.
array
(
[
y
[
0
].
shape
[
0
]
if
isinstance
(
y
,
tuple
)
else
y
.
shape
[
0
]
for
y
in
ys
])
return
utts
,
xs_pad
,
ilens
,
ys_pad
,
olens
class
BatchDataLoader
():
...
...
@@ -166,7 +166,7 @@ class BatchDataLoader():
# we used an empty collate function instead which returns list
self
.
train_loader
=
DataLoader
(
dataset
=
TransformDataset
(
self
.
data
,
lambda
data
:
self
.
converter
([
self
.
load
(
data
)])),
self
.
data
,
lambda
data
:
self
.
converter
([
self
.
load
(
data
,
return_uttid
=
True
)])),
batch_size
=
1
,
shuffle
=
not
use_sortagrad
if
train_mode
else
False
,
collate_fn
=
lambda
x
:
x
[
0
],
...
...
deepspeech/io/dataset.py
浏览文件 @
5ae63919
...
...
@@ -16,7 +16,6 @@ from typing import Optional
from
paddle.io
import
Dataset
from
yacs.config
import
CfgNode
from
deepspeech.utils.log
import
Log
__all__
=
[
"ManifestDataset"
,
"TripletManifestDataset"
,
"TransformDataset"
]
...
...
deepspeech/io/utility.py
浏览文件 @
5ae63919
...
...
@@ -14,7 +14,9 @@
from
collections
import
OrderedDict
from
typing
import
List
import
kaldiio
import
numpy
as
np
import
soundfile
from
deepspeech.frontend.augmentor.augmentation
import
AugmentationPipeline
from
deepspeech.utils.log
import
Log
...
...
@@ -383,3 +385,91 @@ class LoadInputsAndTargets():
else
:
raise
NotImplementedError
(
"Not supported: loader_type={}"
.
format
(
filetype
))
class SoundHDF5File():
    """Collecting sound files to a HDF5 file

    Entries are assigned as ``(array, rate)`` pairs. On write, the samples
    are encoded with ``soundfile.write`` into an in-memory buffer using
    ``self.format`` and the encoded bytes are stored as one opaque HDF5
    dataset. On read, the stored bytes are decoded again with
    ``soundfile.read`` and returned as ``(array, rate)``.

    >>> f = SoundHDF5File('a.flac.h5', mode='a')
    >>> array = np.random.randint(0, 100, 100, dtype=np.int16)
    >>> f['id'] = (array, 16000)
    >>> array, rate = f['id']

    :param str filepath: Path handed to ``h5py.File``.
    :param str mode: Open mode handed to ``h5py.File``.
    :param str format: The type used when saving wav. flac, nist, htk, etc.
        When ``None``, it is taken from the second extension of
        ``filepath`` (``a.flac.h5`` -> ``flac``), falling back to ``flac``
        if soundfile does not list that format.
    :param str dtype: dtype handed to ``soundfile.read`` when loading.
    :param kwargs: Extra keyword arguments forwarded to ``h5py.File``.

    NOTE(review): this class uses ``h5py``, ``io`` and ``os``, none of which
    appear among the module imports visible next to this change -- confirm
    they are imported at module level.
    """

    def __init__(self, filepath, mode="r+", format=None, dtype="int16",
                 **kwargs):
        self.filepath = filepath
        self.mode = mode
        self.dtype = dtype

        self.file = h5py.File(filepath, mode, **kwargs)
        if format is None:
            # filepath = a.flac.h5 -> format = flac
            second_ext = os.path.splitext(os.path.splitext(filepath)[0])[1]
            format = second_ext[1:]
            if format.upper() not in soundfile.available_formats():
                # If not found, flac is selected
                format = "flac"

        # This format affects only saving
        self.format = format

    def __repr__(self):
        return '<SoundHDF5 file "{}" (mode {}, format {}, type {})>'.format(
            self.filepath, self.mode, self.format, self.dtype)

    def create_dataset(self, name, shape=None, data=None, **kwds):
        """Encode ``data`` and store it under ``name``.

        :param str name: Dataset name inside the HDF5 file.
        :param shape: Forwarded unchanged to ``h5py`` ``create_dataset``.
        :param data: ``(array, rate)`` pair to encode with ``self.format``.
        :param kwds: Extra keyword arguments forwarded to ``h5py``.
        """
        f = io.BytesIO()
        array, rate = data
        soundfile.write(f, array, rate, format=self.format)
        # The encoded file is stored as a single opaque (void) value.
        self.file.create_dataset(
            name, shape=shape, data=np.void(f.getvalue()), **kwds)

    def __setitem__(self, name, data):
        self.create_dataset(name, data=data)

    def __getitem__(self, key):
        """Decode the entry stored under ``key`` and return ``(array, rate)``."""
        data = self.file[key][()]
        f = io.BytesIO(data.tobytes())
        array, rate = soundfile.read(f, dtype=self.dtype)
        return array, rate

    def keys(self):
        return self.file.keys()

    def values(self):
        """Yield the decoded ``(array, rate)`` pair of every entry."""
        for k in self.file:
            yield self[k]

    def items(self):
        """Yield ``(key, (array, rate))`` for every entry."""
        for k in self.file:
            yield k, self[k]

    def __iter__(self):
        return iter(self.file)

    def __contains__(self, item):
        return item in self.file

    def __len__(self, item=None):
        # ``len(obj)`` calls ``__len__`` with no argument; the original
        # required ``item`` and therefore always raised TypeError. The
        # parameter is kept, optional and ignored, so explicit calls that
        # pass a value keep working.
        return len(self.file)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.file.close()

    def close(self):
        """Close the underlying HDF5 file."""
        self.file.close()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录