Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
965f486d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
965f486d
编写于
4月 01, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add voxceleb and rirs noise dataset
上级
9944fec3
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
124 additions
and
0 deletions
+124
-0
dataset/rir_noise/rir_noise.py
dataset/rir_noise/rir_noise.py
+4
-0
paddlespeech/vector/io/dataset.py
paddlespeech/vector/io/dataset.py
+120
-0
未找到文件。
dataset/rir_noise/rir_noise.py
浏览文件 @
965f486d
...
...
@@ -81,6 +81,10 @@ def create_manifest(data_dir, manifest_path_prefix):
},
ensure_ascii
=
False
))
manifest_path
=
manifest_path_prefix
+
'.'
+
dtype
if
not
os
.
path
.
exists
(
os
.
path
.
dirname
(
manifest_path
)):
os
.
makedirs
(
os
.
path
.
dirname
(
manifest_path
))
with
codecs
.
open
(
manifest_path
,
'w'
,
'utf-8'
)
as
fout
:
for
line
in
json_lines
:
fout
.
write
(
line
+
'
\n
'
)
...
...
paddlespeech/vector/io/dataset.py
0 → 100644
浏览文件 @
965f486d
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import random

from paddle.io import Dataset
from paddleaudio import load as load_audio
class VoxCelebDataset(Dataset):
    """Dataset of VoxCeleb audio segments listed in a CSV file.

    Every item is a dict holding the CSV metadata fields plus two derived
    entries: ``feat`` (the selected slice of the loaded waveform) and
    ``label`` (the integer label mapped from the row's speaker id).
    """

    # One CSV row. `start` and `stop` are integers used as slice bounds into
    # the waveform loaded from `wav`.
    meta_info = collections.namedtuple(
        'META_INFO', ('id', 'duration', 'wav', 'start', 'stop', 'spk_id'))

    def __init__(self, csv_path, spk_id2label_path, config):
        """Load the segment list and the speaker-id-to-label mapping.

        Args:
            csv_path: Path to a CSV file whose first line is a header and
                whose remaining lines are
                ``id,duration,wav,start,stop,spk_id``.
            spk_id2label_path: Path to a text file with one
                ``<spk_id> <label>`` pair per line, separated by one space.
            config: Object exposing ``random_chunk`` and, when that flag is
                truthy, ``chunk_duration`` (multiplied by the sample rate to
                get a chunk length in samples, so presumably seconds).
        """
        super().__init__()
        self.csv_path = csv_path
        self.spk_id2label_path = spk_id2label_path
        self.config = config
        self.data = self.load_data_csv()
        self.spk_id2label = self.load_speaker_to_label()

    def load_data_csv(self):
        """Parse ``self.csv_path`` into a list of ``meta_info`` records.

        The first line is treated as a header and skipped; blank lines are
        ignored so a trailing newline does not break the unpacking.

        Returns:
            list of ``meta_info`` tuples, in file order.

        Raises:
            ValueError: If a non-blank row does not have exactly six
                comma-separated fields, or a numeric field cannot be parsed.
        """
        data = []
        with open(self.csv_path, 'r') as rf:
            next(rf, None)  # skip the header line
            for line in rf:
                line = line.strip()
                if not line:
                    continue
                audio_id, duration, wav, start, stop, spk_id = line.split(',')
                data.append(
                    self.meta_info(audio_id,
                                   float(duration), wav,
                                   int(start), int(stop), spk_id))
        return data

    def load_speaker_to_label(self):
        """Read ``self.spk_id2label_path`` into a ``{spk_id: int}`` mapping.

        The mapping is built locally and returned, so it does not rely on
        ``self.spk_id2label`` already existing when called from ``__init__``.

        Returns:
            dict mapping each speaker id string to its integer label.

        Raises:
            ValueError: If a non-blank line is not exactly
                ``<spk_id> <label>`` or the label is not an integer.
        """
        spk_id2label = {}
        with open(self.spk_id2label_path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                spk_id, label = line.split(' ')
                spk_id2label[spk_id] = int(label)
        return spk_id2label

    def convert_to_record(self, idx: int):
        """Build the training record for the segment at position ``idx``.

        Args:
            idx: Index into ``self.data``.

        Returns:
            dict with every ``meta_info`` field, plus ``feat`` (waveform
            slice) and ``label`` (integer speaker label).

        Raises:
            KeyError: If the row's speaker id is missing from the label map.
        """
        sample = self.data[idx]
        # Copy every namedtuple field into a plain, mutable dict.
        record = dict(zip(sample._fields, sample))

        waveform, sr = load_audio(record['wav'])

        if self.config.random_chunk:
            # Randomly select a fixed-length chunk of samples from the audio.
            # NOTE(review): assumes the first axis of `waveform` is the
            # sample axis - confirm against `load_audio`.
            num_wav_samples = waveform.shape[0]
            num_chunk_samples = int(self.config.chunk_duration * sr)
            # Clamp the upper bound: when the audio is shorter than the
            # chunk, randint(0, negative) would raise ValueError. In that
            # case the slice starts at 0 and yields the whole (short) audio.
            max_start = max(num_wav_samples - num_chunk_samples - 1, 0)
            start = random.randint(0, max_start)
            stop = start + num_chunk_samples
        else:
            start = record['start']
            stop = record['stop']

        # Only the raw waveform slice is returned as the feature.
        record['feat'] = waveform[start:stop]
        record['label'] = self.spk_id2label[record['spk_id']]
        return record

    def __getitem__(self, idx):
        """Return the record for item ``idx`` (see ``convert_to_record``)."""
        return self.convert_to_record(idx)

    def __len__(self):
        """Return the number of segments loaded from the CSV file."""
        return len(self.data)
class RIRSNoiseDataset(Dataset):
    """Dataset of RIRS/noise audio files listed in a CSV file.

    Every item is a dict holding the CSV metadata fields plus ``feat``, the
    full waveform loaded from the row's ``wav`` path.
    """

    # One CSV row: identifier, duration and audio file path.
    meta_info = collections.namedtuple('META_INFO', ('id', 'duration', 'wav'))

    def __init__(self, csv_path):
        """Load and shuffle the file list.

        Args:
            csv_path: Path to a CSV file whose first line is a header and
                whose remaining lines are ``id,duration,wav``.
        """
        super().__init__()
        self.csv_path = csv_path
        self.data = self.load_csv_data()

    def load_csv_data(self):
        """Parse ``self.csv_path`` into a shuffled list of ``meta_info``.

        The first line is treated as a header and skipped; blank lines are
        ignored so a trailing newline does not break the unpacking. The
        resulting list is shuffled in place with the module-level ``random``
        generator before being returned.

        Returns:
            list of ``meta_info`` tuples in random order.

        Raises:
            ValueError: If a non-blank row does not have exactly three
                comma-separated fields, or the duration is not a number.
        """
        data = []
        with open(self.csv_path, 'r') as rf:
            next(rf, None)  # skip the header line
            for line in rf:
                line = line.strip()
                if not line:
                    continue
                audio_id, duration, wav = line.split(',')
                data.append(self.meta_info(audio_id, float(duration), wav))

        random.shuffle(data)
        return data

    def convert_to_record(self, idx: int):
        """Build the record for the audio file at position ``idx``.

        Args:
            idx: Index into ``self.data``.

        Returns:
            dict with every ``meta_info`` field plus ``feat`` (the waveform
            returned by ``load_audio``).
        """
        sample = self.data[idx]
        # Copy every namedtuple field into a plain, mutable dict.
        record = dict(zip(sample._fields, sample))

        # The sample rate returned by the loader is not used here.
        waveform, _sr = load_audio(record['wav'])
        record['feat'] = waveform
        return record

    def __getitem__(self, idx):
        """Return the record for item ``idx`` (see ``convert_to_record``)."""
        return self.convert_to_record(idx)

    def __len__(self):
        """Return the number of audio files loaded from the CSV file."""
        return len(self.data)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录