Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
0780d181
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0780d181
编写于
3月 02, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove personal code test=doc
上级
7ef60eba
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
85 addition
and
49 deletion
+85
-49
dataset/voxceleb/voxceleb1.py
dataset/voxceleb/voxceleb1.py
+63
-43
examples/voxceleb/sv0/local/train.py
examples/voxceleb/sv0/local/train.py
+20
-4
examples/voxceleb/sv0/run.sh
examples/voxceleb/sv0/run.sh
+2
-2
未找到文件。
dataset/voxceleb/voxceleb1.py
浏览文件 @
0780d181
...
@@ -11,23 +11,26 @@
...
@@ -11,23 +11,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
collections
import
collections
import
csv
import
csv
import
glob
import
glob
import
os
import
os
import
random
import
random
from
typing
import
Dict
,
List
,
Tuple
from
typing
import
Dict
from
typing
import
List
from
typing
import
Tuple
from
paddle.io
import
Dataset
from
paddle.io
import
Dataset
from
tqdm
import
tqdm
from
pathos.multiprocessing
import
Pool
from
pathos.multiprocessing
import
Pool
from
tqdm
import
tqdm
from
paddleaudio.backends
import
load
as
load_audio
from
paddleaudio.backends
import
load
as
load_audio
from
paddleaudio.utils
import
DATA_HOME
,
decompress
,
download_and_decompress
from
paddleaudio.datasets.dataset
import
feat_funcs
from
paddleaudio.datasets.dataset
import
feat_funcs
from
utils.utility
import
unpack
from
paddleaudio.utils
import
DATA_HOME
from
paddleaudio.utils
import
decompress
from
paddleaudio.utils
import
download_and_decompress
from
utils.utility
import
download
from
utils.utility
import
download
from
utils.utility
import
unpack
__all__
=
[
'VoxCeleb1'
]
__all__
=
[
'VoxCeleb1'
]
...
@@ -60,12 +63,13 @@ class VoxCeleb1(Dataset):
...
@@ -60,12 +63,13 @@ class VoxCeleb1(Dataset):
]
]
archieves_meta
=
[
archieves_meta
=
[
{
{
'url'
:
'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt'
,
'url'
:
'md5'
:
'b73110731c9223c1461fe49cb48dddfc'
,
'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt'
,
'md5'
:
'b73110731c9223c1461fe49cb48dddfc'
,
},
},
]
]
num_speakers
=
1211
# 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
num_speakers
=
1211
# 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
sample_rate
=
16000
sample_rate
=
16000
meta_info
=
collections
.
namedtuple
(
meta_info
=
collections
.
namedtuple
(
...
@@ -74,15 +78,16 @@ class VoxCeleb1(Dataset):
...
@@ -74,15 +78,16 @@ class VoxCeleb1(Dataset):
wav_path
=
os
.
path
.
join
(
base_path
,
'wav'
)
wav_path
=
os
.
path
.
join
(
base_path
,
'wav'
)
subsets
=
[
'train'
,
'dev'
,
'enrol'
,
'test'
]
subsets
=
[
'train'
,
'dev'
,
'enrol'
,
'test'
]
def
__init__
(
self
,
def
__init__
(
subset
:
str
=
'train'
,
self
,
feat_type
:
str
=
'raw'
,
subset
:
str
=
'train'
,
random_chunk
:
bool
=
True
,
feat_type
:
str
=
'raw'
,
chunk_duration
:
float
=
3.0
,
# seconds
random_chunk
:
bool
=
True
,
split_ratio
:
float
=
0.9
,
# train split ratio
chunk_duration
:
float
=
3.0
,
# seconds
seed
:
int
=
0
,
split_ratio
:
float
=
0.9
,
# train split ratio
target_dir
:
str
=
None
,
seed
:
int
=
0
,
**
kwargs
):
target_dir
:
str
=
None
,
**
kwargs
):
assert
subset
in
self
.
subsets
,
\
assert
subset
in
self
.
subsets
,
\
'Dataset subset must be one in {}, but got {}'
.
format
(
self
.
subsets
,
subset
)
'Dataset subset must be one in {}, but got {}'
.
format
(
self
.
subsets
,
subset
)
...
@@ -95,8 +100,12 @@ class VoxCeleb1(Dataset):
...
@@ -95,8 +100,12 @@ class VoxCeleb1(Dataset):
self
.
chunk_duration
=
chunk_duration
self
.
chunk_duration
=
chunk_duration
self
.
split_ratio
=
split_ratio
self
.
split_ratio
=
split_ratio
self
.
target_dir
=
target_dir
if
target_dir
else
self
.
base_path
self
.
target_dir
=
target_dir
if
target_dir
else
self
.
base_path
self
.
csv_path
=
os
.
path
.
join
(
target_dir
,
'csv'
)
if
target_dir
else
os
.
path
.
join
(
self
.
base_path
,
'csv'
)
self
.
csv_path
=
os
.
path
.
join
(
self
.
meta_path
=
os
.
path
.
join
(
target_dir
,
'meta'
)
if
target_dir
else
os
.
path
.
join
(
base_path
,
'meta'
)
target_dir
,
'csv'
)
if
target_dir
else
os
.
path
.
join
(
self
.
base_path
,
'csv'
)
self
.
meta_path
=
os
.
path
.
join
(
target_dir
,
'meta'
)
if
target_dir
else
os
.
path
.
join
(
base_path
,
'meta'
)
self
.
veri_test_file
=
os
.
path
.
join
(
self
.
meta_path
,
'veri_test2.txt'
)
self
.
veri_test_file
=
os
.
path
.
join
(
self
.
meta_path
,
'veri_test2.txt'
)
# self._data = self._get_data()[:1000] # KP: Small dataset test.
# self._data = self._get_data()[:1000] # KP: Small dataset test.
self
.
_data
=
self
.
_get_data
()
self
.
_data
=
self
.
_get_data
()
...
@@ -112,10 +121,14 @@ class VoxCeleb1(Dataset):
...
@@ -112,10 +121,14 @@ class VoxCeleb1(Dataset):
print
(
"wav base path: {}"
.
format
(
self
.
wav_path
))
print
(
"wav base path: {}"
.
format
(
self
.
wav_path
))
if
not
os
.
path
.
isdir
(
self
.
wav_path
):
if
not
os
.
path
.
isdir
(
self
.
wav_path
):
print
(
"start to download the voxceleb1 dataset"
)
print
(
"start to download the voxceleb1 dataset"
)
download_and_decompress
(
# multi-zip parts concatenate to vox1_dev_wav.zip
download_and_decompress
(
# multi-zip parts concatenate to vox1_dev_wav.zip
self
.
archieves_audio_dev
,
self
.
base_path
,
decompress
=
False
)
self
.
archieves_audio_dev
,
download_and_decompress
(
# download the vox1_test_wav.zip and unzip
self
.
base_path
,
self
.
archieves_audio_test
,
self
.
base_path
,
decompress
=
True
)
decompress
=
False
)
download_and_decompress
(
# download the vox1_test_wav.zip and unzip
self
.
archieves_audio_test
,
self
.
base_path
,
decompress
=
True
)
# Download all parts and concatenate the files into one zip file.
# Download all parts and concatenate the files into one zip file.
dev_zipfile
=
os
.
path
.
join
(
self
.
base_path
,
'vox1_dev_wav.zip'
)
dev_zipfile
=
os
.
path
.
join
(
self
.
base_path
,
'vox1_dev_wav.zip'
)
...
@@ -131,7 +144,7 @@ class VoxCeleb1(Dataset):
...
@@ -131,7 +144,7 @@ class VoxCeleb1(Dataset):
if
not
os
.
path
.
isdir
(
self
.
meta_path
):
if
not
os
.
path
.
isdir
(
self
.
meta_path
):
download_and_decompress
(
download_and_decompress
(
self
.
archieves_meta
,
self
.
meta_path
,
decompress
=
False
)
self
.
archieves_meta
,
self
.
meta_path
,
decompress
=
False
)
# Data preparation.
# Data preparation.
if
not
os
.
path
.
isdir
(
self
.
csv_path
):
if
not
os
.
path
.
isdir
(
self
.
csv_path
):
os
.
makedirs
(
self
.
csv_path
)
os
.
makedirs
(
self
.
csv_path
)
...
@@ -143,8 +156,9 @@ class VoxCeleb1(Dataset):
...
@@ -143,8 +156,9 @@ class VoxCeleb1(Dataset):
audio_id
,
duration
,
wav
,
start
,
stop
,
spk_id
=
line
.
strip
(
audio_id
,
duration
,
wav
,
start
,
stop
,
spk_id
=
line
.
strip
(
).
split
(
','
)
).
split
(
','
)
data
.
append
(
data
.
append
(
self
.
meta_info
(
audio_id
,
float
(
duration
),
wav
,
int
(
start
),
self
.
meta_info
(
audio_id
,
int
(
stop
),
spk_id
))
float
(
duration
),
wav
,
int
(
start
),
int
(
stop
),
spk_id
))
with
open
(
os
.
path
.
join
(
self
.
meta_path
,
'spk_id2label.txt'
),
'r'
)
as
f
:
with
open
(
os
.
path
.
join
(
self
.
meta_path
,
'spk_id2label.txt'
),
'r'
)
as
f
:
for
line
in
f
.
readlines
():
for
line
in
f
.
readlines
():
...
@@ -228,14 +242,16 @@ class VoxCeleb1(Dataset):
...
@@ -228,14 +242,16 @@ class VoxCeleb1(Dataset):
def
generate_csv
(
self
,
def
generate_csv
(
self
,
wav_files
:
List
[
str
],
wav_files
:
List
[
str
],
output_file
:
str
,
output_file
:
str
,
split_chunks
:
bool
=
True
):
split_chunks
:
bool
=
True
):
print
(
f
'Generating csv:
{
output_file
}
'
)
print
(
f
'Generating csv:
{
output_file
}
'
)
header
=
[
"id"
,
"duration"
,
"wav"
,
"start"
,
"stop"
,
"spk_id"
]
header
=
[
"id"
,
"duration"
,
"wav"
,
"start"
,
"stop"
,
"spk_id"
]
with
Pool
(
64
)
as
p
:
with
Pool
(
64
)
as
p
:
infos
=
list
(
infos
=
list
(
tqdm
(
tqdm
(
p
.
imap
(
lambda
x
:
self
.
_get_audio_info
(
x
,
split_chunks
),
wav_files
),
total
=
len
(
wav_files
)))
p
.
imap
(
lambda
x
:
self
.
_get_audio_info
(
x
,
split_chunks
),
wav_files
),
total
=
len
(
wav_files
)))
csv_lines
=
[]
csv_lines
=
[]
for
info
in
infos
:
for
info
in
infos
:
...
@@ -272,35 +288,39 @@ class VoxCeleb1(Dataset):
...
@@ -272,35 +288,39 @@ class VoxCeleb1(Dataset):
audio_files
=
[]
audio_files
=
[]
speakers
=
set
()
speakers
=
set
()
for
path
in
[
self
.
wav_path
]:
for
path
in
[
self
.
wav_path
]:
for
file
in
glob
.
glob
(
os
.
path
.
join
(
path
,
"**"
,
"*.wav"
),
recursive
=
True
):
for
file
in
glob
.
glob
(
os
.
path
.
join
(
path
,
"**"
,
"*.wav"
),
recursive
=
True
):
spk
=
file
.
split
(
'/wav/'
)[
1
].
split
(
'/'
)[
0
]
spk
=
file
.
split
(
'/wav/'
)[
1
].
split
(
'/'
)[
0
]
if
spk
in
test_spks
:
if
spk
in
test_spks
:
continue
continue
speakers
.
add
(
spk
)
speakers
.
add
(
spk
)
audio_files
.
append
(
file
)
audio_files
.
append
(
file
)
print
(
"start to generate the {}"
.
format
(
os
.
path
.
join
(
self
.
meta_path
,
'spk_id2label.txt'
)))
print
(
"start to generate the {}"
.
format
(
os
.
path
.
join
(
self
.
meta_path
,
'spk_id2label.txt'
)))
# encode the train and dev speakers label to spk_id2label.txt
# encode the train and dev speakers label to spk_id2label.txt
with
open
(
os
.
path
.
join
(
self
.
meta_path
,
'spk_id2label.txt'
),
'w'
)
as
f
:
with
open
(
os
.
path
.
join
(
self
.
meta_path
,
'spk_id2label.txt'
),
'w'
)
as
f
:
for
label
,
spk_id
in
enumerate
(
sorted
(
speakers
)):
# 1211 vox1, 5994 vox2, 7205 vox1+2
for
label
,
spk_id
in
enumerate
(
sorted
(
speakers
)):
# 1211 vox1, 5994 vox2, 7205 vox1+2
f
.
write
(
f
'
{
spk_id
}
{
label
}
\n
'
)
f
.
write
(
f
'
{
spk_id
}
{
label
}
\n
'
)
audio_files
=
sorted
(
audio_files
)
audio_files
=
sorted
(
audio_files
)
random
.
shuffle
(
audio_files
)
random
.
shuffle
(
audio_files
)
split_idx
=
int
(
self
.
split_ratio
*
len
(
audio_files
))
split_idx
=
int
(
self
.
split_ratio
*
len
(
audio_files
))
# split_ratio to train
# split_ratio to train
train_files
,
dev_files
=
audio_files
[:
split_idx
],
audio_files
[
split_idx
:]
train_files
,
dev_files
=
audio_files
[:
split_idx
],
audio_files
[
split_idx
:]
self
.
generate_csv
(
train_files
,
os
.
path
.
join
(
self
.
csv_path
,
'train.csv'
))
self
.
generate_csv
(
train_files
,
os
.
path
.
join
(
self
.
csv_path
,
'train.csv'
))
self
.
generate_csv
(
dev_files
,
self
.
generate_csv
(
dev_files
,
os
.
path
.
join
(
self
.
csv_path
,
'dev.csv'
))
os
.
path
.
join
(
self
.
csv_path
,
'dev.csv'
))
self
.
generate_csv
(
self
.
generate_csv
(
enrol_files
,
enrol_files
,
os
.
path
.
join
(
self
.
csv_path
,
'enrol.csv'
),
os
.
path
.
join
(
self
.
csv_path
,
'enrol.csv'
),
split_chunks
=
False
)
split_chunks
=
False
)
self
.
generate_csv
(
test_files
,
self
.
generate_csv
(
os
.
path
.
join
(
self
.
csv_path
,
'test.csv'
),
test_files
,
split_chunks
=
False
)
os
.
path
.
join
(
self
.
csv_path
,
'test.csv'
),
split_chunks
=
False
)
def
__getitem__
(
self
,
idx
):
def
__getitem__
(
self
,
idx
):
return
self
.
_convert_to_record
(
idx
)
return
self
.
_convert_to_record
(
idx
)
...
...
examples/voxceleb/sv0/local/train.py
浏览文件 @
0780d181
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
argparse
import
paddle
import
paddle
from
dataset.voxceleb.voxceleb1
import
VoxCeleb1
from
dataset.voxceleb.voxceleb1
import
VoxCeleb1
...
@@ -14,12 +29,13 @@ def main(args):
...
@@ -14,12 +29,13 @@ def main(args):
# stage2: data prepare
# stage2: data prepare
train_ds
=
VoxCeleb1
(
'train'
,
target_dir
=
args
.
data_dir
)
train_ds
=
VoxCeleb1
(
'train'
,
target_dir
=
args
.
data_dir
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# yapf: disable
# yapf: disable
parser
=
argparse
.
ArgumentParser
(
__doc__
)
parser
=
argparse
.
ArgumentParser
(
__doc__
)
parser
.
add_argument
(
'--device'
,
parser
.
add_argument
(
'--device'
,
choices
=
[
'cpu'
,
'gpu'
],
choices
=
[
'cpu'
,
'gpu'
],
default
=
"cpu"
,
default
=
"cpu"
,
help
=
"Select which device to train model, defaults to gpu."
)
help
=
"Select which device to train model, defaults to gpu."
)
parser
.
add_argument
(
"--data-dir"
,
parser
.
add_argument
(
"--data-dir"
,
default
=
"./data/"
,
default
=
"./data/"
,
...
@@ -28,4 +44,4 @@ if __name__ == "__main__":
...
@@ -28,4 +44,4 @@ if __name__ == "__main__":
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
# yapf: enable
# yapf: enable
main
(
args
)
main
(
args
)
\ No newline at end of file
examples/voxceleb/sv0/run.sh
浏览文件 @
0780d181
...
@@ -5,6 +5,6 @@ export PPAUDIO_HOME=/home/users/xiongxinlei/exprts/v3
...
@@ -5,6 +5,6 @@ export PPAUDIO_HOME=/home/users/xiongxinlei/exprts/v3
dir
=
./data/
dir
=
./data/
mkdir
-p
${
dir
}
mkdir
-p
${
dir
}
# you can set the variable DATA_HOME to specifiy the downloaded the vox1 and vox2 dataset
# you can set the variable DATA_HOME to specifiy the downloaded the vox1 and vox2 dataset
/home/users/xiongxinlei/.conda/envs/xxl_base/bin/
python3
\
python3
\
local
/train.py
\
local
/train.py
\
--data-dir
${
dir
}
--data-dir
${
dir
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录