Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
38e4e9c8
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
38e4e9c8
编写于
4月 06, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor voxceleb2 data download, test=doc
上级
ebfe3e6b
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
140 addition
and
28 deletion
+140
-28
dataset/voxceleb/voxceleb1.py
dataset/voxceleb/voxceleb1.py
+1
-1
dataset/voxceleb/voxceleb2.py
dataset/voxceleb/voxceleb2.py
+70
-11
examples/voxceleb/sv0/conf/ecapa_tdnn.yaml
examples/voxceleb/sv0/conf/ecapa_tdnn.yaml
+2
-5
examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml
examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml
+53
-0
examples/voxceleb/sv0/local/data.sh
examples/voxceleb/sv0/local/data.sh
+7
-2
examples/voxceleb/sv0/run.sh
examples/voxceleb/sv0/run.sh
+7
-9
未找到文件。
dataset/voxceleb/voxceleb1.py
浏览文件 @
38e4e9c8
...
@@ -149,7 +149,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
...
@@ -149,7 +149,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
# we will download the voxceleb1 data to ${target_dir}/vox1/dev/ or ${target_dir}/vox1/test directory
# we will download the voxceleb1 data to ${target_dir}/vox1/dev/ or ${target_dir}/vox1/test directory
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
target_dir
,
"wav"
)):
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
target_dir
,
"wav"
)):
# download all dataset part
# download all dataset part
print
(
"start to download the vox1 dev zip package
"
)
print
(
f
"start to download the vox1 zip package to
{
target_dir
}
"
)
for
zip_part
in
data_list
.
keys
():
for
zip_part
in
data_list
.
keys
():
download_url
=
" --no-check-certificate "
+
base_url
+
"/"
+
zip_part
download_url
=
" --no-check-certificate "
+
base_url
+
"/"
+
zip_part
download
(
download
(
...
...
dataset/voxceleb/voxceleb2.py
浏览文件 @
38e4e9c8
...
@@ -22,10 +22,12 @@ import codecs
...
@@ -22,10 +22,12 @@ import codecs
import
glob
import
glob
import
json
import
json
import
os
import
os
import
subprocess
from
pathlib
import
Path
from
pathlib
import
Path
import
soundfile
import
soundfile
from
utils.utility
import
check_md5sum
from
utils.utility
import
download
from
utils.utility
import
download
from
utils.utility
import
unzip
from
utils.utility
import
unzip
...
@@ -35,12 +37,22 @@ DATA_HOME = os.path.expanduser('.')
...
@@ -35,12 +37,22 @@ DATA_HOME = os.path.expanduser('.')
BASE_URL
=
"--no-check-certificate https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data/"
BASE_URL
=
"--no-check-certificate https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data/"
# dev data
# dev data
DEV_DATA_URL
=
BASE_URL
+
'/vox2_aac.zip'
DEV_LIST
=
{
DEV_MD5SUM
=
"bbc063c46078a602ca71605645c2a402"
"vox2_dev_aac_partaa"
:
"da070494c573e5c0564b1d11c3b20577"
,
"vox2_dev_aac_partab"
:
"17fe6dab2b32b48abaf1676429cdd06f"
,
"vox2_dev_aac_partac"
:
"1de58e086c5edf63625af1cb6d831528"
,
"vox2_dev_aac_partad"
:
"5a043eb03e15c5a918ee6a52aad477f9"
,
"vox2_dev_aac_partae"
:
"cea401b624983e2d0b2a87fb5d59aa60"
,
"vox2_dev_aac_partaf"
:
"fc886d9ba90ab88e7880ee98effd6ae9"
,
"vox2_dev_aac_partag"
:
"d160ecc3f6ee3eed54d55349531cb42e"
,
"vox2_dev_aac_partah"
:
"6b84a81b9af72a9d9eecbb3b1f602e65"
,
}
DEV_TARGET_DATA
=
"vox2_dev_aac_parta* vox2_dev_aac.zip bbc063c46078a602ca71605645c2a402"
# test data
# test data
TEST_
DATA_URL
=
BASE_URL
+
'/vox2_test_aac.zip'
TEST_
LIST
=
{
"vox2_test_aac.zip"
:
"0d2b3ea430a821c33263b5ea37ede312"
}
TEST_
MD5SUM
=
"
0d2b3ea430a821c33263b5ea37ede312"
TEST_
TARGET_DATA
=
"vox2_test_aac.zip vox2_test_aac.zip
0d2b3ea430a821c33263b5ea37ede312"
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
parser
.
add_argument
(
parser
.
add_argument
(
...
@@ -68,6 +80,14 @@ args = parser.parse_args()
...
@@ -68,6 +80,14 @@ args = parser.parse_args()
def
create_manifest
(
data_dir
,
manifest_path_prefix
):
def
create_manifest
(
data_dir
,
manifest_path_prefix
):
"""Generate the voxceleb2 dataset manifest file.
We will create ${manifest_path_prefix}.vox2 as the final manifest file.
The dev and test wav info will be put in one manifest file.
Args:
data_dir (str): voxceleb2 wav directory, which includes the dev and test subsets
manifest_path_prefix (str): manifest file prefix
"""
print
(
"Creating manifest %s ..."
%
manifest_path_prefix
)
print
(
"Creating manifest %s ..."
%
manifest_path_prefix
)
json_lines
=
[]
json_lines
=
[]
data_path
=
os
.
path
.
join
(
data_dir
,
"**"
,
"*.wav"
)
data_path
=
os
.
path
.
join
(
data_dir
,
"**"
,
"*.wav"
)
...
@@ -119,7 +139,19 @@ def create_manifest(data_dir, manifest_path_prefix):
...
@@ -119,7 +139,19 @@ def create_manifest(data_dir, manifest_path_prefix):
print
(
f
"
{
total_sec
/
total_num
}
sec/utt"
,
file
=
f
)
print
(
f
"
{
total_sec
/
total_num
}
sec/utt"
,
file
=
f
)
def
download_dataset
(
url
,
md5sum
,
target_dir
,
dataset
):
def
download_dataset
(
base_url
,
data_list
,
target_data
,
target_dir
,
dataset
):
"""Download the voxceleb2 zip package
Args:
base_url (str): the base url for downloading the voxceleb2 dataset
data_list (dict): maps each zip part name to its md5 value
target_data (str): the final dataset zip info, as "<part pattern> <zip name> <md5sum>"
target_dir (str): the directory where the dataset is stored
dataset (str): the dataset name, dev or test
Raises:
RuntimeError: if the md5sum check of the target zip file fails
"""
if
not
os
.
path
.
exists
(
target_dir
):
if
not
os
.
path
.
exists
(
target_dir
):
os
.
makedirs
(
target_dir
)
os
.
makedirs
(
target_dir
)
...
@@ -129,9 +161,34 @@ def download_dataset(url, md5sum, target_dir, dataset):
...
@@ -129,9 +161,34 @@ def download_dataset(url, md5sum, target_dir, dataset):
# but the test dataset will unzip to aac
# but the test dataset will unzip to aac
# so, we create the ${target_dir}/test and unzip the m4a to test dir
# so, we create the ${target_dir}/test and unzip the m4a to test dir
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
target_dir
,
dataset
)):
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
target_dir
,
dataset
)):
filepath
=
download
(
url
,
md5sum
,
target_dir
)
print
(
f
"start to download the vox2 zip package to
{
target_dir
}
"
)
for
zip_part
in
data_list
.
keys
():
download_url
=
" --no-check-certificate "
+
base_url
+
"/"
+
zip_part
download
(
url
=
download_url
,
md5sum
=
data_list
[
zip_part
],
target_dir
=
target_dir
)
# pack the all part to target zip file
all_target_part
,
target_name
,
target_md5sum
=
target_data
.
split
()
target_name
=
os
.
path
.
join
(
target_dir
,
target_name
)
if
not
os
.
path
.
exists
(
target_name
):
pack_part_cmd
=
"cat {}/{} > {}"
.
format
(
target_dir
,
all_target_part
,
target_name
)
subprocess
.
call
(
pack_part_cmd
,
shell
=
True
)
# check the target zip file md5sum
if
not
check_md5sum
(
target_name
,
target_md5sum
):
raise
RuntimeError
(
"{} MD5 checkssum failed"
.
format
(
target_name
))
else
:
print
(
"Check {} md5sum successfully"
.
format
(
target_name
))
if
dataset
==
"test"
:
if
dataset
==
"test"
:
unzip
(
filepath
,
os
.
path
.
join
(
target_dir
,
"test"
))
# we need to make the test directory
unzip
(
target_name
,
os
.
path
.
join
(
target_dir
,
"test"
))
else
:
# unzip the dev zip package, which will create the dev directory
unzip
(
target_name
,
target_dir
)
def
main
():
def
main
():
...
@@ -142,14 +199,16 @@ def main():
...
@@ -142,14 +199,16 @@ def main():
print
(
"download: {}"
.
format
(
args
.
download
))
print
(
"download: {}"
.
format
(
args
.
download
))
if
args
.
download
:
if
args
.
download
:
download_dataset
(
download_dataset
(
url
=
DEV_DATA_URL
,
base_url
=
BASE_URL
,
md5sum
=
DEV_MD5SUM
,
data_list
=
DEV_LIST
,
target_data
=
DEV_TARGET_DATA
,
target_dir
=
args
.
target_dir
,
target_dir
=
args
.
target_dir
,
dataset
=
"dev"
)
dataset
=
"dev"
)
download_dataset
(
download_dataset
(
url
=
TEST_DATA_URL
,
base_url
=
BASE_URL
,
md5sum
=
TEST_MD5SUM
,
data_list
=
TEST_LIST
,
target_data
=
TEST_TARGET_DATA
,
target_dir
=
args
.
target_dir
,
target_dir
=
args
.
target_dir
,
dataset
=
"test"
)
dataset
=
"test"
)
...
...
examples/voxceleb/sv0/conf/ecapa_tdnn.yaml
浏览文件 @
38e4e9c8
###########################################
###########################################
# Data #
# Data #
###########################################
###########################################
# we should explicitly specify the wav path of vox2 audio data converted from m4a
vox2_base_path
:
augment
:
True
augment
:
True
batch_size
:
32
batch_size
:
32
num_workers
:
2
num_workers
:
2
...
@@ -30,7 +28,6 @@ hop_size: 160 #10ms, sample rate 16000, 10 * 16000 / 1000 = 160
...
@@ -30,7 +28,6 @@ hop_size: 160 #10ms, sample rate 16000, 10 * 16000 / 1000 = 160
# if we want to use another model, please choose another configuration yaml file
# if we want to use another model, please choose another configuration yaml file
model
:
model
:
input_size
:
80
input_size
:
80
# "channels": [512, 512, 512, 512, 1536],
channels
:
[
1024
,
1024
,
1024
,
1024
,
3072
]
channels
:
[
1024
,
1024
,
1024
,
1024
,
3072
]
kernel_sizes
:
[
5
,
3
,
3
,
3
,
1
]
kernel_sizes
:
[
5
,
3
,
3
,
3
,
1
]
dilations
:
[
1
,
2
,
3
,
4
,
1
]
dilations
:
[
1
,
2
,
3
,
4
,
1
]
...
@@ -42,8 +39,8 @@ model:
...
@@ -42,8 +39,8 @@ model:
###########################################
###########################################
seed
:
1986
# taken from the speechbrain configuration
seed
:
1986
# taken from the speechbrain configuration
epochs
:
10
epochs
:
10
save_interval
:
1
save_interval
:
1
0
log_interval
:
1
log_interval
:
1
0
learning_rate
:
1e-8
learning_rate
:
1e-8
...
...
examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml
0 → 100644
浏览文件 @
38e4e9c8
###########################################
# Data #
###########################################
augment
:
True
batch_size
:
16
num_workers
:
2
num_speakers
:
1211
# 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
shuffle
:
True
skip_prep
:
False
split_ratio
:
0.9
chunk_duration
:
3.0
# seconds
random_chunk
:
True
verification_file
:
data/vox1/veri_test2.txt
###########################################################
# FEATURE EXTRACTION SETTING #
###########################################################
# currently, we only support fbank
sr
:
16000
# sample rate
n_mels
:
80
window_size
:
400
#25ms, sample rate 16000, 25 * 16000 / 1000 = 400
hop_size
:
160
#10ms, sample rate 16000, 10 * 16000 / 1000 = 160
###########################################################
# MODEL SETTING #
###########################################################
# currently, we only support ecapa-tdnn in the ecapa_tdnn.yaml
# if we want to use another model, please choose another configuration yaml file
model
:
input_size
:
80
channels
:
[
512
,
512
,
512
,
512
,
1536
]
kernel_sizes
:
[
5
,
3
,
3
,
3
,
1
]
dilations
:
[
1
,
2
,
3
,
4
,
1
]
attention_channels
:
128
lin_neurons
:
192
###########################################
# Training #
###########################################
seed
:
1986
# taken from the speechbrain configuration
epochs
:
100
save_interval
:
10
log_interval
:
10
learning_rate
:
1e-8
###########################################
# Testing #
###########################################
global_embedding_norm
:
True
embedding_mean_norm
:
True
embedding_std_norm
:
False
examples/voxceleb/sv0/local/data.sh
浏览文件 @
38e4e9c8
...
@@ -38,7 +38,10 @@ mkdir -p ${TARGET_DIR}
...
@@ -38,7 +38,10 @@ mkdir -p ${TARGET_DIR}
if
[
${
stage
}
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
if
[
${
stage
}
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
# download data, generate manifests
# download data, generate manifests
# we will generate the manifest.{dev, test} file in ${dir}/vox1/ directory
# we will generate the manifest.{dev,test} file from ${TARGET_DIR}/voxceleb/vox1/{dev,test} directory
# and generate the meta info and download the trial file
# manifest.dev: 148642
# manifest.test: 4847
echo
"Start to download vox1 dataset and generate the manifest files "
echo
"Start to download vox1 dataset and generate the manifest files "
python3
${
TARGET_DIR
}
/voxceleb/voxceleb1.py
\
python3
${
TARGET_DIR
}
/voxceleb/voxceleb1.py
\
--manifest_prefix
=
"
${
dir
}
/vox1/manifest"
\
--manifest_prefix
=
"
${
dir
}
/vox1/manifest"
\
...
@@ -53,6 +56,8 @@ fi
...
@@ -53,6 +56,8 @@ fi
if
[
${
stage
}
-le
2
]
&&
[
${
stop_stage
}
-ge
2
]
;
then
if
[
${
stage
}
-le
2
]
&&
[
${
stop_stage
}
-ge
2
]
;
then
# download voxceleb2 data
# download voxceleb2 data
# we will download the data and unzip the package
# and we will store the m4a file in ${TARGET_DIR}/voxceleb/vox2/{dev,test}
echo
"start to download vox2 dataset"
echo
"start to download vox2 dataset"
python3
${
TARGET_DIR
}
/voxceleb/voxceleb2.py
\
python3
${
TARGET_DIR
}
/voxceleb/voxceleb2.py
\
--download
\
--download
\
...
@@ -99,7 +104,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
...
@@ -99,7 +104,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
# Currently, our training system use csv file for dataset
# Currently, our training system use csv file for dataset
echo
"convert the json format to csv format to be compatible with training process"
echo
"convert the json format to csv format to be compatible with training process"
python3
local
/make_vox_csv_dataset_from_json.py
\
python3
local
/make_vox_csv_dataset_from_json.py
\
--train
"
${
dir
}
/vox1/manifest.dev"
\
--train
"
${
dir
}
/vox1/manifest.dev"
"
${
dir
}
/vox2/manifest.vox2"
\
--test
"
${
dir
}
/vox1/manifest.test"
\
--test
"
${
dir
}
/vox1/manifest.test"
\
--target_dir
"
${
dir
}
/vox/"
\
--target_dir
"
${
dir
}
/vox/"
\
--config
${
conf_path
}
--config
${
conf_path
}
...
...
examples/voxceleb/sv0/run.sh
浏览文件 @
38e4e9c8
...
@@ -18,24 +18,22 @@ set -e
...
@@ -18,24 +18,22 @@ set -e
#######################################################################
#######################################################################
# stage 0: data prepare, including voxceleb1 download and generate {train,dev,enroll,test}.csv
# stage 0: data prepare, including voxceleb1 download and generate {train,dev,enroll,test}.csv
# voxceleb2 data is m4a format, so we need
user to convert the m4a to wav yourselves as described in Readme.md
with the script local/convert.sh
# voxceleb2 data is m4a format, so we need
to convert the m4a to wav ourselves
with the script local/convert.sh
# stage 1: train the speaker identification model
# stage 1: train the speaker identification model
# stage 2: test speaker identification
# stage 2: test speaker identification
# stage 3: extract the training embedding to train the LDA and PLDA
# stage 3:
(todo)
extract the training embedding to train the LDA and PLDA
######################################################################
######################################################################
# we can set the variable PPAUDIO_HOME to specify the root directory of the downloaded vox1 and vox2 dataset
# by default, the dataset will be stored in the ~/.paddleaudio/
# the vox2 dataset is stored in m4a format, you need to convert the audio from m4a to wav yourself
# the vox2 dataset is stored in m4a format, you need to convert the audio from m4a to wav yourself
# and put all of them to ${
PPAUDIO_HOME
}/datasets/vox2
# and put all of them to ${
MAIN_ROOT
}/datasets/vox2
# we will find the wav from ${
PPAUDIO_HOME}/datasets/vox1/wav and ${PPAUDIO_HOME
}/datasets/vox2/wav
# we will find the wav from ${
MAIN_ROOT}/datasets/vox1/{dev,test}/wav and ${MAIN_ROOT
}/datasets/vox2/wav
# export PPAUDIO_HOME=
stage
=
0
stage
=
0
stop_stage
=
50
stop_stage
=
50
# data directory
# data directory
# if we set the variable ${dir}, we will store the wav info to this directory
# if we set the variable ${dir}, we will store the wav info to this directory
# otherwise, we will store the wav info to
vox1 and
vox2 directory respectively
# otherwise, we will store the wav info to
data/vox1 and data/
vox2 directory respectively
# vox2 wav path, we must convert the m4a format to wav format
# vox2 wav path, we must convert the m4a format to wav format
dir
=
data/
# data info directory
dir
=
data/
# data info directory
...
@@ -64,6 +62,6 @@ if [ $stage -le 2 ] && [ ${stop_stage} -ge 2 ]; then
...
@@ -64,6 +62,6 @@ if [ $stage -le 2 ] && [ ${stop_stage} -ge 2 ]; then
fi
fi
# if [ $stage -le 3 ]; then
# if [ $stage -le 3 ]; then
# # stage
2
: extract the training embedding to train the LDA and PLDA
# # stage
3
: extract the training embedding to train the LDA and PLDA
# # todo: extract the training embedding
# # todo: extract the training embedding
# fi
# fi
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录