Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
bc53f726
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bc53f726
编写于
4月 08, 2022
作者:
C
ccrrong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
convert dataset format to paddlespeech, test=doc
上级
7a03f365
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
138 additions
and
17 deletions
+138
-17
examples/ami/sd0/local/compute_embdding.py
examples/ami/sd0/local/compute_embdding.py
+3
-4
examples/ami/sd0/local/process.sh
examples/ami/sd0/local/process.sh
+2
-2
examples/ami/sd0/run.sh
examples/ami/sd0/run.sh
+17
-11
paddlespeech/vector/io/dataset_from_json.py
paddlespeech/vector/io/dataset_from_json.py
+116
-0
未找到文件。
examples/ami/sd0/local/compute_embdding.py
浏览文件 @
bc53f726
...
@@ -19,7 +19,6 @@ import sys
...
@@ -19,7 +19,6 @@ import sys
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
from
ami_dataset
import
AMIDataset
from
paddle.io
import
BatchSampler
from
paddle.io
import
BatchSampler
from
paddle.io
import
DataLoader
from
paddle.io
import
DataLoader
from
tqdm.contrib
import
tqdm
from
tqdm.contrib
import
tqdm
...
@@ -28,6 +27,7 @@ from yacs.config import CfgNode
...
@@ -28,6 +27,7 @@ from yacs.config import CfgNode
from
paddlespeech.s2t.utils.log
import
Log
from
paddlespeech.s2t.utils.log
import
Log
from
paddlespeech.vector.cluster.diarization
import
EmbeddingMeta
from
paddlespeech.vector.cluster.diarization
import
EmbeddingMeta
from
paddlespeech.vector.io.batch
import
batch_feature_normalize
from
paddlespeech.vector.io.batch
import
batch_feature_normalize
from
paddlespeech.vector.io.dataset_from_json
import
JSONDataset
from
paddlespeech.vector.models.ecapa_tdnn
import
EcapaTdnn
from
paddlespeech.vector.models.ecapa_tdnn
import
EcapaTdnn
from
paddlespeech.vector.modules.sid_model
import
SpeakerIdetification
from
paddlespeech.vector.modules.sid_model
import
SpeakerIdetification
from
paddlespeech.vector.training.seeding
import
seed_everything
from
paddlespeech.vector.training.seeding
import
seed_everything
...
@@ -65,7 +65,7 @@ def create_dataloader(json_file, batch_size):
...
@@ -65,7 +65,7 @@ def create_dataloader(json_file, batch_size):
"""
"""
# create datasets
# create datasets
dataset
=
AMI
Dataset
(
dataset
=
JSON
Dataset
(
json_file
=
json_file
,
json_file
=
json_file
,
feat_type
=
'melspectrogram'
,
feat_type
=
'melspectrogram'
,
n_mels
=
config
.
n_mels
,
n_mels
=
config
.
n_mels
,
...
@@ -93,8 +93,7 @@ def main(args, config):
...
@@ -93,8 +93,7 @@ def main(args, config):
ecapa_tdnn
=
EcapaTdnn
(
**
config
.
model
)
ecapa_tdnn
=
EcapaTdnn
(
**
config
.
model
)
# stage2: build the speaker verification eval instance with backbone model
# stage2: build the speaker verification eval instance with backbone model
model
=
SpeakerIdetification
(
model
=
SpeakerIdetification
(
backbone
=
ecapa_tdnn
,
num_class
=
1
)
backbone
=
ecapa_tdnn
,
num_class
=
1
)
# stage3: load the pre-trained model
# stage3: load the pre-trained model
# we get the last model from the epoch and save_interval
# we get the last model from the epoch and save_interval
...
...
examples/ami/sd0/local/process.sh
浏览文件 @
bc53f726
...
@@ -4,7 +4,6 @@ stage=0
...
@@ -4,7 +4,6 @@ stage=0
set
=
L
set
=
L
.
${
MAIN_ROOT
}
/utils/parse_options.sh
||
exit
1
;
.
${
MAIN_ROOT
}
/utils/parse_options.sh
||
exit
1
;
set
-u
set
-o
pipefail
set
-o
pipefail
data_folder
=
$1
data_folder
=
$1
...
@@ -12,6 +11,7 @@ manual_annot_folder=$2
...
@@ -12,6 +11,7 @@ manual_annot_folder=$2
save_folder
=
$3
save_folder
=
$3
pretrained_model_dir
=
$4
pretrained_model_dir
=
$4
conf_path
=
$5
conf_path
=
$5
device
=
$6
ref_rttm_dir
=
${
save_folder
}
/ref_rttms
ref_rttm_dir
=
${
save_folder
}
/ref_rttms
meta_data_dir
=
${
save_folder
}
/metadata
meta_data_dir
=
${
save_folder
}
/metadata
...
@@ -35,7 +35,7 @@ if [ ${stage} -le 1 ]; then
...
@@ -35,7 +35,7 @@ if [ ${stage} -le 1 ]; then
for
name
in
dev
eval
;
do
for
name
in
dev
eval
;
do
python
local
/compute_embdding.py
--config
${
conf_path
}
\
python
local
/compute_embdding.py
--config
${
conf_path
}
\
--data-dir
${
save_folder
}
\
--data-dir
${
save_folder
}
\
--device
gpu:0
\
--device
${
device
}
\
--dataset
${
name
}
\
--dataset
${
name
}
\
--load-checkpoint
${
pretrained_model_dir
}
--load-checkpoint
${
pretrained_model_dir
}
done
done
...
...
examples/ami/sd0/run.sh
浏览文件 @
bc53f726
...
@@ -3,8 +3,7 @@
...
@@ -3,8 +3,7 @@
.
./path.sh
||
exit
1
;
.
./path.sh
||
exit
1
;
set
-e
set
-e
stage
=
1
stage
=
0
stop_stage
=
50
#TARGET_DIR=${MAIN_ROOT}/dataset/ami
#TARGET_DIR=${MAIN_ROOT}/dataset/ami
TARGET_DIR
=
/home/dataset/AMI
TARGET_DIR
=
/home/dataset/AMI
...
@@ -12,15 +11,14 @@ data_folder=${TARGET_DIR}/amicorpus #e.g., /path/to/amicorpus/
...
@@ -12,15 +11,14 @@ data_folder=${TARGET_DIR}/amicorpus #e.g., /path/to/amicorpus/
manual_annot_folder
=
${
TARGET_DIR
}
/ami_public_manual_1.6.2
#e.g., /path/to/ami_public_manual_1.6.2/
manual_annot_folder
=
${
TARGET_DIR
}
/ami_public_manual_1.6.2
#e.g., /path/to/ami_public_manual_1.6.2/
save_folder
=
./save
save_folder
=
./save
pretraind_model_dir
=
${
save_folder
}
/model
pretraind_model_dir
=
${
save_folder
}
/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1/model
conf_path
=
conf/ecapa_tdnn.yaml
conf_path
=
conf/ecapa_tdnn.yaml
device
=
gpu
.
${
MAIN_ROOT
}
/utils/parse_options.sh
||
exit
1
;
.
${
MAIN_ROOT
}
/utils/parse_options.sh
||
exit
1
;
if
[
$stage
-le
0
]
&&
[
${
stop_stage
}
-ge
0
]
;
then
if
[
$stage
-le
0
]
;
then
# Prepare data
and model
# Prepare data
# Download AMI corpus, You need around 10GB of free space to get whole data
# Download AMI corpus, You need around 10GB of free space to get whole data
# The signals are too large to package in this way,
# The signals are too large to package in this way,
# so you need to use the chooser to indicate which ones you wish to download
# so you need to use the chooser to indicate which ones you wish to download
...
@@ -29,12 +27,20 @@ if [ $stage -le 0 ] && [ ${stop_stage} -ge 0 ]; then
...
@@ -29,12 +27,20 @@ if [ $stage -le 0 ] && [ ${stop_stage} -ge 0 ]; then
echo
"Signals: "
echo
"Signals: "
echo
"1) Select one or more AMI meetings: the IDs please follow ./ami_split.py"
echo
"1) Select one or more AMI meetings: the IDs please follow ./ami_split.py"
echo
"2) Select media streams: Just select Headset mix"
echo
"2) Select media streams: Just select Headset mix"
# Download the pretrained Model from HuggingFace or other pretrained model
echo
"Please download the pretrained ECAPA-TDNN Model and put the pretrainde model in given path: "
${
pretraind_model_dir
}
fi
fi
if
[
$stage
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
if
[
$stage
-le
1
]
;
then
# Download the pretrained model
wget https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1.tar.gz
mkdir
-p
${
save_folder
}
&&
tar
-xvf
sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1.tar.gz
-C
${
save_folder
}
rm
-rf
sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1.tar.gz
echo
"download the pretrained ECAPA-TDNN Model to path: "
${
pretraind_model_dir
}
fi
if
[
$stage
-le
2
]
;
then
# Tune hyperparams on dev set and perform final diarization on dev and eval with best hyperparams.
# Tune hyperparams on dev set and perform final diarization on dev and eval with best hyperparams.
bash ./local/process.sh
${
data_folder
}
${
manual_annot_folder
}
${
save_folder
}
${
pretraind_model_dir
}
${
conf_path
}
||
exit
1
echo
${
data_folder
}
${
manual_annot_folder
}
${
save_folder
}
${
pretraind_model_dir
}
${
conf_path
}
bash ./local/process.sh
${
data_folder
}
${
manual_annot_folder
}
\
${
save_folder
}
${
pretraind_model_dir
}
${
conf_path
}
${
device
}
||
exit
1
fi
fi
paddlespeech/vector/io/dataset_from_json.py
0 → 100644
浏览文件 @
bc53f726
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
json
from
dataclasses
import
dataclass
from
dataclasses
import
fields
from
paddle.io
import
Dataset
from
paddleaudio
import
load
as
load_audio
from
paddleaudio.compliance.librosa
import
melspectrogram
from
paddleaudio.compliance.librosa
import
mfcc
@dataclass
class meta_info:
    """The audio segment meta info in the vector JSONDataset.

    Args:
        id (str): the segment name
        duration (float): segment time
        wav (str): wav file path
        start (int): start point in the original wav file
        stop (int): stop point in the original wav file
        record_id (str): the record id
    """
    id: str
    duration: float
    wav: str
    start: int
    stop: int
    record_id: str
# Supported feature types for the JSON dataset: maps a feat_type name to
# its extraction function ('raw' maps to None, i.e. return the waveform
# unchanged).
feat_funcs = {
    'raw': None,
    'melspectrogram': melspectrogram,
    'mfcc': mfcc,
}
class JSONDataset(Dataset):
    """Dataset backed by a data-prep JSON manifest file.

    Each entry in the manifest describes one audio sub-segment (wav file
    path, start/stop offsets, duration). On access, the wav is loaded,
    sliced to the sub-segment, and optionally converted to a feature
    (see ``feat_funcs`` for the supported feature types).
    """

    def __init__(self, json_file: str, feat_type: str='raw', **kwargs):
        """
        Args:
            json_file (:obj:`str`): Data prep JSON manifest file path.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract
                of an audio file; must be a key of ``feat_funcs``.
            **kwargs: Extra keyword arguments forwarded to the feature
                extraction function (e.g. ``n_mels``).

        Raises:
            RuntimeError: If ``feat_type`` is not a supported feature type.
        """
        # Validate eagerly so a bad feat_type fails at construction time
        # rather than on the first __getitem__.
        if feat_type not in feat_funcs:
            raise RuntimeError(
                f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}"
            )

        self.json_file = json_file
        self.feat_type = feat_type
        self.feat_config = kwargs
        self._data = self._get_data()
        super().__init__()

    def _get_data(self):
        """Parse the JSON manifest into a list of ``meta_info`` records."""
        with open(self.json_file, "r") as f:
            meta_data = json.load(f)

        data = []
        for key in meta_data:
            sub_seg = meta_data[key]["wav"]
            # NOTE(review): the segment key appears to have the form
            # "<rec_id>_<x>_<y>"; stripping the two trailing fields
            # recovers the record id -- confirm against the data-prep stage.
            rec_id = str(key).rsplit("_", 2)[0]
            data.append(
                meta_info(
                    str(key),
                    float(sub_seg["duration"]),
                    sub_seg["file"],
                    int(sub_seg["start"]),
                    int(sub_seg["stop"]),
                    str(rec_id)))
        return data

    def _convert_to_record(self, idx: int):
        """Load sample ``idx`` and return it as a dict with a 'feat' entry."""
        sample = self._data[idx]

        # Copy every dataclass field of the sample into a plain dict.
        record = {
            field.name: getattr(sample, field.name)
            for field in fields(sample)
        }

        waveform, sr = load_audio(record['wav'])
        # Slice the sub-segment out of the full recording.
        # NOTE(review): start/stop are assumed to be sample offsets at the
        # loaded sample rate -- confirm against the data-prep stage.
        waveform = waveform[record['start']:record['stop']]

        feat_func = feat_funcs[self.feat_type]
        feat = feat_func(
            waveform, sr=sr, **self.feat_config) if feat_func else waveform
        record['feat'] = feat
        return record

    def __getitem__(self, idx):
        return self._convert_to_record(idx)

    def __len__(self):
        return len(self._data)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录