Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
995436c6
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
“1ad8f8211c6f6bdb938afa47dfd833c74daeb4b0”上不存在“mobile/src/operators/scale_op.cpp”
提交
995436c6
编写于
4月 08, 2022
作者:
C
ccrrong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
delete unused file ami_dataset.py, compute_der.py, test=doc
上级
bc53f726
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
1 addition
and
266 deletions
+1
-266
examples/ami/sd0/local/ami_dataset.py
examples/ami/sd0/local/ami_dataset.py
+0
-90
examples/ami/sd0/local/experiment.py
examples/ami/sd0/local/experiment.py
+1
-1
utils/compute_der.py
utils/compute_der.py
+0
-175
未找到文件。
examples/ami/sd0/local/ami_dataset.py
已删除
100644 → 0
浏览文件 @
bc53f726
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
collections
import
json
from
paddle.io
import
Dataset
from
paddleaudio.backends
import
load
as
load_audio
from
paddleaudio.datasets.dataset
import
feat_funcs
class AMIDataset(Dataset):
    """
    AMI dataset.

    Items are read from a data-prep JSON file. Each item is returned as a
    dict holding the fields of ``meta_info`` plus a ``feat`` entry with the
    feature extracted from the selected slice of the audio file.
    """

    meta_info = collections.namedtuple(
        'META_INFO', ('id', 'duration', 'wav', 'start', 'stop', 'record_id'))

    def __init__(self, json_file: str, feat_type: str='raw', **kwargs):
        """
        Args:
            json_file (:obj:`str`): Data prep JSON file.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
            **kwargs: Extra keyword arguments forwarded to the feature function.

        Raises:
            RuntimeError: If `feat_type` is not a key of `feat_funcs`.
        """
        if feat_type not in feat_funcs:
            raise RuntimeError(
                f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}"
            )

        self.json_file = json_file
        self.feat_type = feat_type
        self.feat_config = kwargs
        self._data = self._get_data()
        super().__init__()

    def _get_data(self):
        """Load the JSON file and return a list of ``meta_info`` tuples."""
        # JSON text is UTF-8 by specification; do not rely on the locale default.
        with open(self.json_file, "r", encoding="utf-8") as f:
            meta_data = json.load(f)

        data = []
        for key, entry in meta_data.items():
            sub_seg = entry["wav"]
            # The record id is the key with its last two "_"-separated parts removed.
            rec_id = str(key).rsplit("_", 2)[0]
            data.append(
                self.meta_info(
                    str(key),
                    float(sub_seg["duration"]),
                    sub_seg["file"],
                    int(sub_seg["start"]),
                    int(sub_seg["stop"]),
                    str(rec_id)))
        return data

    def _convert_to_record(self, idx: int):
        """Build the dict returned for item `idx`, including its feature."""
        sample = self._data[idx]
        # Copy every namedtuple field into a plain, mutable dict.
        record = dict(zip(sample._fields, sample))

        waveform, sr = load_audio(record['wav'])
        # Keep only the [start, stop) slice of the loaded waveform.
        waveform = waveform[record['start']:record['stop']]

        feat_func = feat_funcs[self.feat_type]
        # A falsy entry in `feat_funcs` means the sliced waveform is used as-is.
        feat = feat_func(
            waveform, sr=sr, **self.feat_config) if feat_func else waveform

        record.update({'feat': feat})
        return record

    def __getitem__(self, idx):
        return self._convert_to_record(idx)

    def __len__(self):
        return len(self._data)
examples/ami/sd0/local/experiment.py
浏览文件 @
995436c6
...
...
@@ -25,7 +25,7 @@ from yacs.config import CfgNode
from
paddlespeech.s2t.utils.log
import
Log
from
paddlespeech.vector.cluster
import
diarization
as
diar
from
utils.
compute_der
import
DER
from
utils.
DER
import
DER
# Logger setup
logger
=
Log
(
__name__
).
getlog
()
...
...
utils/compute_der.py
已删除
100755 → 0
浏览文件 @
bc53f726
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculates Diarization Error Rate (DER) which is the sum of Missed Speaker (MS),
False Alarm (FA), and Speaker Error Rate (SER) using md-eval-22.pl from NIST RT Evaluation.
Credits
This code is adapted from https://github.com/speechbrain/speechbrain
"""
import
argparse
import
os
import
re
import
subprocess
import
numpy
as
np
# Patterns applied with `findall` to the text printed by md-eval.pl (see DER).
# Each one uses a look-behind on the report label so that only the text that
# follows the label is captured.

# Text between "Speaker Diarization for" and the trailing "***".
FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")
# Digits/dots directly after "SCORED SPEAKER TIME =".
SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")
# Digits/dots directly after "MISSED SPEAKER TIME =".
MISS_SPEAKER_TIME = re.compile(r"(?<=MISSED SPEAKER TIME =)[\d.]+")
# Digits/dots directly after "FALARM SPEAKER TIME =".
FA_SPEAKER_TIME = re.compile(r"(?<=FALARM SPEAKER TIME =)[\d.]+")
# Digits/dots directly after "SPEAKER ERROR TIME =".
ERROR_SPEAKER_TIME = re.compile(r"(?<=SPEAKER ERROR TIME =)[\d.]+")
def rectify(arr):
    """Clean up division corner cases in `arr` and scale it to percent.

    The array is modified in place and also returned: NaN entries become 0,
    infinite entries become 1, and then every entry is multiplied by 100.
    """
    # 0 / 0 -> NaN: numerator and denominator were both zero.
    undefined = np.isnan(arr)
    # x / 0 -> inf: non-zero numerator over a zero denominator.
    unbounded = np.isinf(arr)

    arr[undefined] = 0
    arr[unbounded] = 1

    # Fractions -> percentages.
    arr *= 100.0
    return arr
def DER(
        ref_rttm,
        sys_rttm,
        ignore_overlap=False,
        collar=0.25,
        individual_file_scores=False, ):
    """Computes Missed Speaker percentage (MS), False Alarm (FA),
    Speaker Error Rate (SER), and Diarization Error Rate (DER).

    The scores are obtained by running the ``md-eval.pl`` script located next
    to this file and parsing the text it prints.

    Arguments
    ---------
    ref_rttm : str
        The path of reference/groundtruth RTTM file.
    sys_rttm : str
        The path of the system generated RTTM file.
    ignore_overlap : bool
        If True, ignores overlapping speech during evaluation
        (passes ``-1`` to md-eval.pl).
    collar : float
        Forgiveness collar (passed to md-eval.pl via ``-c``).
    individual_file_scores : bool
        If True, returns every parsed score in order; otherwise only the
        last parsed entry of each score is returned.

    Returns
    -------
    MS : float array
        Missed Speech.
    FA : float array
        False Alarms.
    SER : float array
        Speaker Error Rates.
    DER : float array
        Diarization Error Rates.

    Raises
    ------
    RuntimeError
        If `individual_file_scores` is False and no score could be parsed
        from the md-eval.pl output.
    """
    curr = os.path.abspath(os.path.dirname(__file__))
    mdEval = os.path.join(curr, "./md-eval.pl")

    cmd = [
        mdEval,
        "-af",
        "-r",
        ref_rttm,
        "-s",
        sys_rttm,
        "-c",
        str(collar),
    ]
    if ignore_overlap:
        cmd.append("-1")
    # Print only once the command is complete, so the log shows what is run.
    print(cmd)

    try:
        raw_output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as ex:
        # Best effort: still try to parse whatever the failed run printed.
        raw_output = ex.output or b""
    # Decode on BOTH paths: the patterns below are str patterns and cannot be
    # applied to the bytes captured from the subprocess.
    stdout = raw_output.decode("utf-8", errors="replace")

    def _times(pattern):
        # All numbers captured by `pattern` in the report, as a float array.
        return np.array([float(m) for m in pattern.findall(stdout)])

    scored_speaker_times = _times(SCORED_SPEAKER_TIME)
    miss_speaker_times = _times(MISS_SPEAKER_TIME)
    fa_speaker_times = _times(FA_SPEAKER_TIME)
    error_speaker_times = _times(ERROR_SPEAKER_TIME)

    # Zero denominators are expected here; rectify() cleans up NaN/inf below.
    with np.errstate(invalid="ignore", divide="ignore"):
        tot_error_times = (
            miss_speaker_times + fa_speaker_times + error_speaker_times)
        miss_speaker_frac = miss_speaker_times / scored_speaker_times
        fa_speaker_frac = fa_speaker_times / scored_speaker_times
        sers_frac = error_speaker_times / scored_speaker_times
        ders_frac = tot_error_times / scored_speaker_times

    # Values in percentage of scored_speaker_time
    miss_speaker = rectify(miss_speaker_frac)
    fa_speaker = rectify(fa_speaker_frac)
    sers = rectify(sers_frac)
    ders = rectify(ders_frac)

    if individual_file_scores:
        return miss_speaker, fa_speaker, sers, ders

    if ders.size == 0:
        # Nothing was parsed (e.g. md-eval.pl failed); surface its output
        # instead of failing with an opaque IndexError on the [-1] below.
        raise RuntimeError(
            "No scores found in md-eval.pl output:\n{}".format(stdout))
    return miss_speaker[-1], fa_speaker[-1], sers[-1], ders[-1]
def _str2bool(value):
    """Parse a command-line boolean such as ``True``/``false``/``1``/``0``."""
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


def main():
    """Command-line entry point: parse the arguments, run DER, print the scores."""
    parser = argparse.ArgumentParser(description="Compute DER")
    parser.add_argument(
        "--ref_rttm",
        type=str,
        required=True,
        help="the path of reference/groundtruth RTTM file")
    parser.add_argument(
        "--sys_rttm",
        type=str,
        required=True,
        help="the path of the system generated RTTM file.")
    # `type=bool` would turn any non-empty string (including "False") into
    # True, so booleans are parsed explicitly. A bare flag means True.
    parser.add_argument(
        "--individual_file_scores",
        type=_str2bool,
        nargs="?",
        const=True,
        default=False,
        help="whether returns scores for each file in order.")
    # Default mirrors DER(); without it `None` would be stringified and
    # handed to md-eval.pl as the collar.
    parser.add_argument(
        "--collar", type=float, default=0.25, help="forgiveness collar.")
    parser.add_argument(
        "--ignore_overlap",
        type=_str2bool,
        nargs="?",
        const=True,
        default=False,
        help="whether ignores overlapping speech during evaluation.")
    args = parser.parse_args()

    scores = DER(
        args.ref_rttm,
        args.sys_rttm,
        ignore_overlap=args.ignore_overlap,
        collar=args.collar,
        individual_file_scores=args.individual_file_scores)
    print(scores)


if __name__ == "__main__":
    main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录