Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
98c0d43a
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
98c0d43a
编写于
9月 17, 2021
作者:
H
Hui Zhang
提交者:
GitHub
9月 17, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #833 from PaddlePaddle/fix_varbase
varbase getitem support np.longlong since paddle 2.2.0RC
上级
6f7a6dc2
282914f4
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
33 addition
and
39 deletion
+33
-39
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+8
-9
deepspeech/exps/u2_kaldi/model.py
deepspeech/exps/u2_kaldi/model.py
+10
-12
deepspeech/exps/u2_st/model.py
deepspeech/exps/u2_st/model.py
+8
-9
deepspeech/utils/ctc_utils.py
deepspeech/utils/ctc_utils.py
+7
-9
未找到文件。
deepspeech/exps/u2/model.py
浏览文件 @
98c0d43a
...
@@ -587,26 +587,25 @@ class U2Tester(U2Trainer):
...
@@ -587,26 +587,25 @@ class U2Tester(U2Trainer):
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
target
=
target
.
squeeze
(
0
)
target
=
target
.
squeeze
(
0
)
alignment
=
ctc_utils
.
forced_align
(
ctc_probs
,
target
)
alignment
=
ctc_utils
.
forced_align
(
ctc_probs
,
target
)
logger
.
info
(
"align ids"
,
key
[
0
],
alignment
)
logger
.
info
(
f
"align ids:
{
key
[
0
]
}
{
alignment
}
"
)
fout
.
write
(
'{} {}
\n
'
.
format
(
key
[
0
],
alignment
))
fout
.
write
(
'{} {}
\n
'
.
format
(
key
[
0
],
alignment
))
# 3. gen praat
# 3. gen praat
# segment alignment
# segment alignment
align_segs
=
text_grid
.
segment_alignment
(
alignment
)
align_segs
=
text_grid
.
segment_alignment
(
alignment
)
logger
.
info
(
"align tokens"
,
key
[
0
],
align_segs
)
logger
.
info
(
f
"align tokens:
{
key
[
0
]
}
,
{
align_segs
}
"
)
# IntervalTier, List["start end token\n"]
# IntervalTier, List["start end token\n"]
subsample
=
utility
.
get_subsample
(
self
.
config
)
subsample
=
utility
.
get_subsample
(
self
.
config
)
tierformat
=
text_grid
.
align_to_tierformat
(
tierformat
=
text_grid
.
align_to_tierformat
(
align_segs
,
subsample
,
token_dict
)
align_segs
,
subsample
,
token_dict
)
# write tier
# write tier
align_output_path
=
os
.
path
.
join
(
align_output_path
=
Path
(
self
.
args
.
result_file
).
parent
/
"align"
os
.
path
.
dirname
(
self
.
args
.
result_file
),
"align"
)
align_output_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
tier_path
=
os
.
path
.
join
(
align_output_path
,
key
[
0
]
+
".tier"
)
tier_path
=
align_output_path
/
(
key
[
0
]
+
".tier"
)
with
open
(
tier_path
,
'w'
)
as
f
:
with
tier_path
.
open
(
'w'
)
as
f
:
f
.
writelines
(
tierformat
)
f
.
writelines
(
tierformat
)
# write textgrid
# write textgrid
textgrid_path
=
os
.
path
.
join
(
align_output_path
,
textgrid_path
=
align_output_path
/
(
key
[
0
]
+
".TextGrid"
)
key
[
0
]
+
".TextGrid"
)
second_per_frame
=
1.
/
(
1000.
/
second_per_frame
=
1.
/
(
1000.
/
stride_ms
)
# 25ms window, 10ms stride
stride_ms
)
# 25ms window, 10ms stride
second_per_example
=
(
second_per_example
=
(
...
@@ -614,7 +613,7 @@ class U2Tester(U2Trainer):
...
@@ -614,7 +613,7 @@ class U2Tester(U2Trainer):
text_grid
.
generate_textgrid
(
text_grid
.
generate_textgrid
(
maxtime
=
second_per_example
,
maxtime
=
second_per_example
,
intervals
=
tierformat
,
intervals
=
tierformat
,
output
=
textgrid_path
)
output
=
str
(
textgrid_path
)
)
def
run_align
(
self
):
def
run_align
(
self
):
self
.
resume_or_scratch
()
self
.
resume_or_scratch
()
...
...
deepspeech/exps/u2_kaldi/model.py
浏览文件 @
98c0d43a
...
@@ -546,9 +546,8 @@ class U2Tester(U2Trainer):
...
@@ -546,9 +546,8 @@ class U2Tester(U2Trainer):
self
.
model
.
eval
()
self
.
model
.
eval
()
logger
.
info
(
f
"Align Total Examples:
{
len
(
self
.
align_loader
.
dataset
)
}
"
)
logger
.
info
(
f
"Align Total Examples:
{
len
(
self
.
align_loader
.
dataset
)
}
"
)
stride_ms
=
self
.
config
.
collater
.
stride_ms
stride_ms
=
self
.
align_loader
.
collate_fn
.
stride_ms
token_dict
=
self
.
args
.
char_list
token_dict
=
self
.
align_loader
.
collate_fn
.
vocab_list
with
open
(
self
.
args
.
result_file
,
'w'
)
as
fout
:
with
open
(
self
.
args
.
result_file
,
'w'
)
as
fout
:
# one example in batch
# one example in batch
for
i
,
batch
in
enumerate
(
self
.
align_loader
):
for
i
,
batch
in
enumerate
(
self
.
align_loader
):
...
@@ -565,26 +564,25 @@ class U2Tester(U2Trainer):
...
@@ -565,26 +564,25 @@ class U2Tester(U2Trainer):
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
target
=
target
.
squeeze
(
0
)
target
=
target
.
squeeze
(
0
)
alignment
=
ctc_utils
.
forced_align
(
ctc_probs
,
target
)
alignment
=
ctc_utils
.
forced_align
(
ctc_probs
,
target
)
logger
.
info
(
"align ids"
,
key
[
0
],
alignment
)
logger
.
info
(
f
"align ids:
{
key
[
0
]
}
{
alignment
}
"
)
fout
.
write
(
'{} {}
\n
'
.
format
(
key
[
0
],
alignment
))
fout
.
write
(
'{} {}
\n
'
.
format
(
key
[
0
],
alignment
))
# 3. gen praat
# 3. gen praat
# segment alignment
# segment alignment
align_segs
=
text_grid
.
segment_alignment
(
alignment
)
align_segs
=
text_grid
.
segment_alignment
(
alignment
)
logger
.
info
(
"align tokens"
,
key
[
0
],
align_segs
)
logger
.
info
(
f
"align tokens:
{
key
[
0
]
}
,
{
align_segs
}
"
)
# IntervalTier, List["start end token\n"]
# IntervalTier, List["start end token\n"]
subsample
=
utility
.
get_subsample
(
self
.
config
)
subsample
=
utility
.
get_subsample
(
self
.
config
)
tierformat
=
text_grid
.
align_to_tierformat
(
tierformat
=
text_grid
.
align_to_tierformat
(
align_segs
,
subsample
,
token_dict
)
align_segs
,
subsample
,
token_dict
)
# write tier
# write tier
align_output_path
=
os
.
path
.
join
(
align_output_path
=
Path
(
self
.
args
.
result_file
).
parent
/
"align"
os
.
path
.
dirname
(
self
.
args
.
result_file
),
"align"
)
align_output_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
tier_path
=
os
.
path
.
join
(
align_output_path
,
key
[
0
]
+
".tier"
)
tier_path
=
align_output_path
/
(
key
[
0
]
+
".tier"
)
with
open
(
tier_path
,
'w'
)
as
f
:
with
tier_path
.
open
(
'w'
)
as
f
:
f
.
writelines
(
tierformat
)
f
.
writelines
(
tierformat
)
# write textgrid
# write textgrid
textgrid_path
=
os
.
path
.
join
(
align_output_path
,
textgrid_path
=
align_output_path
/
(
key
[
0
]
+
".TextGrid"
)
key
[
0
]
+
".TextGrid"
)
second_per_frame
=
1.
/
(
1000.
/
second_per_frame
=
1.
/
(
1000.
/
stride_ms
)
# 25ms window, 10ms stride
stride_ms
)
# 25ms window, 10ms stride
second_per_example
=
(
second_per_example
=
(
...
@@ -592,7 +590,7 @@ class U2Tester(U2Trainer):
...
@@ -592,7 +590,7 @@ class U2Tester(U2Trainer):
text_grid
.
generate_textgrid
(
text_grid
.
generate_textgrid
(
maxtime
=
second_per_example
,
maxtime
=
second_per_example
,
intervals
=
tierformat
,
intervals
=
tierformat
,
output
=
textgrid_path
)
output
=
str
(
textgrid_path
)
)
def
run_align
(
self
):
def
run_align
(
self
):
self
.
resume_or_scratch
()
self
.
resume_or_scratch
()
...
...
deepspeech/exps/u2_st/model.py
浏览文件 @
98c0d43a
...
@@ -596,26 +596,25 @@ class U2STTester(U2STTrainer):
...
@@ -596,26 +596,25 @@ class U2STTester(U2STTrainer):
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
ctc_probs
=
ctc_probs
.
squeeze
(
0
)
target
=
target
.
squeeze
(
0
)
target
=
target
.
squeeze
(
0
)
alignment
=
ctc_utils
.
forced_align
(
ctc_probs
,
target
)
alignment
=
ctc_utils
.
forced_align
(
ctc_probs
,
target
)
logger
.
info
(
"align ids"
,
key
[
0
],
alignment
)
logger
.
info
(
f
"align ids:
{
key
[
0
]
}
{
alignment
}
"
)
fout
.
write
(
'{} {}
\n
'
.
format
(
key
[
0
],
alignment
))
fout
.
write
(
'{} {}
\n
'
.
format
(
key
[
0
],
alignment
))
# 3. gen praat
# 3. gen praat
# segment alignment
# segment alignment
align_segs
=
text_grid
.
segment_alignment
(
alignment
)
align_segs
=
text_grid
.
segment_alignment
(
alignment
)
logger
.
info
(
"align tokens"
,
key
[
0
],
align_segs
)
logger
.
info
(
f
"align tokens:
{
key
[
0
]
}
,
{
align_segs
}
"
)
# IntervalTier, List["start end token\n"]
# IntervalTier, List["start end token\n"]
subsample
=
utility
.
get_subsample
(
self
.
config
)
subsample
=
utility
.
get_subsample
(
self
.
config
)
tierformat
=
text_grid
.
align_to_tierformat
(
tierformat
=
text_grid
.
align_to_tierformat
(
align_segs
,
subsample
,
token_dict
)
align_segs
,
subsample
,
token_dict
)
# write tier
# write tier
align_output_path
=
os
.
path
.
join
(
align_output_path
=
Path
(
self
.
args
.
result_file
).
parent
/
"align"
os
.
path
.
dirname
(
self
.
args
.
result_file
),
"align"
)
align_output_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
tier_path
=
os
.
path
.
join
(
align_output_path
,
key
[
0
]
+
".tier"
)
tier_path
=
align_output_path
/
(
key
[
0
]
+
".tier"
)
with
open
(
tier_path
,
'w'
)
as
f
:
with
tier_path
.
open
(
'w'
)
as
f
:
f
.
writelines
(
tierformat
)
f
.
writelines
(
tierformat
)
# write textgrid
# write textgrid
textgrid_path
=
os
.
path
.
join
(
align_output_path
,
textgrid_path
=
align_output_path
/
(
key
[
0
]
+
".TextGrid"
)
key
[
0
]
+
".TextGrid"
)
second_per_frame
=
1.
/
(
1000.
/
second_per_frame
=
1.
/
(
1000.
/
stride_ms
)
# 25ms window, 10ms stride
stride_ms
)
# 25ms window, 10ms stride
second_per_example
=
(
second_per_example
=
(
...
@@ -623,7 +622,7 @@ class U2STTester(U2STTrainer):
...
@@ -623,7 +622,7 @@ class U2STTester(U2STTrainer):
text_grid
.
generate_textgrid
(
text_grid
.
generate_textgrid
(
maxtime
=
second_per_example
,
maxtime
=
second_per_example
,
intervals
=
tierformat
,
intervals
=
tierformat
,
output
=
textgrid_path
)
output
=
str
(
textgrid_path
)
)
def
run_align
(
self
):
def
run_align
(
self
):
self
.
resume_or_scratch
()
self
.
resume_or_scratch
()
...
...
deepspeech/utils/ctc_utils.py
浏览文件 @
98c0d43a
...
@@ -86,15 +86,15 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
...
@@ -86,15 +86,15 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
log_alpha
=
paddle
.
zeros
(
log_alpha
=
paddle
.
zeros
(
(
ctc_probs
.
shape
[
0
],
len
(
y_insert_blank
)))
#(T, 2L+1)
(
ctc_probs
.
shape
[
0
],
len
(
y_insert_blank
)))
#(T, 2L+1)
log_alpha
=
log_alpha
-
float
(
'inf'
)
# log of zero
log_alpha
=
log_alpha
-
float
(
'inf'
)
# log of zero
# TODO(Hui Zhang): zeros not support paddle.int16
# self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16
state_path
=
(
paddle
.
zeros
(
state_path
=
(
paddle
.
zeros
(
(
ctc_probs
.
shape
[
0
],
len
(
y_insert_blank
)),
dtype
=
paddle
.
int32
)
-
1
(
ctc_probs
.
shape
[
0
],
len
(
y_insert_blank
)),
dtype
=
paddle
.
int32
)
-
1
)
# state path, Tuple((T, 2L+1))
)
# state path, Tuple((T, 2L+1))
# init start state
# init start state
# TODO(Hui Zhang): VarBase.__getitem__() not support np.int64
log_alpha
[
0
,
0
]
=
ctc_probs
[
0
][
y_insert_blank
[
0
]]
# State-b, Sb
log_alpha
[
0
,
0
]
=
ctc_probs
[
0
][
int
(
y_insert_blank
[
0
])]
# State-b, Sb
log_alpha
[
0
,
1
]
=
ctc_probs
[
0
][
y_insert_blank
[
1
]]
# State-nb, Snb
log_alpha
[
0
,
1
]
=
ctc_probs
[
0
][
int
(
y_insert_blank
[
1
])]
# State-nb, Snb
for
t
in
range
(
1
,
ctc_probs
.
shape
[
0
]):
# T
for
t
in
range
(
1
,
ctc_probs
.
shape
[
0
]):
# T
for
s
in
range
(
len
(
y_insert_blank
)):
# 2L+1
for
s
in
range
(
len
(
y_insert_blank
)):
# 2L+1
...
@@ -110,12 +110,10 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
...
@@ -110,12 +110,10 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
log_alpha
[
t
-
1
,
s
-
2
],
log_alpha
[
t
-
1
,
s
-
2
],
])
])
prev_state
=
[
s
,
s
-
1
,
s
-
2
]
prev_state
=
[
s
,
s
-
1
,
s
-
2
]
# TODO(Hui Zhang): VarBase.__getitem__() not support np.int64
log_alpha
[
t
,
s
]
=
paddle
.
max
(
candidates
)
+
ctc_probs
[
t
][
log_alpha
[
t
,
s
]
=
paddle
.
max
(
candidates
)
+
ctc_probs
[
t
][
int
(
y_insert_blank
[
s
]]
y_insert_blank
[
s
])]
state_path
[
t
,
s
]
=
prev_state
[
paddle
.
argmax
(
candidates
)]
state_path
[
t
,
s
]
=
prev_state
[
paddle
.
argmax
(
candidates
)]
# self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16
# TODO(Hui Zhang): zeros not support paddle.int16
state_seq
=
-
1
*
paddle
.
ones
((
ctc_probs
.
shape
[
0
],
1
),
dtype
=
paddle
.
int32
)
state_seq
=
-
1
*
paddle
.
ones
((
ctc_probs
.
shape
[
0
],
1
),
dtype
=
paddle
.
int32
)
candidates
=
paddle
.
to_tensor
([
candidates
=
paddle
.
to_tensor
([
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录