Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
c8368410
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
1 年多 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c8368410
编写于
6月 04, 2021
作者:
H
Haoxin Ma
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
utt datapipeline
上级
03e5a64d
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
27 addition
and
20 deletion
+27
-20
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+2
-2
deepspeech/io/collator.py
deepspeech/io/collator.py
+2
-2
deepspeech/io/dataset.py
deepspeech/io/dataset.py
+8
-4
deepspeech/models/deepspeech2.py
deepspeech/models/deepspeech2.py
+1
-1
examples/chinese_g2p/local/ignore_sandhi.py
examples/chinese_g2p/local/ignore_sandhi.py
+5
-2
examples/dataset/librispeech/.gitignore
examples/dataset/librispeech/.gitignore
+7
-7
examples/librispeech/s0/README.md
examples/librispeech/s0/README.md
+1
-1
examples/tiny/s0/run.sh
examples/tiny/s0/run.sh
+1
-1
未找到文件。
deepspeech/exps/deepspeech2/model.py
浏览文件 @
c8368410
...
...
@@ -75,7 +75,7 @@ class DeepSpeech2Trainer(Trainer):
for
i
,
batch
in
enumerate
(
self
.
valid_loader
):
loss
=
self
.
model
(
*
batch
)
if
paddle
.
isfinite
(
loss
):
num_utts
=
batch
[
0
].
shape
[
0
]
num_utts
=
batch
[
1
].
shape
[
0
]
num_seen_utts
+=
num_utts
total_loss
+=
float
(
loss
)
*
num_utts
valid_losses
[
'val_loss'
].
append
(
float
(
loss
))
...
...
@@ -191,7 +191,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
trans
.
append
(
''
.
join
([
chr
(
i
)
for
i
in
ids
]))
return
trans
def
compute_metrics
(
self
,
audio
,
audio_len
,
texts
,
texts_len
):
def
compute_metrics
(
self
,
utt
,
audio
,
audio_len
,
texts
,
texts_len
):
cfg
=
self
.
config
.
decoding
errors_sum
,
len_refs
,
num_ins
=
0.0
,
0
,
0
errors_func
=
error_rate
.
char_errors
if
cfg
.
error_rate_type
==
'cer'
else
error_rate
.
word_errors
...
...
deepspeech/io/collator.py
浏览文件 @
c8368410
...
...
@@ -51,7 +51,7 @@ class SpeechCollator():
audio_lens
=
[]
texts
=
[]
text_lens
=
[]
for
audio
,
text
in
batch
:
for
utt
,
audio
,
text
in
batch
:
# audio
audios
.
append
(
audio
.
T
)
# [T, D]
audio_lens
.
append
(
audio
.
shape
[
1
])
...
...
@@ -75,4 +75,4 @@ class SpeechCollator():
padded_texts
=
pad_sequence
(
texts
,
padding_value
=
IGNORE_ID
).
astype
(
np
.
int64
)
text_lens
=
np
.
array
(
text_lens
).
astype
(
np
.
int64
)
return
padded_audios
,
audio_lens
,
padded_texts
,
text_lens
return
utt
,
padded_audios
,
audio_lens
,
padded_texts
,
text_lens
deepspeech/io/dataset.py
浏览文件 @
c8368410
...
...
@@ -284,7 +284,7 @@ class ManifestDataset(Dataset):
return
self
.
_local_data
.
tar2object
[
tarpath
].
extractfile
(
self
.
_local_data
.
tar2info
[
tarpath
][
filename
])
def
process_utterance
(
self
,
audio_file
,
transcript
):
def
process_utterance
(
self
,
utt
,
audio_file
,
transcript
):
"""Load, augment, featurize and normalize for speech data.
:param audio_file: Filepath or file object of audio file.
...
...
@@ -323,7 +323,7 @@ class ManifestDataset(Dataset):
specgram
=
self
.
_augmentation_pipeline
.
transform_feature
(
specgram
)
feature_aug_time
=
time
.
time
()
-
start_time
#logger.debug(f"audio feature augmentation time: {feature_aug_time}")
return
specgram
,
transcript_part
return
utt
,
specgram
,
transcript_part
def
_instance_reader_creator
(
self
,
manifest
):
"""
...
...
@@ -336,7 +336,9 @@ class ManifestDataset(Dataset):
def
reader
():
for
instance
in
manifest
:
inst
=
self
.
process_utterance
(
instance
[
"feat"
],
# inst = self.process_utterance(instance["feat"],
# instance["text"])
inst
=
self
.
process_utterance
(
instance
[
"utt"
],
instance
[
"feat"
],
instance
[
"text"
])
yield
inst
...
...
@@ -347,4 +349,6 @@ class ManifestDataset(Dataset):
def
__getitem__
(
self
,
idx
):
instance
=
self
.
_manifest
[
idx
]
return
self
.
process_utterance
(
instance
[
"feat"
],
instance
[
"text"
])
return
self
.
process_utterance
(
instance
[
"utt"
],
instance
[
"feat"
],
instance
[
"text"
])
# return self.process_utterance(instance["feat"], instance["text"])
deepspeech/models/deepspeech2.py
浏览文件 @
c8368410
...
...
@@ -161,7 +161,7 @@ class DeepSpeech2Model(nn.Layer):
reduction
=
True
,
# sum
batch_average
=
True
)
# sum / batch_size
def
forward
(
self
,
audio
,
audio_len
,
text
,
text_len
):
def
forward
(
self
,
utt
,
audio
,
audio_len
,
text
,
text_len
):
"""Compute Model loss
Args:
...
...
examples/chinese_g2p/local/ignore_sandhi.py
浏览文件 @
c8368410
...
...
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
from
typing
import
List
,
Union
from
pathlib
import
Path
from
typing
import
List
from
typing
import
Union
def
erized
(
syllable
:
str
)
->
bool
:
...
...
@@ -67,7 +68,9 @@ def ignore_sandhi(reference: List[str], generated: List[str]) -> List[str]:
return
result
def
convert_transcriptions
(
reference
:
Union
[
str
,
Path
],
generated
:
Union
[
str
,
Path
],
output
:
Union
[
str
,
Path
]):
def
convert_transcriptions
(
reference
:
Union
[
str
,
Path
],
generated
:
Union
[
str
,
Path
],
output
:
Union
[
str
,
Path
]):
with
open
(
reference
,
'rt'
)
as
f_ref
:
with
open
(
generated
,
'rt'
)
as
f_gen
:
with
open
(
output
,
'wt'
)
as
f_out
:
...
...
examples/dataset/librispeech/.gitignore
浏览文件 @
c8368410
dev-clean
/
dev-other
/
test-clean
/
test-other
/
train-clean-100
/
train-clean-360
/
train-other-500
/
dev-clean
dev-other
test-clean
test-other
train-clean-100
train-clean-360
train-other-500
examples/librispeech/s0/README.md
浏览文件 @
c8368410
...
...
@@ -3,7 +3,7 @@
## Deepspeech2
| Model | release | Config | Test set | Loss | WER |
| --- | --- | --- | --- | --- | --- |
| --- | --- | --- | --- | --- | --- |
| DeepSpeech2 | 2.1.0 | conf/deepspeech2.yaml | 15.184467315673828 | test-clean | 0.072154 |
| DeepSpeech2 | 2.0.0 | conf/deepspeech2.yaml | - | test-clean | 0.073973 |
| DeepSpeech2 | 1.8.5 | - | test-clean | - | 0.074939 |
examples/tiny/s0/run.sh
浏览文件 @
c8368410
...
...
@@ -11,7 +11,7 @@ avg_num=1
source
${
MAIN_ROOT
}
/utils/parse_options.sh
||
exit
1
;
avg_ckpt
=
avg_
${
avg_num
}
ckpt
=
$(
basename
${
conf_path
}
|
awk
-F
'.'
'{print $1}'
)
ckpt
=
$(
basename
${
conf_path
}
|
awk
-F
'.'
'{print $1}'
)
###ckpt = deepspeech2
echo
"checkpoint name
${
ckpt
}
"
if
[
${
stage
}
-le
0
]
&&
[
${
stop_stage
}
-ge
0
]
;
then
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录