Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
3d994f5c
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3d994f5c
编写于
10月 11, 2022
作者:
T
tianhao zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format wav2vec2 demo
上级
7bee9d80
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
11 addition
and
21 deletion
+11
-21
examples/librispeech/asr3/conf/preprocess.yaml
examples/librispeech/asr3/conf/preprocess.yaml
+2
-2
examples/librispeech/asr3/conf/tuning/decode.yaml
examples/librispeech/asr3/conf/tuning/decode.yaml
+1
-8
examples/librispeech/asr3/run.sh
examples/librispeech/asr3/run.sh
+1
-2
paddlespeech/audio/transform/spectrogram.py
paddlespeech/audio/transform/spectrogram.py
+1
-1
paddlespeech/s2t/exps/wav2vec2/bin/test.py
paddlespeech/s2t/exps/wav2vec2/bin/test.py
+0
-2
paddlespeech/s2t/exps/wav2vec2/model.py
paddlespeech/s2t/exps/wav2vec2/model.py
+0
-3
paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py
paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py
+1
-1
paddlespeech/s2t/modules/ctc.py
paddlespeech/s2t/modules/ctc.py
+5
-2
未找到文件。
examples/librispeech/asr3/conf/preprocess.yaml
浏览文件 @
3d994f5c
process
:
process
:
# extract kaldi fbank from PCM
# use raw audio
-
type
:
wav_process
-
type
:
wav_process
dither
:
0.
1
dither
:
0.
0
examples/librispeech/asr3/conf/tuning/decode.yaml
浏览文件 @
3d994f5c
decode_batch_size
:
1
decode_batch_size
:
1
error_rate_type
:
wer
error_rate_type
:
wer
decoding_method
:
ctc_greedy_search
# '
attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring
'
decoding_method
:
ctc_greedy_search
# '
ctc_greedy_search', 'ctc_prefix_beam_search
'
beam_size
:
10
beam_size
:
10
ctc_weight
:
0.5
# ctc weight for attention rescoring decode mode.
decoding_chunk_size
:
-1
# decoding chunk size. Defaults to -1.
# <0: for decoding, use full chunk.
# >0: for decoding, use fixed chunk size as set.
# 0: used for training, it's prohibited here.
num_decoding_left_chunks
:
-1
# number of left chunks for decoding. Defaults to -1.
simulate_streaming
:
False
# simulate streaming inference. Defaults to False.
examples/librispeech/asr3/run.sh
浏览文件 @
3d994f5c
...
@@ -36,9 +36,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
...
@@ -36,9 +36,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
avg.sh best exp/
${
ckpt
}
/checkpoints
${
avg_num
}
avg.sh best exp/
${
ckpt
}
/checkpoints
${
avg_num
}
fi
fi
if
[
${
stage
}
-le
3
]
&&
[
${
stop_stage
}
-ge
3
]
;
then
if
[
${
stage
}
-le
3
]
&&
[
${
stop_stage
}
-ge
3
]
;
then
#
attetion resocre
decoder
#
greedy search
decoder
CUDA_VISIBLE_DEVICES
=
${
gpus
}
./local/test.sh
${
conf_path
}
${
decode_conf_path
}
exp/
${
ckpt
}
/checkpoints/
${
avg_ckpt
}
||
exit
-1
CUDA_VISIBLE_DEVICES
=
${
gpus
}
./local/test.sh
${
conf_path
}
${
decode_conf_path
}
exp/
${
ckpt
}
/checkpoints/
${
avg_ckpt
}
||
exit
-1
fi
fi
...
...
paddlespeech/audio/transform/spectrogram.py
浏览文件 @
3d994f5c
...
@@ -383,7 +383,7 @@ class LogMelSpectrogramKaldi():
...
@@ -383,7 +383,7 @@ class LogMelSpectrogramKaldi():
class
WavProcess
():
class
WavProcess
():
def
__init__
(
self
,
dither
=
0.
1
):
def
__init__
(
self
,
dither
=
0.
0
):
"""
"""
Args:
Args:
dither (float): Dithering constant
dither (float): Dithering constant
...
...
paddlespeech/s2t/exps/wav2vec2/bin/test.py
浏览文件 @
3d994f5c
...
@@ -20,8 +20,6 @@ from paddlespeech.s2t.exps.wav2vec2.model import Wav2Vec2ASRTester as Tester
...
@@ -20,8 +20,6 @@ from paddlespeech.s2t.exps.wav2vec2.model import Wav2Vec2ASRTester as Tester
from
paddlespeech.s2t.training.cli
import
default_argument_parser
from
paddlespeech.s2t.training.cli
import
default_argument_parser
from
paddlespeech.s2t.utils.utility
import
print_arguments
from
paddlespeech.s2t.utils.utility
import
print_arguments
# TODO(hui zhang): dynamic load
def
main_sp
(
config
,
args
):
def
main_sp
(
config
,
args
):
exp
=
Tester
(
config
,
args
)
exp
=
Tester
(
config
,
args
)
...
...
paddlespeech/s2t/exps/wav2vec2/model.py
浏览文件 @
3d994f5c
...
@@ -25,9 +25,7 @@ import paddle
...
@@ -25,9 +25,7 @@ import paddle
from
paddle
import
distributed
as
dist
from
paddle
import
distributed
as
dist
from
paddlespeech.s2t.frontend.featurizer
import
TextFeaturizer
from
paddlespeech.s2t.frontend.featurizer
import
TextFeaturizer
from
paddlespeech.s2t.io.dataloader
import
BatchDataLoader
from
paddlespeech.s2t.io.dataloader
import
DataLoaderFactory
from
paddlespeech.s2t.io.dataloader
import
DataLoaderFactory
from
paddlespeech.s2t.io.dataloader
import
StreamDataLoader
from
paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation
import
TimeDomainSpecAugment
from
paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation
import
TimeDomainSpecAugment
from
paddlespeech.s2t.models.wav2vec2.wav2vec2_ASR
import
Wav2vec2ASR
from
paddlespeech.s2t.models.wav2vec2.wav2vec2_ASR
import
Wav2vec2ASR
from
paddlespeech.s2t.training.optimizer
import
OptimizerFactory
from
paddlespeech.s2t.training.optimizer
import
OptimizerFactory
...
@@ -300,7 +298,6 @@ class Wav2Vec2ASRTrainer(Trainer):
...
@@ -300,7 +298,6 @@ class Wav2Vec2ASRTrainer(Trainer):
"epsilon"
:
optim_conf
.
epsilon
,
"epsilon"
:
optim_conf
.
epsilon
,
"rho"
:
optim_conf
.
rho
,
"rho"
:
optim_conf
.
rho
,
"parameters"
:
parameters
,
"parameters"
:
parameters
,
"epsilon"
:
1e-9
if
optim_type
==
'noam'
else
None
,
"beta1"
:
0.9
if
optim_type
==
'noam'
else
None
,
"beta1"
:
0.9
if
optim_type
==
'noam'
else
None
,
"beat2"
:
0.98
if
optim_type
==
'noam'
else
None
,
"beat2"
:
0.98
if
optim_type
==
'noam'
else
None
,
}
}
...
...
paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py
浏览文件 @
3d994f5c
...
@@ -39,7 +39,7 @@ class Wav2vec2ASR(nn.Layer):
...
@@ -39,7 +39,7 @@ class Wav2vec2ASR(nn.Layer):
enc_n_units
=
config
.
dnn_neurons
,
enc_n_units
=
config
.
dnn_neurons
,
blank_id
=
config
.
blank_id
,
blank_id
=
config
.
blank_id
,
dropout_rate
=
config
.
ctc_dropout_rate
,
dropout_rate
=
config
.
ctc_dropout_rate
,
reduction
=
True
)
reduction
=
'mean'
)
def
forward
(
self
,
wav
,
wavs_lens_rate
,
target
,
target_lens_rate
):
def
forward
(
self
,
wav
,
wavs_lens_rate
,
target
,
target_lens_rate
):
if
self
.
normalize_wav
:
if
self
.
normalize_wav
:
...
...
paddlespeech/s2t/modules/ctc.py
浏览文件 @
3d994f5c
...
@@ -53,7 +53,7 @@ class CTCDecoderBase(nn.Layer):
...
@@ -53,7 +53,7 @@ class CTCDecoderBase(nn.Layer):
enc_n_units
,
enc_n_units
,
blank_id
=
0
,
blank_id
=
0
,
dropout_rate
:
float
=
0.0
,
dropout_rate
:
float
=
0.0
,
reduction
:
bool
=
True
,
reduction
:
Union
[
str
,
bool
]
=
True
,
batch_average
:
bool
=
True
,
batch_average
:
bool
=
True
,
grad_norm_type
:
Union
[
str
,
None
]
=
None
):
grad_norm_type
:
Union
[
str
,
None
]
=
None
):
"""CTC decoder
"""CTC decoder
...
@@ -73,7 +73,10 @@ class CTCDecoderBase(nn.Layer):
...
@@ -73,7 +73,10 @@ class CTCDecoderBase(nn.Layer):
self
.
odim
=
odim
self
.
odim
=
odim
self
.
dropout
=
nn
.
Dropout
(
dropout_rate
)
self
.
dropout
=
nn
.
Dropout
(
dropout_rate
)
self
.
ctc_lo
=
Linear
(
enc_n_units
,
self
.
odim
)
self
.
ctc_lo
=
Linear
(
enc_n_units
,
self
.
odim
)
reduction_type
=
"sum"
if
reduction
else
"none"
if
isinstance
(
reduction
,
bool
):
reduction_type
=
"sum"
if
reduction
else
"none"
else
:
reduction_type
=
reduction
self
.
criterion
=
CTCLoss
(
self
.
criterion
=
CTCLoss
(
blank
=
self
.
blank_id
,
blank
=
self
.
blank_id
,
reduction
=
reduction_type
,
reduction
=
reduction_type
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录