Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
3d994f5c
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3d994f5c
编写于
10月 11, 2022
作者:
T
tianhao zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format wav2vec2 demo
上级
7bee9d80
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
11 addition
and
21 deletion
+11
-21
examples/librispeech/asr3/conf/preprocess.yaml
examples/librispeech/asr3/conf/preprocess.yaml
+2
-2
examples/librispeech/asr3/conf/tuning/decode.yaml
examples/librispeech/asr3/conf/tuning/decode.yaml
+1
-8
examples/librispeech/asr3/run.sh
examples/librispeech/asr3/run.sh
+1
-2
paddlespeech/audio/transform/spectrogram.py
paddlespeech/audio/transform/spectrogram.py
+1
-1
paddlespeech/s2t/exps/wav2vec2/bin/test.py
paddlespeech/s2t/exps/wav2vec2/bin/test.py
+0
-2
paddlespeech/s2t/exps/wav2vec2/model.py
paddlespeech/s2t/exps/wav2vec2/model.py
+0
-3
paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py
paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py
+1
-1
paddlespeech/s2t/modules/ctc.py
paddlespeech/s2t/modules/ctc.py
+5
-2
未找到文件。
examples/librispeech/asr3/conf/preprocess.yaml
浏览文件 @
3d994f5c
process
:
# extract kaldi fbank from PCM
# use raw audio
-
type
:
wav_process
dither
:
0.
1
dither
:
0.
0
examples/librispeech/asr3/conf/tuning/decode.yaml
浏览文件 @
3d994f5c
decode_batch_size
:
1
error_rate_type
:
wer
decoding_method
:
ctc_greedy_search
# '
attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring
'
decoding_method
:
ctc_greedy_search
# '
ctc_greedy_search', 'ctc_prefix_beam_search
'
beam_size
:
10
ctc_weight
:
0.5
# ctc weight for attention rescoring decode mode.
decoding_chunk_size
:
-1
# decoding chunk size. Defaults to -1.
# <0: for decoding, use full chunk.
# >0: for decoding, use fixed chunk size as set.
# 0: used for training, it's prohibited here.
num_decoding_left_chunks
:
-1
# number of left chunks for decoding. Defaults to -1.
simulate_streaming
:
False
# simulate streaming inference. Defaults to False.
examples/librispeech/asr3/run.sh
浏览文件 @
3d994f5c
...
...
@@ -36,9 +36,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
avg.sh best exp/
${
ckpt
}
/checkpoints
${
avg_num
}
fi
if
[
${
stage
}
-le
3
]
&&
[
${
stop_stage
}
-ge
3
]
;
then
#
attetion resocre
decoder
#
greedy search
decoder
CUDA_VISIBLE_DEVICES
=
${
gpus
}
./local/test.sh
${
conf_path
}
${
decode_conf_path
}
exp/
${
ckpt
}
/checkpoints/
${
avg_ckpt
}
||
exit
-1
fi
...
...
paddlespeech/audio/transform/spectrogram.py
浏览文件 @
3d994f5c
...
...
@@ -383,7 +383,7 @@ class LogMelSpectrogramKaldi():
class
WavProcess
():
def
__init__
(
self
,
dither
=
0.
1
):
def
__init__
(
self
,
dither
=
0.
0
):
"""
Args:
dither (float): Dithering constant
...
...
paddlespeech/s2t/exps/wav2vec2/bin/test.py
浏览文件 @
3d994f5c
...
...
@@ -20,8 +20,6 @@ from paddlespeech.s2t.exps.wav2vec2.model import Wav2Vec2ASRTester as Tester
from
paddlespeech.s2t.training.cli
import
default_argument_parser
from
paddlespeech.s2t.utils.utility
import
print_arguments
# TODO(hui zhang): dynamic load
def
main_sp
(
config
,
args
):
exp
=
Tester
(
config
,
args
)
...
...
paddlespeech/s2t/exps/wav2vec2/model.py
浏览文件 @
3d994f5c
...
...
@@ -25,9 +25,7 @@ import paddle
from
paddle
import
distributed
as
dist
from
paddlespeech.s2t.frontend.featurizer
import
TextFeaturizer
from
paddlespeech.s2t.io.dataloader
import
BatchDataLoader
from
paddlespeech.s2t.io.dataloader
import
DataLoaderFactory
from
paddlespeech.s2t.io.dataloader
import
StreamDataLoader
from
paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation
import
TimeDomainSpecAugment
from
paddlespeech.s2t.models.wav2vec2.wav2vec2_ASR
import
Wav2vec2ASR
from
paddlespeech.s2t.training.optimizer
import
OptimizerFactory
...
...
@@ -300,7 +298,6 @@ class Wav2Vec2ASRTrainer(Trainer):
"epsilon"
:
optim_conf
.
epsilon
,
"rho"
:
optim_conf
.
rho
,
"parameters"
:
parameters
,
"epsilon"
:
1e-9
if
optim_type
==
'noam'
else
None
,
"beta1"
:
0.9
if
optim_type
==
'noam'
else
None
,
"beat2"
:
0.98
if
optim_type
==
'noam'
else
None
,
}
...
...
paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py
浏览文件 @
3d994f5c
...
...
@@ -39,7 +39,7 @@ class Wav2vec2ASR(nn.Layer):
enc_n_units
=
config
.
dnn_neurons
,
blank_id
=
config
.
blank_id
,
dropout_rate
=
config
.
ctc_dropout_rate
,
reduction
=
True
)
reduction
=
'mean'
)
def
forward
(
self
,
wav
,
wavs_lens_rate
,
target
,
target_lens_rate
):
if
self
.
normalize_wav
:
...
...
paddlespeech/s2t/modules/ctc.py
浏览文件 @
3d994f5c
...
...
@@ -53,7 +53,7 @@ class CTCDecoderBase(nn.Layer):
enc_n_units
,
blank_id
=
0
,
dropout_rate
:
float
=
0.0
,
reduction
:
bool
=
True
,
reduction
:
Union
[
str
,
bool
]
=
True
,
batch_average
:
bool
=
True
,
grad_norm_type
:
Union
[
str
,
None
]
=
None
):
"""CTC decoder
...
...
@@ -73,7 +73,10 @@ class CTCDecoderBase(nn.Layer):
self
.
odim
=
odim
self
.
dropout
=
nn
.
Dropout
(
dropout_rate
)
self
.
ctc_lo
=
Linear
(
enc_n_units
,
self
.
odim
)
reduction_type
=
"sum"
if
reduction
else
"none"
if
isinstance
(
reduction
,
bool
):
reduction_type
=
"sum"
if
reduction
else
"none"
else
:
reduction_type
=
reduction
self
.
criterion
=
CTCLoss
(
blank
=
self
.
blank_id
,
reduction
=
reduction_type
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录