Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
1635e000
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
接近 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1635e000
编写于
5月 07, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix cmvn
上级
2aed2752
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
51 addition
and
40 deletion
+51
-40
deepspeech/frontend/normalizer.py
deepspeech/frontend/normalizer.py
+5
-2
deepspeech/utils/log.py
deepspeech/utils/log.py
+31
-30
examples/aishell/s0/local/data.sh
examples/aishell/s0/local/data.sh
+1
-0
examples/librispeech/s1/local/data.sh
examples/librispeech/s1/local/data.sh
+1
-0
examples/tiny/s0/local/data.sh
examples/tiny/s0/local/data.sh
+1
-0
utils/avg_model.py
utils/avg_model.py
+2
-0
utils/compute_mean_std.py
utils/compute_mean_std.py
+10
-8
未找到文件。
deepspeech/frontend/normalizer.py
浏览文件 @
1635e000
...
...
@@ -22,9 +22,12 @@ from paddle.io import Dataset
from
deepspeech.frontend.audio
import
AudioSegment
from
deepspeech.frontend.utility
import
load_cmvn
from
deepspeech.frontend.utility
import
read_manifest
from
deepspeech.utils.log
import
Log
__all__
=
[
"FeatureNormalizer"
]
logger
=
Log
(
__name__
).
getlog
()
# https://github.com/PaddlePaddle/Paddle/pull/31481
class
CollateFunc
(
object
):
...
...
@@ -176,7 +179,7 @@ class FeatureNormalizer(object):
wav_number
+=
batch_size
if
wav_number
%
1000
==
0
:
print
(
'process {} wavs,{} frames'
.
format
(
wav_number
,
logger
.
info
(
'process {} wavs,{} frames'
.
format
(
wav_number
,
all_number
))
self
.
cmvn_info
=
{
...
...
deepspeech/utils/log.py
浏览文件 @
1635e000
...
...
@@ -17,6 +17,12 @@ import os
import
socket
import
sys
FORMAT_STR
=
'[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
DATE_FMT_STR
=
'%Y/%m/%d %H:%M:%S'
logging
.
basicConfig
(
level
=
logging
.
DEBUG
,
format
=
FORMAT_STR
,
datefmt
=
DATE_FMT_STR
)
def
find_log_dir
(
log_dir
=
None
):
"""Returns the most suitable directory to put log files into.
...
...
@@ -123,12 +129,10 @@ class Log():
pass
if
not
self
.
logger
.
hasHandlers
():
format
=
'[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
formatter
=
logging
.
Formatter
(
fmt
=
format
,
datefmt
=
'%Y/%m/%d %H:%M:%S'
)
formatter
=
logging
.
Formatter
(
fmt
=
FORMAT_STR
,
datefmt
=
DATE_FMT_STR
)
fh
=
logging
.
FileHandler
(
Log
.
log_name
)
fh
.
setFormatter
(
formatter
)
fh
.
setLevel
(
logging
.
DEBUG
)
fh
.
setFormatter
(
formatter
)
self
.
logger
.
addHandler
(
fh
)
ch
=
logging
.
StreamHandler
()
...
...
@@ -136,9 +140,6 @@ class Log():
ch
.
setFormatter
(
formatter
)
self
.
logger
.
addHandler
(
ch
)
#fh.close()
#ch.close()
# stop propagate for propagating may print
# log multiple times
self
.
logger
.
propagate
=
False
...
...
examples/aishell/s0/local/data.sh
浏览文件 @
1635e000
...
...
@@ -51,6 +51,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--stride_ms
=
10.0
\
--window_ms
=
25.0
\
--sample_rate
=
16000
\
--use_dB_normalization
=
False
\
--num_samples
=
-1
\
--num_workers
=
16
\
--output_path
=
"data/mean_std.json"
...
...
examples/librispeech/s1/local/data.sh
浏览文件 @
1635e000
...
...
@@ -73,6 +73,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--sample_rate
=
16000
\
--stride_ms
=
10.0
\
--window_ms
=
25.0
\
--use_dB_normalization
=
False
\
--num_workers
=
${
num_workers
}
\
--output_path
=
"data/mean_std.json"
...
...
examples/tiny/s0/local/data.sh
浏览文件 @
1635e000
...
...
@@ -57,6 +57,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--sample_rate
=
16000
\
--stride_ms
=
10.0
\
--window_ms
=
25.0
\
--use_dB_normalization
=
False
\
--num_workers
=
2
\
--output_path
=
"data/mean_std.json"
...
...
utils/avg_model.py
浏览文件 @
1635e000
...
...
@@ -21,6 +21,8 @@ import paddle
def
main
(
args
):
paddle
.
set_device
(
'cpu'
)
val_scores
=
[]
beat_val_scores
=
[]
selected_epochs
=
[]
...
...
utils/compute_mean_std.py
浏览文件 @
1635e000
...
...
@@ -25,17 +25,19 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg
=
functools
.
partial
(
add_arguments
,
argparser
=
parser
)
# yapf: disable
add_arg
(
'num_samples'
,
int
,
-
1
,
"# of samples to for statistics."
)
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc, fbank."
,
choices
=
[
'linear'
,
'mfcc'
,
'fbank'
])
add_arg
(
'feat_dim'
,
int
,
13
,
"Audio feature dim."
)
add_arg
(
'delta_delta'
,
bool
,
False
,
"Audio feature with delta delta."
)
add_arg
(
'delta_delta'
,
bool
,
False
,
"Audio feature with delta delta."
)
add_arg
(
'stride_ms'
,
float
,
10.0
,
"stride length in ms."
)
add_arg
(
'window_ms'
,
float
,
20.0
,
"stride length in ms."
)
add_arg
(
'sample_rate'
,
int
,
16000
,
"target sample rate."
)
add_arg
(
'use_dB_normalization'
,
bool
,
False
,
"do dB normalization."
)
add_arg
(
'target_dB'
,
int
,
-
20
,
"target dB."
)
add_arg
(
'manifest_path'
,
str
,
'data/librispeech/manifest.train'
,
"Filepath of manifest to compute normalizer's mean and stddev."
)
...
...
@@ -63,8 +65,8 @@ def main():
n_fft
=
None
,
max_freq
=
None
,
target_sample_rate
=
args
.
sample_rate
,
use_dB_normalization
=
True
,
target_dB
=
-
20
,
use_dB_normalization
=
args
.
use_dB_normalization
,
target_dB
=
args
.
target_dB
,
dither
=
0.0
)
def
augment_and_featurize
(
audio_segment
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录