Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
6f83be1a
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6f83be1a
编写于
10月 03, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix spectrum_type typo
上级
c8a702e9
变更
24
隐藏空白更改
内联
并排
Showing
24 changed files
with
51 additions
and
51 deletions
+51
-51
deepspeech/exps/deepspeech2/config.py
deepspeech/exps/deepspeech2/config.py
+1
-1
deepspeech/frontend/featurizer/audio_featurizer.py
deepspeech/frontend/featurizer/audio_featurizer.py
+16
-16
deepspeech/frontend/featurizer/speech_featurizer.py
deepspeech/frontend/featurizer/speech_featurizer.py
+7
-7
deepspeech/io/__init__.py
deepspeech/io/__init__.py
+2
-2
deepspeech/io/dataset.py
deepspeech/io/dataset.py
+5
-5
examples/aishell/s0/conf/deepspeech2.yaml
examples/aishell/s0/conf/deepspeech2.yaml
+1
-1
examples/aishell/s0/local/data.sh
examples/aishell/s0/local/data.sh
+1
-1
examples/aishell/s1/conf/chunk_conformer.yaml
examples/aishell/s1/conf/chunk_conformer.yaml
+1
-1
examples/aishell/s1/conf/conformer.yaml
examples/aishell/s1/conf/conformer.yaml
+1
-1
examples/aishell/s1/local/data.sh
examples/aishell/s1/local/data.sh
+1
-1
examples/librispeech/s0/conf/deepspeech2.yaml
examples/librispeech/s0/conf/deepspeech2.yaml
+1
-1
examples/librispeech/s0/local/data.sh
examples/librispeech/s0/local/data.sh
+1
-1
examples/librispeech/s1/conf/chunk_confermer.yaml
examples/librispeech/s1/conf/chunk_confermer.yaml
+1
-1
examples/librispeech/s1/conf/chunk_transformer.yaml
examples/librispeech/s1/conf/chunk_transformer.yaml
+1
-1
examples/librispeech/s1/conf/conformer.yaml
examples/librispeech/s1/conf/conformer.yaml
+1
-1
examples/librispeech/s1/conf/transformer.yaml
examples/librispeech/s1/conf/transformer.yaml
+1
-1
examples/librispeech/s1/local/data.sh
examples/librispeech/s1/local/data.sh
+1
-1
examples/tiny/s0/conf/deepspeech2.yaml
examples/tiny/s0/conf/deepspeech2.yaml
+1
-1
examples/tiny/s0/local/data.sh
examples/tiny/s0/local/data.sh
+2
-2
examples/tiny/s1/conf/chunk_confermer.yaml
examples/tiny/s1/conf/chunk_confermer.yaml
+1
-1
examples/tiny/s1/conf/chunk_transformer.yaml
examples/tiny/s1/conf/chunk_transformer.yaml
+1
-1
examples/tiny/s1/conf/conformer.yaml
examples/tiny/s1/conf/conformer.yaml
+1
-1
examples/tiny/s1/conf/transformer.yaml
examples/tiny/s1/conf/transformer.yaml
+1
-1
examples/tiny/s1/local/data.sh
examples/tiny/s1/local/data.sh
+1
-1
未找到文件。
deepspeech/exps/deepspeech2/config.py
浏览文件 @
6f83be1a
...
...
@@ -32,7 +32,7 @@ _C.data = CN(
window_ms
=
20.0
,
# ms
n_fft
=
None
,
# fft points
max_freq
=
None
,
# None for samplerate/2
spec
gra
m_type
=
'linear'
,
# 'linear', 'mfcc', 'fbank'
spec
tru
m_type
=
'linear'
,
# 'linear', 'mfcc', 'fbank'
feat_dim
=
0
,
# 'mfcc', 'fbank'
delat_delta
=
False
,
# 'mfcc', 'fbank'
target_sample_rate
=
16000
,
# target sample rate
...
...
deepspeech/frontend/featurizer/audio_featurizer.py
浏览文件 @
6f83be1a
...
...
@@ -24,15 +24,15 @@ class AudioFeaturizer(object):
Currently, it supports feature types of linear spectrogram and mfcc.
:param specgram_type: Specgram feature type. Options: 'linear'.
:type specgram_type: str
:param spectrum_type: Specgram feature type. Options: 'linear'.
:type spectrum_type: str
:param stride_ms: Striding size (in milliseconds) for generating frames.
:type stride_ms: float
:param window_ms: Window size (in milliseconds) for generating frames.
:type window_ms: float
:param max_freq: When specgram_type is 'linear', only FFT bins
:param max_freq: When spectrum_type is 'linear', only FFT bins
corresponding to frequencies between [0, max_freq] are
returned; when specgram_type is 'mfcc', max_freq is the
returned; when spectrum_type is 'mfcc', max_freq is the
highest band edge of mel filters.
:type max_freq: None|float
:param target_sample_rate: Audio are resampled (if upsampling or
...
...
@@ -47,7 +47,7 @@ class AudioFeaturizer(object):
"""
def
__init__
(
self
,
spec
gra
m_type
:
str
=
'linear'
,
spec
tru
m_type
:
str
=
'linear'
,
feat_dim
:
int
=
None
,
delta_delta
:
bool
=
False
,
stride_ms
=
10.0
,
...
...
@@ -58,7 +58,7 @@ class AudioFeaturizer(object):
use_dB_normalization
=
True
,
target_dB
=-
20
,
dither
=
1.0
):
self
.
_spec
gram_type
=
specgra
m_type
self
.
_spec
trum_type
=
spectru
m_type
# mfcc and fbank using `feat_dim`
self
.
_feat_dim
=
feat_dim
# mfcc and fbank using `delta-delta`
...
...
@@ -113,27 +113,27 @@ class AudioFeaturizer(object):
def
feature_size
(
self
):
"""audio feature size"""
feat_dim
=
0
if
self
.
_spec
gra
m_type
==
'linear'
:
if
self
.
_spec
tru
m_type
==
'linear'
:
fft_point
=
self
.
_window_ms
if
self
.
_fft_point
is
None
else
self
.
_fft_point
feat_dim
=
int
(
fft_point
*
(
self
.
_target_sample_rate
/
1000
)
/
2
+
1
)
elif
self
.
_spec
gra
m_type
==
'mfcc'
:
elif
self
.
_spec
tru
m_type
==
'mfcc'
:
# mfcc, delta, delta-delta
feat_dim
=
int
(
self
.
_feat_dim
*
3
)
if
self
.
_delta_delta
else
int
(
self
.
_feat_dim
)
elif
self
.
_spec
gra
m_type
==
'fbank'
:
elif
self
.
_spec
tru
m_type
==
'fbank'
:
# fbank, delta, delta-delta
feat_dim
=
int
(
self
.
_feat_dim
*
3
)
if
self
.
_delta_delta
else
int
(
self
.
_feat_dim
)
else
:
raise
ValueError
(
"Unknown spec
gra
m_type %s. "
"Supported values: linear."
%
self
.
_spec
gra
m_type
)
raise
ValueError
(
"Unknown spec
tru
m_type %s. "
"Supported values: linear."
%
self
.
_spec
tru
m_type
)
return
feat_dim
def
_compute_specgram
(
self
,
audio_segment
):
"""Extract various audio features."""
sample_rate
=
audio_segment
.
sample_rate
if
self
.
_spec
gra
m_type
==
'linear'
:
if
self
.
_spec
tru
m_type
==
'linear'
:
samples
=
audio_segment
.
samples
return
self
.
_compute_linear_specgram
(
samples
,
...
...
@@ -141,7 +141,7 @@ class AudioFeaturizer(object):
stride_ms
=
self
.
_stride_ms
,
window_ms
=
self
.
_window_ms
,
max_freq
=
self
.
_max_freq
)
elif
self
.
_spec
gra
m_type
==
'mfcc'
:
elif
self
.
_spec
tru
m_type
==
'mfcc'
:
samples
=
audio_segment
.
to
(
'int16'
)
return
self
.
_compute_mfcc
(
samples
,
...
...
@@ -152,7 +152,7 @@ class AudioFeaturizer(object):
max_freq
=
self
.
_max_freq
,
dither
=
self
.
_dither
,
delta_delta
=
self
.
_delta_delta
)
elif
self
.
_spec
gra
m_type
==
'fbank'
:
elif
self
.
_spec
tru
m_type
==
'fbank'
:
samples
=
audio_segment
.
to
(
'int16'
)
return
self
.
_compute_fbank
(
samples
,
...
...
@@ -164,8 +164,8 @@ class AudioFeaturizer(object):
dither
=
self
.
_dither
,
delta_delta
=
self
.
_delta_delta
)
else
:
raise
ValueError
(
"Unknown spec
gra
m_type %s. "
"Supported values: linear."
%
self
.
_spec
gra
m_type
)
raise
ValueError
(
"Unknown spec
tru
m_type %s. "
"Supported values: linear."
%
self
.
_spec
tru
m_type
)
def
_compute_linear_specgram
(
self
,
samples
,
...
...
deepspeech/frontend/featurizer/speech_featurizer.py
浏览文件 @
6f83be1a
...
...
@@ -27,16 +27,16 @@ class SpeechFeaturizer(object):
:param vocab_filepath: Filepath to load vocabulary for token indices
conversion.
:type specgram_type: str
:param specgram_type: Specgram feature type. Options: 'linear', 'mfcc'.
:type specgram_type: str
:type spectrum_type: str
:param spectrum_type: Specgram feature type. Options: 'linear', 'mfcc'.
:type spectrum_type: str
:param stride_ms: Striding size (in milliseconds) for generating frames.
:type stride_ms: float
:param window_ms: Window size (in milliseconds) for generating frames.
:type window_ms: float
:param max_freq: When specgram_type is 'linear', only FFT bins
:param max_freq: When spectrum_type is 'linear', only FFT bins
corresponding to frequencies between [0, max_freq] are
returned; when specgram_type is 'mfcc', max_freq is the
returned; when spectrum_type is 'mfcc', max_freq is the
highest band edge of mel filters.
:type max_freq: None|float
:param target_sample_rate: Speech are resampled (if upsampling or
...
...
@@ -54,7 +54,7 @@ class SpeechFeaturizer(object):
unit_type
,
vocab_filepath
,
spm_model_prefix
=
None
,
spec
gra
m_type
=
'linear'
,
spec
tru
m_type
=
'linear'
,
feat_dim
=
None
,
delta_delta
=
False
,
stride_ms
=
10.0
,
...
...
@@ -66,7 +66,7 @@ class SpeechFeaturizer(object):
target_dB
=-
20
,
dither
=
1.0
):
self
.
_audio_featurizer
=
AudioFeaturizer
(
spec
gram_type
=
specgra
m_type
,
spec
trum_type
=
spectru
m_type
,
feat_dim
=
feat_dim
,
delta_delta
=
delta_delta
,
stride_ms
=
stride_ms
,
...
...
deepspeech/io/__init__.py
浏览文件 @
6f83be1a
...
...
@@ -35,7 +35,7 @@ def create_dataloader(manifest_path,
stride_ms
=
10.0
,
window_ms
=
20.0
,
max_freq
=
None
,
spec
gra
m_type
=
'linear'
,
spec
tru
m_type
=
'linear'
,
feat_dim
=
None
,
delta_delta
=
False
,
use_dB_normalization
=
True
,
...
...
@@ -64,7 +64,7 @@ def create_dataloader(manifest_path,
stride_ms
=
stride_ms
,
window_ms
=
window_ms
,
max_freq
=
max_freq
,
spec
gram_type
=
specgra
m_type
,
spec
trum_type
=
spectru
m_type
,
feat_dim
=
feat_dim
,
delta_delta
=
delta_delta
,
use_dB_normalization
=
use_dB_normalization
,
...
...
deepspeech/io/dataset.py
浏览文件 @
6f83be1a
...
...
@@ -63,7 +63,7 @@ class ManifestDataset(Dataset):
n_fft
=
None
,
# fft points
max_freq
=
None
,
# None for samplerate/2
raw_wav
=
True
,
# use raw_wav or kaldi feature
spec
gra
m_type
=
'linear'
,
# 'linear', 'mfcc', 'fbank'
spec
tru
m_type
=
'linear'
,
# 'linear', 'mfcc', 'fbank'
feat_dim
=
0
,
# 'mfcc', 'fbank'
delta_delta
=
False
,
# 'mfcc', 'fbank'
dither
=
1.0
,
# feature dither
...
...
@@ -124,7 +124,7 @@ class ManifestDataset(Dataset):
n_fft
=
config
.
data
.
n_fft
,
max_freq
=
config
.
data
.
max_freq
,
target_sample_rate
=
config
.
data
.
target_sample_rate
,
spec
gram_type
=
config
.
data
.
specgra
m_type
,
spec
trum_type
=
config
.
data
.
spectru
m_type
,
feat_dim
=
config
.
data
.
feat_dim
,
delta_delta
=
config
.
data
.
delta_delta
,
dither
=
config
.
data
.
dither
,
...
...
@@ -152,7 +152,7 @@ class ManifestDataset(Dataset):
n_fft
=
None
,
max_freq
=
None
,
target_sample_rate
=
16000
,
spec
gra
m_type
=
'linear'
,
spec
tru
m_type
=
'linear'
,
feat_dim
=
None
,
delta_delta
=
False
,
dither
=
1.0
,
...
...
@@ -180,7 +180,7 @@ class ManifestDataset(Dataset):
n_fft (int, optional): fft points for rfft. Defaults to None.
max_freq (int, optional): max cut freq. Defaults to None.
target_sample_rate (int, optional): target sample rate which used for training. Defaults to 16000.
specgram_type (str, optional): 'linear', 'mfcc' or 'fbank'. Defaults to 'linear'.
spectrum_type (str, optional): 'linear', 'mfcc' or 'fbank'. Defaults to 'linear'.
feat_dim (int, optional): audio feature dim, using by 'mfcc' or 'fbank'. Defaults to None.
delta_delta (bool, optional): audio feature with delta-delta, using by 'fbank' or 'mfcc'. Defaults to False.
use_dB_normalization (bool, optional): do dB normalization. Defaults to True.
...
...
@@ -200,7 +200,7 @@ class ManifestDataset(Dataset):
unit_type
=
unit_type
,
vocab_filepath
=
vocab_filepath
,
spm_model_prefix
=
spm_model_prefix
,
spec
gram_type
=
specgra
m_type
,
spec
trum_type
=
spectru
m_type
,
feat_dim
=
feat_dim
,
delta_delta
=
delta_delta
,
stride_ms
=
stride_ms
,
...
...
examples/aishell/s0/conf/deepspeech2.yaml
浏览文件 @
6f83be1a
...
...
@@ -13,7 +13,7 @@ data:
max_output_len
:
.inf
min_output_input_ratio
:
0.00
max_output_input_ratio
:
.inf
spec
gra
m_type
:
linear
spec
tru
m_type
:
linear
target_sample_rate
:
16000
max_freq
:
None
n_fft
:
None
...
...
examples/aishell/s0/local/data.sh
浏览文件 @
6f83be1a
...
...
@@ -46,7 +46,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
num_workers
=
$(
nproc
)
python3
${
MAIN_ROOT
}
/utils/compute_mean_std.py
\
--manifest_path
=
"data/manifest.train.raw"
\
--spec
gra
m_type
=
"linear"
\
--spec
tru
m_type
=
"linear"
\
--delta_delta
=
false
\
--stride_ms
=
10.0
\
--window_ms
=
20.0
\
...
...
examples/aishell/s1/conf/chunk_conformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -15,7 +15,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/aishell/s1/conf/conformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -15,7 +15,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/aishell/s1/local/data.sh
浏览文件 @
6f83be1a
...
...
@@ -46,7 +46,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
num_workers
=
$(
nproc
)
python3
${
MAIN_ROOT
}
/utils/compute_mean_std.py
\
--manifest_path
=
"data/manifest.train.raw"
\
--spec
gra
m_type
=
"fbank"
\
--spec
tru
m_type
=
"fbank"
\
--feat_dim
=
80
\
--delta_delta
=
false
\
--stride_ms
=
10.0
\
...
...
examples/librispeech/s0/conf/deepspeech2.yaml
浏览文件 @
6f83be1a
...
...
@@ -13,7 +13,7 @@ data:
max_output_len
:
.inf
min_output_input_ratio
:
0.00
max_output_input_ratio
:
.inf
spec
gra
m_type
:
linear
spec
tru
m_type
:
linear
target_sample_rate
:
16000
max_freq
:
None
n_fft
:
None
...
...
examples/librispeech/s0/local/data.sh
浏览文件 @
6f83be1a
...
...
@@ -62,7 +62,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python3
${
MAIN_ROOT
}
/utils/compute_mean_std.py
\
--manifest_path
=
"data/manifest.train.raw"
\
--num_samples
=
2000
\
--spec
gra
m_type
=
"linear"
\
--spec
tru
m_type
=
"linear"
\
--delta_delta
=
false
\
--sample_rate
=
16000
\
--stride_ms
=
10.0
\
...
...
examples/librispeech/s1/conf/chunk_confermer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/librispeech/s1/conf/chunk_transformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/librispeech/s1/conf/conformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/librispeech/s1/conf/transformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/librispeech/s1/local/data.sh
浏览文件 @
6f83be1a
...
...
@@ -68,7 +68,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python3
${
MAIN_ROOT
}
/utils/compute_mean_std.py
\
--manifest_path
=
"data/manifest.train.raw"
\
--num_samples
=
-1
\
--spec
gra
m_type
=
"fbank"
\
--spec
tru
m_type
=
"fbank"
\
--feat_dim
=
80
\
--delta_delta
=
false
\
--sample_rate
=
16000
\
...
...
examples/tiny/s0/conf/deepspeech2.yaml
浏览文件 @
6f83be1a
...
...
@@ -13,7 +13,7 @@ data:
max_output_len
:
400.0
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
spec
gra
m_type
:
linear
spec
tru
m_type
:
linear
target_sample_rate
:
16000
max_freq
:
None
n_fft
:
None
...
...
examples/tiny/s0/local/data.sh
浏览文件 @
6f83be1a
#!
/usr/bin/env
bash
#!
/bin/
bash
stage
=
-1
stop_stage
=
100
...
...
@@ -46,7 +46,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python3
${
MAIN_ROOT
}
/utils/compute_mean_std.py
\
--manifest_path
=
"data/manifest.tiny.raw"
\
--num_samples
=
64
\
--spec
gra
m_type
=
"linear"
\
--spec
tru
m_type
=
"linear"
\
--delta_delta
=
false
\
--sample_rate
=
16000
\
--stride_ms
=
10.0
\
...
...
examples/tiny/s1/conf/chunk_confermer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/tiny/s1/conf/chunk_transformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/tiny/s1/conf/conformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/tiny/s1/conf/transformer.yaml
浏览文件 @
6f83be1a
...
...
@@ -16,7 +16,7 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
spec
gra
m_type
:
fbank
#linear, mfcc, fbank
spec
tru
m_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
...
...
examples/tiny/s1/local/data.sh
浏览文件 @
6f83be1a
...
...
@@ -51,7 +51,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python3
${
MAIN_ROOT
}
/utils/compute_mean_std.py
\
--manifest_path
=
"data/manifest.tiny.raw"
\
--num_samples
=
64
\
--spec
gra
m_type
=
"fbank"
\
--spec
tru
m_type
=
"fbank"
\
--feat_dim
=
80
\
--delta_delta
=
false
\
--sample_rate
=
16000
\
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录