Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
e8f7a8fd
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
接近 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e8f7a8fd
编写于
9月 05, 2017
作者:
X
Xinghai Sun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update argument naming following Yibing's reviews.
上级
9571b6fc
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
34 additions
and
34 deletions
+34
-34
demo_server.py
demo_server.py
+4
-4
evaluate.py
evaluate.py
+8
-8
infer.py
infer.py
+6
-6
model.py
model.py
+8
-8
train.py
train.py
+4
-4
tune.py
tune.py
+4
-4
未找到文件。
demo_server.py
浏览文件 @
e8f7a8fd
...
...
@@ -25,7 +25,7 @@ add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.")
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
S
imple RNNs."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
s
imple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
...
...
@@ -51,9 +51,9 @@ add_arg('model_path', str,
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'decod
er_method'
,
str
,
add_arg
(
'decod
ing_method'
,
str
,
'ctc_beam_search'
,
"Decod
er
method. Options: ctc_beam_search, ctc_greedy"
,
"Decod
ing
method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
add_arg
(
'specgram_type'
,
str
,
'linear'
,
...
...
@@ -160,7 +160,7 @@ def start_server():
feature
=
data_generator
.
process_utterance
(
filename
,
""
)
result_transcript
=
ds2_model
.
infer_batch
(
infer_data
=
[
feature
],
decod
er_method
=
args
.
decoder
_method
,
decod
ing_method
=
args
.
decoding
_method
,
beam_alpha
=
args
.
alpha
,
beam_beta
=
args
.
beta
,
beam_size
=
args
.
beam_size
,
...
...
evaluate.py
浏览文件 @
e8f7a8fd
...
...
@@ -17,15 +17,15 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg
(
'batch_size'
,
int
,
128
,
"Minibatch size."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'beam_size'
,
int
,
500
,
"Beam search width."
)
add_arg
(
'
parallels_bsearch'
,
int
,
12
,
"# of CPUs for beam search."
)
add_arg
(
'
parallels_data'
,
int
,
12
,
"# of CPUs for data preprocessing."
)
add_arg
(
'
num_proc_bsearch'
,
int
,
12
,
"# of CPUs for beam search."
)
add_arg
(
'
num_proc_data'
,
int
,
12
,
"# of CPUs for data preprocessing."
)
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
S
imple RNNs."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
s
imple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
...
...
@@ -45,9 +45,9 @@ add_arg('model_path', str,
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'decod
er_method'
,
str
,
add_arg
(
'decod
ing_method'
,
str
,
'ctc_beam_search'
,
"Decod
er
method. Options: ctc_beam_search, ctc_greedy"
,
"Decod
ing
method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
...
...
@@ -68,7 +68,7 @@ def evaluate():
mean_std_filepath
=
args
.
mean_std_path
,
augmentation_config
=
'{}'
,
specgram_type
=
args
.
specgram_type
,
num_threads
=
args
.
parallels
_data
)
num_threads
=
args
.
num_proc
_data
)
batch_reader
=
data_generator
.
batch_reader_creator
(
manifest_path
=
args
.
test_manifest
,
batch_size
=
args
.
batch_size
,
...
...
@@ -90,14 +90,14 @@ def evaluate():
for
infer_data
in
batch_reader
():
result_transcripts
=
ds2_model
.
infer_batch
(
infer_data
=
infer_data
,
decod
er_method
=
args
.
decoder
_method
,
decod
ing_method
=
args
.
decoding
_method
,
beam_alpha
=
args
.
alpha
,
beam_beta
=
args
.
beta
,
beam_size
=
args
.
beam_size
,
cutoff_prob
=
args
.
cutoff_prob
,
vocab_list
=
data_generator
.
vocab_list
,
language_model_path
=
args
.
lang_model_path
,
num_processes
=
args
.
parallels
_bsearch
)
num_processes
=
args
.
num_proc
_bsearch
)
target_transcripts
=
[
''
.
join
([
data_generator
.
vocab_list
[
token
]
for
token
in
transcript
])
for
_
,
transcript
in
infer_data
...
...
infer.py
浏览文件 @
e8f7a8fd
...
...
@@ -17,14 +17,14 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg
(
'num_samples'
,
int
,
10
,
"# of samples to infer."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'beam_size'
,
int
,
500
,
"Beam search width."
)
add_arg
(
'
parallels_bsearch'
,
int
,
12
,
"# of CPUs for beam search."
)
add_arg
(
'
num_proc_bsearch'
,
int
,
12
,
"# of CPUs for beam search."
)
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
S
imple RNNs."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
s
imple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
...
...
@@ -44,9 +44,9 @@ add_arg('model_path', str,
'./checkpoints/params.latest.tar.gz'
,
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model."
)
add_arg
(
'decod
er_method'
,
str
,
add_arg
(
'decod
ing_method'
,
str
,
'ctc_beam_search'
,
"Decod
er
method. Options: ctc_beam_search, ctc_greedy"
,
"Decod
ing
method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
...
...
@@ -86,14 +86,14 @@ def infer():
share_rnn_weights
=
args
.
share_rnn_weights
)
result_transcripts
=
ds2_model
.
infer_batch
(
infer_data
=
infer_data
,
decod
er_method
=
args
.
decoder
_method
,
decod
ing_method
=
args
.
decoding
_method
,
beam_alpha
=
args
.
alpha
,
beam_beta
=
args
.
beta
,
beam_size
=
args
.
beam_size
,
cutoff_prob
=
args
.
cutoff_prob
,
vocab_list
=
data_generator
.
vocab_list
,
language_model_path
=
args
.
lang_model_path
,
num_processes
=
args
.
parallels
_bsearch
)
num_processes
=
args
.
num_proc
_bsearch
)
error_rate_func
=
cer
if
args
.
error_rate_type
==
'cer'
else
wer
target_transcripts
=
[
...
...
model.py
浏览文件 @
e8f7a8fd
...
...
@@ -146,7 +146,7 @@ class DeepSpeech2Model(object):
# run inference
return
self
.
_loss_inferer
.
infer
(
input
=
infer_data
)
def
infer_batch
(
self
,
infer_data
,
decod
er
_method
,
beam_alpha
,
beam_beta
,
def
infer_batch
(
self
,
infer_data
,
decod
ing
_method
,
beam_alpha
,
beam_beta
,
beam_size
,
cutoff_prob
,
vocab_list
,
language_model_path
,
num_processes
):
"""Model inference. Infer the transcription for a batch of speech
...
...
@@ -156,9 +156,9 @@ class DeepSpeech2Model(object):
consisting of a tuple of audio features and
transcription text (empty string).
:type infer_data: list
:param decod
er
_method: Decoding method name, 'ctc_greedy' or
'ctc_beam_search'.
:param decod
er
_method: string
:param decod
ing
_method: Decoding method name, 'ctc_greedy' or
'ctc_beam_search'.
:param decod
ing
_method: string
:param beam_alpha: Parameter associated with language model.
:type beam_alpha: float
:param beam_beta: Parameter associated with word count.
...
...
@@ -190,13 +190,13 @@ class DeepSpeech2Model(object):
]
# run decoder
results
=
[]
if
decod
er
_method
==
"ctc_greedy"
:
if
decod
ing
_method
==
"ctc_greedy"
:
# best path decode
for
i
,
probs
in
enumerate
(
probs_split
):
output_transcription
=
ctc_greedy_decoder
(
probs_seq
=
probs
,
vocabulary
=
vocab_list
)
results
.
append
(
output_transcription
)
elif
decod
er
_method
==
"ctc_beam_search"
:
elif
decod
ing
_method
==
"ctc_beam_search"
:
# initialize external scorer
if
self
.
_ext_scorer
==
None
:
self
.
_ext_scorer
=
LmScorer
(
beam_alpha
,
beam_beta
,
...
...
@@ -217,8 +217,8 @@ class DeepSpeech2Model(object):
results
=
[
result
[
0
][
1
]
for
result
in
beam_search_results
]
else
:
raise
ValueError
(
"Decod
er
method [%s] is not supported."
%
decod
er
_method
)
raise
ValueError
(
"Decod
ing
method [%s] is not supported."
%
decod
ing
_method
)
return
results
def
_create_parameters
(
self
,
model_path
=
None
):
...
...
train.py
浏览文件 @
e8f7a8fd
...
...
@@ -16,7 +16,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg
(
'batch_size'
,
int
,
256
,
"Minibatch size."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'num_passes'
,
int
,
200
,
"# of training epochs."
)
add_arg
(
'
parallels_data'
,
int
,
12
,
"# of CPUs for data preprocessing."
)
add_arg
(
'
num_proc_data'
,
int
,
12
,
"# of CPUs for data preprocessing."
)
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
...
...
@@ -28,7 +28,7 @@ add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.")
add_arg
(
'use_sortagrad'
,
bool
,
True
,
"Use SortaGrad or not."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'is_local'
,
bool
,
True
,
"Use pserver or not."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
S
imple RNNs."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
s
imple RNNs."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
add_arg
(
'train_manifest'
,
str
,
...
...
@@ -74,13 +74,13 @@ def train():
max_duration
=
args
.
max_duration
,
min_duration
=
args
.
min_duration
,
specgram_type
=
args
.
specgram_type
,
num_threads
=
args
.
parallels
_data
)
num_threads
=
args
.
num_proc
_data
)
dev_generator
=
DataGenerator
(
vocab_filepath
=
args
.
vocab_path
,
mean_std_filepath
=
args
.
mean_std_path
,
augmentation_config
=
"{}"
,
specgram_type
=
args
.
specgram_type
,
num_threads
=
args
.
parallels
_data
)
num_threads
=
args
.
num_proc
_data
)
train_batch_reader
=
train_generator
.
batch_reader_creator
(
manifest_path
=
args
.
train_manifest
,
batch_size
=
args
.
batch_size
,
...
...
tune.py
浏览文件 @
e8f7a8fd
...
...
@@ -18,7 +18,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg
(
'num_samples'
,
int
,
100
,
"# of samples to infer."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'beam_size'
,
int
,
500
,
"Beam search width."
)
add_arg
(
'
parallels_bsearch'
,
int
,
12
,
"# of CPUs for beam search."
)
add_arg
(
'
num_proc_bsearch'
,
int
,
12
,
"# of CPUs for beam search."
)
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
...
...
@@ -29,7 +29,7 @@ add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.")
add_arg
(
'beta_from'
,
float
,
0.05
,
"Where beta starts tuning from."
)
add_arg
(
'beta_to'
,
float
,
0.36
,
"Where beta ends tuning with."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
S
imple RNNs."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of
s
imple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
...
...
@@ -104,14 +104,14 @@ def tune():
for
alpha
,
beta
in
params_grid
:
result_transcripts
=
ds2_model
.
infer_batch
(
infer_data
=
tune_data
,
decod
er
_method
=
'ctc_beam_search'
,
decod
ing
_method
=
'ctc_beam_search'
,
beam_alpha
=
alpha
,
beam_beta
=
beta
,
beam_size
=
args
.
beam_size
,
cutoff_prob
=
args
.
cutoff_prob
,
vocab_list
=
data_generator
.
vocab_list
,
language_model_path
=
args
.
lang_model_path
,
num_processes
=
args
.
parallels
_bsearch
)
num_processes
=
args
.
num_proc
_bsearch
)
wer_sum
,
num_ins
=
0.0
,
0
for
target
,
result
in
zip
(
target_transcripts
,
result_transcripts
):
wer_sum
+=
wer
(
target
,
result
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录