PaddlePaddle / DeepSpeech
Commit 4c64841c, authored Sep 19, 2017 by Yibing Liu
Parent: f7032c82

add tuning script & enable ploting error surface
Showing 2 changed files with 121 additions and 48 deletions:

examples/librispeech/run_tune.sh (+12, -10)
tools/tune.py (+109, -38)
examples/librispeech/run_tune.sh

```diff
@@ -5,27 +5,29 @@ pushd ../.. > /dev/null
 # grid-search for hyper-parameters in language model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
 python -u tools/tune.py \
---num_samples=100 \
+--num_batches=2 \
+--batch_size=24 \
 --trainer_count=8 \
 --beam_size=500 \
 --num_proc_bsearch=12 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=2048 \
---num_alphas=14 \
---num_betas=20 \
---alpha_from=0.1 \
---alpha_to=0.36 \
---beta_from=0.05 \
---beta_to=1.0 \
---cutoff_prob=0.99 \
+--num_alphas=2 \
+--num_betas=2 \
+--alpha_from=1.0 \
+--alpha_to=3.2 \
+--beta_from=0.1 \
+--beta_to=0.45 \
+--cutoff_prob=1.0 \
+--cutoff_top_n=40 \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
 --tune_manifest='data/librispeech/manifest.dev-clean' \
 --mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/vocab.txt' \
---model_path='checkpoints/libri/params.latest.tar.gz' \
+--vocab_path='models/librispeech/vocab.txt' \
+--model_path='models/librispeech/params.tar.gz' \
 --lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --error_rate_type='wer' \
 --specgram_type='linear'
```
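The alpha and beta swept by this script are the CTC beam-search scorer weights: alpha scales the language-model score and beta rewards word count, following the Deep Speech 2 formulation. As a minimal sketch of the objective being tuned (illustrative names and values, not code from this commit):

```python
def rescore(log_p_ctc, log_p_lm, word_count, alpha, beta):
    """Sketch of the tuned objective: CTC log-prob, plus alpha times the
    LM log-prob, plus beta times the word count of the hypothesis."""
    return log_p_ctc + alpha * log_p_lm + beta * word_count

# A larger alpha trusts the language model more; a larger beta offsets
# the LM's bias toward short transcriptions.
print(rescore(log_p_ctc=-12.4, log_p_lm=-8.1, word_count=5,
              alpha=1.0, beta=0.1))
```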
tools/tune.py

```diff
@@ -3,6 +3,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import sys
 import numpy as np
 import argparse
 import functools
```
```diff
@@ -16,26 +17,30 @@ from utils.utility import add_arguments, print_arguments
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
-add_arg('num_samples',      int,    100,    "# of samples to infer.")
-add_arg('trainer_count',    int,    8,      "# of Trainers (CPUs or GPUs).")
-add_arg('beam_size',        int,    500,    "Beam search width.")
-add_arg('num_proc_bsearch', int,    12,     "# of CPUs for beam search.")
-add_arg('num_conv_layers',  int,    2,      "# of convolution layers.")
-add_arg('num_rnn_layers',   int,    3,      "# of recurrent layers.")
-add_arg('rnn_layer_size',   int,    2048,   "# of recurrent cells per layer.")
-add_arg('num_alphas',       int,    14,     "# of alpha candidates for tuning.")
-add_arg('num_betas',        int,    20,     "# of beta candidates for tuning.")
-add_arg('alpha_from',       float,  0.1,    "Where alpha starts tuning from.")
-add_arg('alpha_to',         float,  0.36,   "Where alpha ends tuning with.")
-add_arg('beta_from',        float,  0.05,   "Where beta starts tuning from.")
-add_arg('beta_to',          float,  1.0,    "Where beta ends tuning with.")
-add_arg('cutoff_prob',      float,  0.99,   "Cutoff probability for pruning.")
-add_arg('use_gru',          bool,   False,  "Use GRUs instead of simple RNNs.")
-add_arg('use_gpu',          bool,   True,   "Use GPU or not.")
-add_arg('share_rnn_weights',bool,   True,   "Share input-hidden weights across "
-                                            "bi-directional RNNs. Not for GRU.")
+add_arg('num_batches',      int,    -1,     "# of batches tuning on. "
+                                            "Default -1, on whole dev set.")
+add_arg('batch_size',       int,    256,    "# of samples per batch.")
+add_arg('trainer_count',    int,    8,      "# of Trainers (CPUs or GPUs).")
+add_arg('beam_size',        int,    500,    "Beam search width.")
+add_arg('num_proc_bsearch', int,    12,     "# of CPUs for beam search.")
+add_arg('num_conv_layers',  int,    2,      "# of convolution layers.")
+add_arg('num_rnn_layers',   int,    3,      "# of recurrent layers.")
+add_arg('rnn_layer_size',   int,    2048,   "# of recurrent cells per layer.")
+add_arg('num_alphas',       int,    45,     "# of alpha candidates for tuning.")
+add_arg('num_betas',        int,    8,      "# of beta candidates for tuning.")
+add_arg('alpha_from',       float,  1.0,    "Where alpha starts tuning from.")
+add_arg('alpha_to',         float,  3.2,    "Where alpha ends tuning with.")
+add_arg('beta_from',        float,  0.1,    "Where beta starts tuning from.")
+add_arg('beta_to',          float,  0.45,   "Where beta ends tuning with.")
+add_arg('cutoff_prob',      float,  1.0,    "Cutoff probability for pruning.")
+add_arg('cutoff_top_n',     int,    40,     "Cutoff number for pruning.")
+add_arg('output_fig',       bool,   True,   "Output error rate figure or not.")
+add_arg('use_gru',          bool,   False,  "Use GRUs instead of simple RNNs.")
+add_arg('use_gpu',          bool,   True,   "Use GPU or not.")
+add_arg('share_rnn_weights',bool,   True,   "Share input-hidden weights across "
+                                            "bi-directional RNNs. Not for GRU.")
 add_arg('tune_manifest',    str,
-        'data/librispeech/manifest.dev',
+        'data/librispeech/manifest.dev-clean',
         "Filepath of manifest to tune.")
 add_arg('mean_std_path',    str,
         'data/librispeech/mean_std.npz',
```
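Each grid point costs one full beam-search decode of the batch, so the tuning budget is num_alphas * num_betas decodes per batch. A quick sketch of the grid the new defaults define (my own arithmetic, not code from the commit):

```python
import numpy as np

num_alphas, num_betas = 45, 8          # new defaults above
cand_alphas = np.linspace(1.0, 3.2, num_alphas)
cand_betas = np.linspace(0.1, 0.45, num_betas)
params_grid = [(a, b) for a in cand_alphas for b in cand_betas]

# 45 * 8 = 360 beam-search decodes of each batch per tuning pass.
print(len(params_grid))                # 360
```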
```diff
@@ -61,6 +66,23 @@ add_arg('specgram_type', str,
 # yapf: disable
 args = parser.parse_args()
 
+
+def plot_error_surface(params_grid, err_ave, fig_name):
+    import matplotlib.pyplot as plt
+    from mpl_toolkits.mplot3d import Axes3D
+    fig = plt.figure()
+    ax = Axes3D(fig)
+    alphas = [param[0] for param in params_grid]
+    betas = [param[1] for param in params_grid]
+    ALPHAS = np.reshape(alphas, (args.num_alphas, args.num_betas))
+    BETAS = np.reshape(betas, (args.num_alphas, args.num_betas))
+    ERR_AVE = np.reshape(err_ave, (args.num_alphas, args.num_betas))
+    ax.plot_surface(
+        ALPHAS, BETAS, ERR_AVE, rstride=1, cstride=1, alpha=0.8, cmap='rainbow')
+    ax.set_xlabel('alpha')
+    ax.set_ylabel('beta')
+    z_label = 'WER' if args.error_rate_type == 'wer' else 'CER'
+    ax.set_zlabel(z_label)
+    plt.savefig(fig_name)
+
+
 def tune():
     """Tune parameters alpha and beta on one minibatch."""
```
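plot_error_surface reshapes the flat grid back into num_alphas x num_betas arrays before drawing a single surface; the row-major reshape matches the alpha-outer, beta-inner order used to build params_grid. A self-contained sketch with a synthetic surface (all data below is illustrative, including the bowl-shaped "error surface"):

```python
import numpy as np
import matplotlib
matplotlib.use('Agg')  # headless backend, since the figure is saved to file
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection

num_alphas, num_betas = 45, 8
ALPHAS, BETAS = np.meshgrid(np.linspace(1.0, 3.2, num_alphas),
                            np.linspace(0.1, 0.45, num_betas),
                            indexing='ij')
# Synthetic stand-in for the averaged WER/CER at each (alpha, beta).
ERR_AVE = 0.10 + 0.02 * (ALPHAS - 2.0) ** 2 + 0.05 * (BETAS - 0.3) ** 2

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(ALPHAS, BETAS, ERR_AVE, rstride=1, cstride=1,
                alpha=0.8, cmap='rainbow')
ax.set_xlabel('alpha')
ax.set_ylabel('beta')
ax.set_zlabel('WER')
plt.savefig('error_surface_demo.png')
```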
```diff
@@ -77,7 +99,7 @@ def tune():
         num_threads=1)
     batch_reader = data_generator.batch_reader_creator(
         manifest_path=args.tune_manifest,
-        batch_size=args.num_samples,
+        batch_size=args.batch_size,
         sortagrad=False,
         shuffle_method=None)
     tune_data = batch_reader().next()
```

```diff
@@ -95,31 +117,80 @@ def tune():
         pretrained_model_path=args.model_path,
         share_rnn_weights=args.share_rnn_weights)
 
+    # decoders only accept string encoded in utf-8
+    vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]
+
+    error_rate_func = cer if args.error_rate_type == 'cer' else wer
     # create grid for search
     cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas)
     cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas)
     params_grid = [(alpha, beta) for alpha in cand_alphas
                    for beta in cand_betas]
 
-    ## tune parameters in loop
-    for alpha, beta in params_grid:
-        result_transcripts = ds2_model.infer_batch(
-            infer_data=tune_data,
-            decoding_method='ctc_beam_search',
-            beam_alpha=alpha,
-            beam_beta=beta,
-            beam_size=args.beam_size,
-            cutoff_prob=args.cutoff_prob,
-            vocab_list=data_generator.vocab_list,
-            language_model_path=args.lang_model_path,
-            num_processes=args.num_proc_bsearch)
-        wer_sum, num_ins = 0.0, 0
-        for target, result in zip(target_transcripts, result_transcripts):
-            wer_sum += wer(target, result)
-            num_ins += 1
-        print("alpha = %f\tbeta = %f\tWER = %f" %
-              (alpha, beta, wer_sum / num_ins))
+    err_sum = [0.0 for i in xrange(len(params_grid))]
+    err_ave = [0.0 for i in xrange(len(params_grid))]
+    num_ins, cur_batch = 0, 0
+    ## incremental tuning parameters over multiple batches
+    for infer_data in batch_reader():
+        if (args.num_batches >= 0) and (cur_batch >= args.num_batches):
+            break
+        target_transcripts = [
+            ''.join([data_generator.vocab_list[token] for token in transcript])
+            for _, transcript in infer_data
+        ]
+        num_ins += len(target_transcripts)
+        # grid search
+        for index, (alpha, beta) in enumerate(params_grid):
+            result_transcripts = ds2_model.infer_batch(
+                infer_data=infer_data,
+                decoding_method='ctc_beam_search',
+                beam_alpha=alpha,
+                beam_beta=beta,
+                beam_size=args.beam_size,
+                cutoff_prob=args.cutoff_prob,
+                cutoff_top_n=args.cutoff_top_n,
+                vocab_list=vocab_list,
+                language_model_path=args.lang_model_path,
+                num_processes=args.num_proc_bsearch)
+            for target, result in zip(target_transcripts, result_transcripts):
+                err_sum[index] += error_rate_func(target, result)
+            err_ave[index] = err_sum[index] / num_ins
+            # print("alpha = %f, beta = %f, WER = %f" %
+            #       (alpha, beta, err_ave[index]))
+            if index % 10 == 0:
+                sys.stdout.write('.')
+                sys.stdout.flush()
+
+        # output on-line tuning result at the end of current batch
+        err_ave_min = min(err_ave)
+        min_index = err_ave.index(err_ave_min)
+        print("\nBatch %d, opt. (alpha, beta) = (%f, %f), min. error_rate = %f"
+              % (cur_batch, params_grid[min_index][0],
+                 params_grid[min_index][1], err_ave_min))
+        cur_batch += 1
+
+    # output WER/CER at every point
+    print("\nerror rate at each point:\n")
+    for index in xrange(len(params_grid)):
+        print("(%f, %f), error_rate = %f"
+              % (params_grid[index][0], params_grid[index][1],
+                 err_ave[index]))
+
+    err_ave_min = min(err_ave)
+    min_index = err_ave.index(err_ave_min)
+    print("\nTuning on %d batches, opt. (alpha, beta) = (%f, %f)"
+          % (args.num_batches, params_grid[min_index][0],
+             params_grid[min_index][1]))
+
+    if args.output_fig:
+        fig_name = ("error_surface_alphas_%d_betas_%d"
+                    % (args.num_alphas, args.num_betas))
+        plot_error_surface(params_grid, err_ave, fig_name)
+        ds2_model.logger.info("output figure %s" % fig_name)
+
+    ds2_model.logger.info("finish inference")
 
 
 def main():
     print_arguments(args)
```
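Two things worth noting in the loop above: the script targets Python 2 (xrange, batch_reader().next()), and the running average err_sum[index] / num_ins is taken over all instances seen so far, so the per-batch optimum it prints converges toward the whole-set optimum. A minimal Python 3 sketch of that bookkeeping (stand-in random numbers, not real error rates):

```python
import random

params_grid = [(a, b) for a in (1.0, 2.1, 3.2) for b in (0.1, 0.45)]
err_sum = [0.0] * len(params_grid)
num_ins = 0

for batch in range(3):                       # stand-in for batch_reader()
    batch_size = 4
    num_ins += batch_size
    for i in range(len(params_grid)):
        # stand-in for the summed per-utterance error rates of this batch
        err_sum[i] += sum(random.random() for _ in range(batch_size))
    err_ave = [s / num_ins for s in err_sum]  # re-average over all batches
    best = err_ave.index(min(err_ave))
    print("Batch %d, opt. (alpha, beta) = %s, min. error_rate = %f"
          % (batch, params_grid[best], err_ave[best]))
```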