Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
79212916
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
79212916
编写于
9月 05, 2017
作者:
X
Xinghai Sun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Sort the config lines to make it look better.
上级
dfd76523
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
99 additions
and
122 deletions
+99
-122
demo_server.py
demo_server.py
+22
-28
evaluate.py
evaluate.py
+19
-23
infer.py
infer.py
+19
-22
train.py
train.py
+19
-24
tune.py
tune.py
+20
-25
未找到文件。
demo_server.py
浏览文件 @
79212916
...
...
@@ -27,41 +27,25 @@ def add_arg(argname, type, default, help, **kwargs):
# yapf: disable
# configurations of overall
add_arg
(
'host_port'
,
int
,
8086
,
"Server's IP port."
)
add_arg
(
'host_ip'
,
str
,
'localhost'
,
"Server's IP address."
)
add_arg
(
'speech_save_dir'
,
str
,
'demo_cache'
,
"Directory to save demo audios."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
# configurations of decoder
add_arg
(
'beam_size'
,
int
,
500
,
"Beam search width."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'decoder_method'
,
str
,
'ctc_beam_search'
,
"Decoder method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
# configurations of data preprocess
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
# configurations of model structure
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of Simple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
# configurations of data io
add_arg
(
'warmup_manifest'
,
str
,
add_arg
(
'host_ip'
,
str
,
'localhost'
,
"Server's IP address."
)
add_arg
(
'speech_save_dir'
,
str
,
'demo_cache'
,
"Directory to save demo audios."
)
add_arg
(
'warmup_manifest'
,
str
,
'datasets/manifest.test'
,
"Filepath of manifest to warm up."
)
add_arg
(
'mean_std_path'
,
str
,
...
...
@@ -70,11 +54,21 @@ add_arg('mean_std_path', str,
add_arg
(
'vocab_path'
,
str
,
'datasets/vocab/eng_vocab.txt'
,
"Filepath of vocabulary."
)
# configurations of model io
add_arg
(
'model_path'
,
str
,
'./checkpoints/params.latest.tar.gz'
,
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model."
)
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'decoder_method'
,
str
,
'ctc_beam_search'
,
"Decoder method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
args
=
parser
.
parse_args
()
# yapf: disable
...
...
evaluate.py
浏览文件 @
79212916
...
...
@@ -26,39 +26,21 @@ def add_arg(argname, type, default, help, **kwargs):
# yapf: disable
# configurations of overall
add_arg
(
'batch_size'
,
int
,
128
,
"Minibatch size."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
"Error rate type for evaluation."
,
choices
=
[
'wer'
,
'cer'
])
# configurations of decoder
add_arg
(
'beam_size'
,
int
,
500
,
"Beam search width."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'parallels_bsearch'
,
int
,
NUM_CPU
,
"# of CPUs for beam search."
)
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'decoder_method'
,
str
,
'ctc_beam_search'
,
"Decoder method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
# configurations of data preprocess
add_arg
(
'parallels_data'
,
int
,
NUM_CPU
,
"# of CPUs for data preprocessing."
)
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
# configurations of model structure
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of Simple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
# configurations of data io
add_arg
(
'test_manifest'
,
str
,
'datasets/manifest.test'
,
"Filepath of manifest to evaluate."
)
...
...
@@ -68,11 +50,25 @@ add_arg('mean_std_path', str,
add_arg
(
'vocab_path'
,
str
,
'datasets/vocab/eng_vocab.txt'
,
"Filepath of vocabulary."
)
# configurations of model io
add_arg
(
'model_path'
,
str
,
'./checkpoints/params.latest.tar.gz'
,
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model."
)
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'decoder_method'
,
str
,
'ctc_beam_search'
,
"Decoder method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
"Error rate type for evaluation."
,
choices
=
[
'wer'
,
'cer'
])
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
args
=
parser
.
parse_args
()
# yapf: disable
...
...
infer.py
浏览文件 @
79212916
...
...
@@ -29,35 +29,18 @@ def add_arg(argname, type, default, help, **kwargs):
# configurations of overall
add_arg
(
'num_samples'
,
int
,
10
,
"# of samples to infer."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
"Error rate type for evaluation."
,
choices
=
[
'wer'
,
'cer'
])
# configurations of decoder
add_arg
(
'beam_size'
,
int
,
500
,
"Beam search width."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'parallels_bsearch'
,
int
,
NUM_CPU
,
"# of CPUs for beam search."
)
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'decoder_method'
,
str
,
'ctc_beam_search'
,
"Decoder method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
# configurations of data preprocess
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
# configurations of model structure
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
add_arg
(
'alpha'
,
float
,
0.36
,
"Coef of LM for beam search."
)
add_arg
(
'beta'
,
float
,
0.25
,
"Coef of WC for beam search."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of Simple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
# configurations of data io
add_arg
(
'infer_manifest'
,
str
,
'datasets/manifest.dev'
,
"Filepath of manifest to infer."
)
...
...
@@ -67,11 +50,25 @@ add_arg('mean_std_path', str,
add_arg
(
'vocab_path'
,
str
,
'datasets/vocab/eng_vocab.txt'
,
"Filepath of vocabulary."
)
# configurations of model io
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'model_path'
,
str
,
'./checkpoints/params.latest.tar.gz'
,
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model."
)
add_arg
(
'decoder_method'
,
str
,
'ctc_beam_search'
,
"Decoder method. Options: ctc_beam_search, ctc_greedy"
,
choices
=
[
'ctc_beam_search'
,
'ctc_greedy'
])
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
"Error rate type for evaluation."
,
choices
=
[
'wer'
,
'cer'
])
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
args
=
parser
.
parse_args
()
# yapf: disable
...
...
train.py
浏览文件 @
79212916
...
...
@@ -25,39 +25,24 @@ def add_arg(argname, type, default, help, **kwargs):
# yapf: disable
# configurations of optimization
add_arg
(
'batch_size'
,
int
,
256
,
"Minibatch size."
)
add_arg
(
'learning_rate'
,
float
,
5e-4
,
"Learning rate."
)
add_arg
(
'use_sortagrad'
,
bool
,
True
,
"Use SortaGrad or not."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'num_passes'
,
int
,
200
,
"# of training epochs."
)
add_arg
(
'is_local'
,
bool
,
True
,
"Use pserver or not."
)
add_arg
(
'num_iter_print'
,
int
,
100
,
"Every # iterations for printing "
"train cost."
)
# configurations of data preprocess
add_arg
(
'max_duration'
,
float
,
27.0
,
"Longest audio duration allowed."
)
add_arg
(
'min_duration'
,
float
,
0.0
,
"Shortest audio duration allowed."
)
add_arg
(
'parallels_data'
,
int
,
NUM_CPU
,
"# of CPUs for data preprocessing."
)
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
add_arg
(
'augment_conf_path'
,
str
,
'conf/augmentation.config'
,
"Filepath of augmentation configuration file (json-format)."
)
add_arg
(
'shuffle_method'
,
str
,
'batch_shuffle_clipped'
,
"Shuffle method."
,
choices
=
[
'instance_shuffle'
,
'batch_shuffle'
,
'batch_shuffle_clipped'
])
# configurations of model structure
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
add_arg
(
'num_iter_print'
,
int
,
100
,
"Every # iterations for printing "
"train cost."
)
add_arg
(
'learning_rate'
,
float
,
5e-4
,
"Learning rate."
)
add_arg
(
'max_duration'
,
float
,
27.0
,
"Longest audio duration allowed."
)
add_arg
(
'min_duration'
,
float
,
0.0
,
"Shortest audio duration allowed."
)
add_arg
(
'use_sortagrad'
,
bool
,
True
,
"Use SortaGrad or not."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'is_local'
,
bool
,
True
,
"Use pserver or not."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of Simple RNNs."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
# configurations of data io
add_arg
(
'train_manifest'
,
str
,
'datasets/manifest.train'
,
"Filepath of train manifest."
)
...
...
@@ -70,7 +55,6 @@ add_arg('mean_std_path', str,
add_arg
(
'vocab_path'
,
str
,
'datasets/vocab/eng_vocab.txt'
,
"Filepath of vocabulary."
)
# configurations of model io
add_arg
(
'init_model_path'
,
str
,
None
,
"If None, the training starts from scratch, "
...
...
@@ -78,6 +62,17 @@ add_arg('init_model_path', str,
add_arg
(
'output_model_dir'
,
str
,
"./checkpoints"
,
"Directory for saving checkpoints."
)
add_arg
(
'augment_conf_path'
,
str
,
'conf/augmentation.config'
,
"Filepath of augmentation configuration file (json-format)."
)
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
add_arg
(
'shuffle_method'
,
str
,
'batch_shuffle_clipped'
,
"Shuffle method."
,
choices
=
[
'instance_shuffle'
,
'batch_shuffle'
,
'batch_shuffle_clipped'
])
args
=
parser
.
parse_args
()
# yapf: disable
...
...
tune.py
浏览文件 @
79212916
...
...
@@ -27,40 +27,25 @@ def add_arg(argname, type, default, help, **kwargs):
# yapf: disable
# configurations of overall
add_arg
(
'num_samples'
,
int
,
100
,
"# of samples to infer."
)
add_arg
(
'trainer_count'
,
int
,
8
,
"# of Trainers (CPUs or GPUs)."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
"Error rate type for evaluation."
,
choices
=
[
'wer'
,
'cer'
])
# configurations of tuning parameters
add_arg
(
'alpha_from'
,
float
,
0.1
,
"Where alpha starts tuning from."
)
add_arg
(
'alpha_to'
,
float
,
0.36
,
"Where alpha ends tuning with."
)
add_arg
(
'num_alphas'
,
int
,
14
,
"# of alpha candidates for tuning."
)
add_arg
(
'beta_from'
,
float
,
0.05
,
"Where beta starts tuning from."
)
add_arg
(
'beta_to'
,
float
,
0.36
,
"Where beta ends tuning with."
)
add_arg
(
'num_betas'
,
int
,
20
,
"# of beta candidates for tuning."
)
# configurations of decoder
add_arg
(
'beam_size'
,
int
,
500
,
"Beam search width."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'parallels_bsearch'
,
int
,
NUM_CPU
,
"# of CPUs for beam search."
)
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
# configurations of data preprocess
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
# configurations of model structure
add_arg
(
'num_conv_layers'
,
int
,
2
,
"# of convolution layers."
)
add_arg
(
'num_rnn_layers'
,
int
,
3
,
"# of recurrent layers."
)
add_arg
(
'rnn_layer_size'
,
int
,
2048
,
"# of recurrent cells per layer."
)
add_arg
(
'num_alphas'
,
int
,
14
,
"# of alpha candidates for tuning."
)
add_arg
(
'num_betas'
,
int
,
20
,
"# of beta candidates for tuning."
)
add_arg
(
'alpha_from'
,
float
,
0.1
,
"Where alpha starts tuning from."
)
add_arg
(
'alpha_to'
,
float
,
0.36
,
"Where alpha ends tuning with."
)
add_arg
(
'beta_from'
,
float
,
0.05
,
"Where beta starts tuning from."
)
add_arg
(
'beta_to'
,
float
,
0.36
,
"Where beta ends tuning with."
)
add_arg
(
'cutoff_prob'
,
float
,
0.99
,
"Cutoff probability for pruning."
)
add_arg
(
'use_gru'
,
bool
,
False
,
"Use GRUs instead of Simple RNNs."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Use GPU or not."
)
add_arg
(
'share_rnn_weights'
,
bool
,
True
,
"Share input-hidden weights across "
"bi-directional RNNs. Not for GRU."
)
# configurations of data io
add_arg
(
'tune_manifest'
,
str
,
add_arg
(
'tune_manifest'
,
str
,
'datasets/manifest.test'
,
"Filepath of manifest to tune."
)
add_arg
(
'mean_std_path'
,
str
,
...
...
@@ -69,11 +54,21 @@ add_arg('mean_std_path', str,
add_arg
(
'vocab_path'
,
str
,
'datasets/vocab/eng_vocab.txt'
,
"Filepath of vocabulary."
)
# configurations of model io
add_arg
(
'lang_model_path'
,
str
,
'lm/data/common_crawl_00.prune01111.trie.klm'
,
"Filepath for language model."
)
add_arg
(
'model_path'
,
str
,
'./checkpoints/params.latest.tar.gz'
,
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model."
)
add_arg
(
'error_rate_type'
,
str
,
'wer'
,
"Error rate type for evaluation."
,
choices
=
[
'wer'
,
'cer'
])
add_arg
(
'specgram_type'
,
str
,
'linear'
,
"Audio feature type. Options: linear, mfcc."
,
choices
=
[
'linear'
,
'mfcc'
])
args
=
parser
.
parse_args
()
# yapf: disable
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录