Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
f329ecda
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f329ecda
编写于
9月 12, 2017
作者:
X
Xinghai Sun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update example scripts and README.md for DS2.
上级
d28ee3fc
变更
24
展开全部
隐藏空白更改
内联
并排
Showing
24 changed files
with
594 additions
and
177 deletions
+594
-177
deep_speech_2/README.md
deep_speech_2/README.md
+50
-46
deep_speech_2/data/librispeech/eng_vocab.txt
deep_speech_2/data/librispeech/eng_vocab.txt
+0
-28
deep_speech_2/data/librispeech/librispeech.py
deep_speech_2/data/librispeech/librispeech.py
+14
-17
deep_speech_2/deploy/demo_server.py
deep_speech_2/deploy/demo_server.py
+1
-1
deep_speech_2/examples/librispeech/run_data.sh
deep_speech_2/examples/librispeech/run_data.sh
+15
-9
deep_speech_2/examples/librispeech/run_infer.sh
deep_speech_2/examples/librispeech/run_infer.sh
+24
-6
deep_speech_2/examples/librispeech/run_infer_golden.sh
deep_speech_2/examples/librispeech/run_infer_golden.sh
+54
-0
deep_speech_2/examples/librispeech/run_test.sh
deep_speech_2/examples/librispeech/run_test.sh
+25
-7
deep_speech_2/examples/librispeech/run_test_golden.sh
deep_speech_2/examples/librispeech/run_test_golden.sh
+55
-0
deep_speech_2/examples/librispeech/run_train.sh
deep_speech_2/examples/librispeech/run_train.sh
+13
-4
deep_speech_2/examples/librispeech/run_tune.sh
deep_speech_2/examples/librispeech/run_tune.sh
+13
-4
deep_speech_2/examples/mandarin/run_demo_client.sh
deep_speech_2/examples/mandarin/run_demo_client.sh
+17
-0
deep_speech_2/examples/mandarin/run_demo_server.sh
deep_speech_2/examples/mandarin/run_demo_server.sh
+53
-0
deep_speech_2/examples/tiny/run_data.sh
deep_speech_2/examples/tiny/run_data.sh
+9
-9
deep_speech_2/examples/tiny/run_infer.sh
deep_speech_2/examples/tiny/run_infer.sh
+23
-5
deep_speech_2/examples/tiny/run_infer_golden.sh
deep_speech_2/examples/tiny/run_infer_golden.sh
+54
-0
deep_speech_2/examples/tiny/run_test.sh
deep_speech_2/examples/tiny/run_test.sh
+28
-10
deep_speech_2/examples/tiny/run_test_golden.sh
deep_speech_2/examples/tiny/run_test_golden.sh
+55
-0
deep_speech_2/examples/tiny/run_train.sh
deep_speech_2/examples/tiny/run_train.sh
+18
-9
deep_speech_2/examples/tiny/run_tune.sh
deep_speech_2/examples/tiny/run_tune.sh
+15
-6
deep_speech_2/models/librispeech/download_model.sh
deep_speech_2/models/librispeech/download_model.sh
+20
-0
deep_speech_2/models/lm/download_en.sh
deep_speech_2/models/lm/download_en.sh
+0
-16
deep_speech_2/models/lm/download_lm_en.sh
deep_speech_2/models/lm/download_lm_en.sh
+18
-0
deep_speech_2/utils/utility.sh
deep_speech_2/utils/utility.sh
+20
-0
未找到文件。
deep_speech_2/README.md
浏览文件 @
f329ecda
此差异已折叠。
点击以展开。
deep_speech_2/data/librispeech/eng_vocab.txt
已删除
100644 → 0
浏览文件 @
d28ee3fc
'
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
deep_speech_2/data/librispeech/librispeech.py
浏览文件 @
f329ecda
...
@@ -19,8 +19,6 @@ import json
...
@@ -19,8 +19,6 @@ import json
import
codecs
import
codecs
from
paddle.v2.dataset.common
import
md5file
from
paddle.v2.dataset.common
import
md5file
DATA_HOME
=
os
.
path
.
expanduser
(
'~/.cache/paddle/dataset/speech'
)
URL_ROOT
=
"http://www.openslr.org/resources/12"
URL_ROOT
=
"http://www.openslr.org/resources/12"
URL_TEST_CLEAN
=
URL_ROOT
+
"/test-clean.tar.gz"
URL_TEST_CLEAN
=
URL_ROOT
+
"/test-clean.tar.gz"
URL_TEST_OTHER
=
URL_ROOT
+
"/test-other.tar.gz"
URL_TEST_OTHER
=
URL_ROOT
+
"/test-other.tar.gz"
...
@@ -41,7 +39,7 @@ MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
...
@@ -41,7 +39,7 @@ MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
parser
.
add_argument
(
parser
.
add_argument
(
"--target_dir"
,
"--target_dir"
,
default
=
DATA_HOME
+
"/libri"
,
default
=
'~/.cache/paddle/dataset/speech/libri'
,
type
=
str
,
type
=
str
,
help
=
"Directory to save the dataset. (default: %(default)s)"
)
help
=
"Directory to save the dataset. (default: %(default)s)"
)
parser
.
add_argument
(
parser
.
add_argument
(
...
@@ -60,14 +58,14 @@ args = parser.parse_args()
...
@@ -60,14 +58,14 @@ args = parser.parse_args()
def
download
(
url
,
md5sum
,
target_dir
):
def
download
(
url
,
md5sum
,
target_dir
):
"""
"""Download file from url to target_dir, and check md5sum.
Download file from url to target_dir, and check md5sum.
"""
"""
if
not
os
.
path
.
exists
(
target_dir
):
os
.
makedirs
(
target_dir
)
if
not
os
.
path
.
exists
(
target_dir
):
os
.
makedirs
(
target_dir
)
filepath
=
os
.
path
.
join
(
target_dir
,
url
.
split
(
"/"
)[
-
1
])
filepath
=
os
.
path
.
join
(
target_dir
,
url
.
split
(
"/"
)[
-
1
])
if
not
(
os
.
path
.
exists
(
filepath
)
and
md5file
(
filepath
)
==
md5sum
):
if
not
(
os
.
path
.
exists
(
filepath
)
and
md5file
(
filepath
)
==
md5sum
):
print
(
"Downloading %s ..."
%
url
)
print
(
"Downloading %s ..."
%
url
)
os
.
system
(
"wget -c "
+
url
+
" -P "
+
target_dir
)
ret
=
os
.
system
(
"wget -c "
+
url
+
" -P "
+
target_dir
)
print
(
ret
)
print
(
"
\n
MD5 Chesksum %s ..."
%
filepath
)
print
(
"
\n
MD5 Chesksum %s ..."
%
filepath
)
if
not
md5file
(
filepath
)
==
md5sum
:
if
not
md5file
(
filepath
)
==
md5sum
:
raise
RuntimeError
(
"MD5 checksum failed."
)
raise
RuntimeError
(
"MD5 checksum failed."
)
...
@@ -77,8 +75,7 @@ def download(url, md5sum, target_dir):
...
@@ -77,8 +75,7 @@ def download(url, md5sum, target_dir):
def
unpack
(
filepath
,
target_dir
):
def
unpack
(
filepath
,
target_dir
):
"""
"""Unpack the file to the target_dir.
Unpack the file to the target_dir.
"""
"""
print
(
"Unpacking %s ..."
%
filepath
)
print
(
"Unpacking %s ..."
%
filepath
)
tar
=
tarfile
.
open
(
filepath
)
tar
=
tarfile
.
open
(
filepath
)
...
@@ -87,8 +84,7 @@ def unpack(filepath, target_dir):
...
@@ -87,8 +84,7 @@ def unpack(filepath, target_dir):
def
create_manifest
(
data_dir
,
manifest_path
):
def
create_manifest
(
data_dir
,
manifest_path
):
"""
"""Create a manifest json file summarizing the data set, with each line
Create a manifest json file summarizing the data set, with each line
containing the meta data (i.e. audio filepath, transcription text, audio
containing the meta data (i.e. audio filepath, transcription text, audio
duration) of each audio file within the data set.
duration) of each audio file within the data set.
"""
"""
...
@@ -119,8 +115,7 @@ def create_manifest(data_dir, manifest_path):
...
@@ -119,8 +115,7 @@ def create_manifest(data_dir, manifest_path):
def
prepare_dataset
(
url
,
md5sum
,
target_dir
,
manifest_path
):
def
prepare_dataset
(
url
,
md5sum
,
target_dir
,
manifest_path
):
"""
"""Download, unpack and create summmary manifest file.
Download, unpack and create summmary manifest file.
"""
"""
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
target_dir
,
"LibriSpeech"
)):
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
target_dir
,
"LibriSpeech"
)):
# download
# download
...
@@ -135,6 +130,8 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path):
...
@@ -135,6 +130,8 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path):
def
main
():
def
main
():
args
.
target_dir
=
os
.
path
.
expanduser
(
args
.
target_dir
)
prepare_dataset
(
prepare_dataset
(
url
=
URL_TEST_CLEAN
,
url
=
URL_TEST_CLEAN
,
md5sum
=
MD5_TEST_CLEAN
,
md5sum
=
MD5_TEST_CLEAN
,
...
@@ -145,12 +142,12 @@ def main():
...
@@ -145,12 +142,12 @@ def main():
md5sum
=
MD5_DEV_CLEAN
,
md5sum
=
MD5_DEV_CLEAN
,
target_dir
=
os
.
path
.
join
(
args
.
target_dir
,
"dev-clean"
),
target_dir
=
os
.
path
.
join
(
args
.
target_dir
,
"dev-clean"
),
manifest_path
=
args
.
manifest_prefix
+
".dev-clean"
)
manifest_path
=
args
.
manifest_prefix
+
".dev-clean"
)
prepare_dataset
(
url
=
URL_TRAIN_CLEAN_100
,
md5sum
=
MD5_TRAIN_CLEAN_100
,
target_dir
=
os
.
path
.
join
(
args
.
target_dir
,
"train-clean-100"
),
manifest_path
=
args
.
manifest_prefix
+
".train-clean-100"
)
if
args
.
full_download
:
if
args
.
full_download
:
prepare_dataset
(
url
=
URL_TRAIN_CLEAN_100
,
md5sum
=
MD5_TRAIN_CLEAN_100
,
target_dir
=
os
.
path
.
join
(
args
.
target_dir
,
"train-clean-100"
),
manifest_path
=
args
.
manifest_prefix
+
".train-clean-100"
)
prepare_dataset
(
prepare_dataset
(
url
=
URL_TEST_OTHER
,
url
=
URL_TEST_OTHER
,
md5sum
=
MD5_TEST_OTHER
,
md5sum
=
MD5_TEST_OTHER
,
...
...
deep_speech_2/deploy/demo_server.py
浏览文件 @
f329ecda
...
@@ -11,7 +11,7 @@ import wave
...
@@ -11,7 +11,7 @@ import wave
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
_init_paths
import
_init_paths
from
data_utils.data
import
DataGenerator
from
data_utils.data
import
DataGenerator
from
models.model
import
DeepSpeech2Model
from
model
_util
s.model
import
DeepSpeech2Model
from
data_utils.utils
import
read_manifest
from
data_utils.utils
import
read_manifest
from
utils.utility
import
add_arguments
,
print_arguments
from
utils.utility
import
add_arguments
,
print_arguments
...
...
deep_speech_2/examples/librispeech/
prepare
_data.sh
→
deep_speech_2/examples/librispeech/
run
_data.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# download data, generate manifests
# download data, generate manifests
python data/librispeech/librispeech.py
\
python data/librispeech/librispeech.py
\
--manifest_prefix
=
'data/librispeech/manifest'
\
--manifest_prefix
=
'data/librispeech/manifest'
\
--
full_download
=
'True
'
\
--
target_dir
=
'~/.cache/paddle/dataset/speech/Libri
'
\
--
target_dir
=
'~/.cache/paddle/dataset/speech/Libri
'
--
full_download
=
'True
'
if
[
$?
-ne
0
]
;
then
if
[
$?
-ne
0
]
;
then
echo
"Prepare LibriSpeech failed. Terminated."
echo
"Prepare LibriSpeech failed. Terminated."
exit
1
exit
1
fi
fi
cat
data/librispeech/manifest.train
*
|
shuf
>
data/librispeech/manifest.train
cat
data/librispeech/manifest.train
-
*
|
shuf
>
data/librispeech/manifest.train
# build vocabulary (can be skipped for English, as already provided)
# build vocabulary
# python tools/build_vocab.py \
python tools/build_vocab.py
\
# --count_threshold=0 \
--count_threshold
=
0
\
# --vocab_path='data/librispeech/eng_vocab.txt' \
--vocab_path
=
'data/librispeech/vocab.txt'
\
# --manifest_paths='data/librispeech/manifeset.train'
--manifest_paths
=
'data/librispeech/manifest.train'
if
[
$?
-ne
0
]
;
then
echo
"Build vocabulary failed. Terminated."
exit
1
fi
# compute mean and stddev for normalizer
# compute mean and stddev for normalizer
...
@@ -37,3 +42,4 @@ fi
...
@@ -37,3 +42,4 @@ fi
echo
"LibriSpeech Data preparation done."
echo
"LibriSpeech Data preparation done."
exit
0
deep_speech_2/examples/librispeech/run_infer.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# download language model
pushd
models/lm
>
/dev/null
sh download_lm_en.sh
if
[
$?
-ne
0
]
;
then
exit
1
fi
popd
>
/dev/null
# infer
CUDA_VISIBLE_DEVICES
=
0
\
CUDA_VISIBLE_DEVICES
=
0
\
python
-u
infer.py
\
python
-u
infer.py
\
--num_samples
=
10
\
--num_samples
=
10
\
--trainer_count
=
1
\
--trainer_count
=
1
\
--beam_size
=
500
\
--beam_size
=
500
\
--num_proc_bsearch
=
12
\
--num_proc_bsearch
=
8
\
--num_conv_layers
=
2
\
--num_conv_layers
=
2
\
--num_rnn_layers
=
3
\
--num_rnn_layers
=
3
\
--rnn_layer_size
=
2048
\
--rnn_layer_size
=
2048
\
...
@@ -17,11 +27,19 @@ python -u infer.py \
...
@@ -17,11 +27,19 @@ python -u infer.py \
--use_gru
=
False
\
--use_gru
=
False
\
--use_gpu
=
True
\
--use_gpu
=
True
\
--share_rnn_weights
=
True
\
--share_rnn_weights
=
True
\
--infer_manifest
=
'data/librispeech/manifest.
dev
-clean'
\
--infer_manifest
=
'data/librispeech/manifest.
test
-clean'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--vocab_path
=
'data/librispeech/
eng_
vocab.txt'
\
--vocab_path
=
'data/librispeech/vocab.txt'
\
--model_path
=
'checkpoints/params.latest.tar.gz'
\
--model_path
=
'checkpoints/
libri/
params.latest.tar.gz'
\
--lang_model_path
=
'
lm/data
/common_crawl_00.prune01111.trie.klm'
\
--lang_model_path
=
'
models/lm
/common_crawl_00.prune01111.trie.klm'
\
--decoding_method
=
'ctc_beam_search'
\
--decoding_method
=
'ctc_beam_search'
\
--error_rate_type
=
'wer'
\
--error_rate_type
=
'wer'
\
--specgram_type
=
'linear'
--specgram_type
=
'linear'
if
[
$?
-ne
0
]
;
then
echo
"Failed in inference!"
exit
1
fi
exit
0
deep_speech_2/examples/librispeech/run_infer_golden.sh
0 → 100644
浏览文件 @
f329ecda
#!/usr/bin/env bash

# Decode a few LibriSpeech utterances with the released pre-trained model.
# All paths are relative, so launch this script from its own directory.

pushd ../.. > /dev/null

# download language model
pushd models/lm > /dev/null
# The helper scripts rely on bash features (`source`), so run them with bash
# explicitly instead of whatever `sh` happens to be on this machine.
bash download_lm_en.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# download well-trained model
pushd models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# infer
# NOTE(review): the manifest now points at the LibriSpeech data directory, to
# match run_infer.sh in this example; it previously read the tiny example's
# manifest, which the LibriSpeech data preparation does not generate.
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

if [ $? -ne 0 ]; then
    echo "Failed in inference!"
    exit 1
fi


exit 0
deep_speech_2/examples/librispeech/run_test.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# download language model
pushd
models/lm
>
/dev/null
sh download_lm_en.sh
if
[
$?
-ne
0
]
;
then
exit
1
fi
popd
>
/dev/null
# evaluate model
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
python
-u
evaluate
.py
\
python
-u
test
.py
\
--batch_size
=
128
\
--batch_size
=
128
\
--trainer_count
=
8
\
--trainer_count
=
8
\
--beam_size
=
500
\
--beam_size
=
500
\
--num_proc_bsearch
=
12
\
--num_proc_bsearch
=
8
\
--num_proc_data
=
12
\
--num_proc_data
=
4
\
--num_conv_layers
=
2
\
--num_conv_layers
=
2
\
--num_rnn_layers
=
3
\
--num_rnn_layers
=
3
\
--rnn_layer_size
=
2048
\
--rnn_layer_size
=
2048
\
...
@@ -20,9 +30,17 @@ python -u evaluate.py \
...
@@ -20,9 +30,17 @@ python -u evaluate.py \
--share_rnn_weights
=
True
\
--share_rnn_weights
=
True
\
--test_manifest
=
'data/librispeech/manifest.test-clean'
\
--test_manifest
=
'data/librispeech/manifest.test-clean'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--vocab_path
=
'data/librispeech/
eng_
vocab.txt'
\
--vocab_path
=
'data/librispeech/vocab.txt'
\
--model_path
=
'checkpoints/params.latest.tar.gz'
\
--model_path
=
'checkpoints/
libri/
params.latest.tar.gz'
\
--lang_model_path
=
'
lm/data
/common_crawl_00.prune01111.trie.klm'
\
--lang_model_path
=
'
models/lm
/common_crawl_00.prune01111.trie.klm'
\
--decoding_method
=
'ctc_beam_search'
\
--decoding_method
=
'ctc_beam_search'
\
--error_rate_type
=
'wer'
\
--error_rate_type
=
'wer'
\
--specgram_type
=
'linear'
--specgram_type
=
'linear'
if
[
$?
-ne
0
]
;
then
echo
"Failed in evaluation!"
exit
1
fi
exit
0
deep_speech_2/examples/librispeech/run_test_golden.sh
0 → 100644
浏览文件 @
f329ecda
#!/usr/bin/env bash

# Evaluate the released pre-trained model on the LibriSpeech test-clean set.
# All paths are relative, so launch this script from its own directory.

pushd ../.. > /dev/null

# download language model
pushd models/lm > /dev/null
# The helper scripts rely on bash features (`source`), so run them with bash
# explicitly instead of whatever `sh` happens to be on this machine.
bash download_lm_en.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# download well-trained model
pushd models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# evaluate model
# NOTE(review): the manifest now points at the LibriSpeech data directory, to
# match run_test.sh in this example; it previously read the tiny example's
# manifest, which the LibriSpeech data preparation does not generate.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python -u test.py \
--batch_size=128 \
--trainer_count=8 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_proc_data=4 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--test_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
    exit 1
fi


exit 0
deep_speech_2/examples/librispeech/run_train.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# train model
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
python
-u
train.py
\
python
-u
train.py
\
--batch_size
=
256
\
--batch_size
=
512
\
--trainer_count
=
8
\
--trainer_count
=
8
\
--num_passes
=
50
\
--num_passes
=
50
\
--num_proc_data
=
12
\
--num_proc_data
=
12
\
...
@@ -23,8 +24,16 @@ python -u train.py \
...
@@ -23,8 +24,16 @@ python -u train.py \
--train_manifest
=
'data/librispeech/manifest.train'
\
--train_manifest
=
'data/librispeech/manifest.train'
\
--dev_manifest
=
'data/librispeech/manifest.dev'
\
--dev_manifest
=
'data/librispeech/manifest.dev'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--vocab_path
=
'data/librispeech/
eng_
vocab.txt'
\
--vocab_path
=
'data/librispeech/vocab.txt'
\
--output_model_dir
=
'./checkpoints'
\
--output_model_dir
=
'./checkpoints
/libri
'
\
--augment_conf_path
=
'conf/augmentation.config'
\
--augment_conf_path
=
'conf/augmentation.config'
\
--specgram_type
=
'linear'
\
--specgram_type
=
'linear'
\
--shuffle_method
=
'batch_shuffle_clipped'
--shuffle_method
=
'batch_shuffle_clipped'
if
[
$?
-ne
0
]
;
then
echo
"Failed in training!"
exit
1
fi
exit
0
deep_speech_2/examples/librispeech/run_tune.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# grid-search for hyper-parameters in language model
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
python
-u
tools/tune.py
\
python
-u
tools/tune.py
\
--num_samples
=
100
\
--num_samples
=
100
\
...
@@ -23,8 +24,16 @@ python -u tools/tune.py \
...
@@ -23,8 +24,16 @@ python -u tools/tune.py \
--share_rnn_weights
=
True
\
--share_rnn_weights
=
True
\
--tune_manifest
=
'data/librispeech/manifest.dev-clean'
\
--tune_manifest
=
'data/librispeech/manifest.dev-clean'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--mean_std_path
=
'data/librispeech/mean_std.npz'
\
--vocab_path
=
'data/librispeech/
eng_
vocab.txt'
\
--vocab_path
=
'data/librispeech/vocab.txt'
\
--model_path
=
'checkpoints/params.latest.tar.gz'
\
--model_path
=
'checkpoints/
libri/
params.latest.tar.gz'
\
--lang_model_path
=
'
lm/data
/common_crawl_00.prune01111.trie.klm'
\
--lang_model_path
=
'
models/lm
/common_crawl_00.prune01111.trie.klm'
\
--error_rate_type
=
'wer'
\
--error_rate_type
=
'wer'
\
--specgram_type
=
'linear'
--specgram_type
=
'linear'
if
[
$?
-ne
0
]
;
then
echo
"Failed in tuning!"
exit
1
fi
exit
0
deep_speech_2/examples/mandarin/run_demo_client.sh
0 → 100644
浏览文件 @
f329ecda
#!/usr/bin/env bash

# Start the demo client and connect it to a demo server on localhost:8086.
# All paths are relative, so launch this script from its own directory.

pushd ../.. > /dev/null

# start demo client
# The last argument deliberately has no trailing backslash: a dangling line
# continuation would splice whatever line follows into this command.
CUDA_VISIBLE_DEVICES=0 \
python -u deploy/demo_client.py \
--host_ip='localhost' \
--host_port=8086

if [ $? -ne 0 ]; then
    echo "Failed in starting demo client!"
    exit 1
fi


exit 0
deep_speech_2/examples/mandarin/run_demo_server.sh
0 → 100644
浏览文件 @
f329ecda
#!/usr/bin/env bash
# TODO: replace the model with a mandarin model

# Start the speech-recognition demo server on localhost:8086, fetching the
# language model and the pre-trained acoustic model first.
# All paths are relative, so launch this script from its own directory.

pushd ../.. > /dev/null

# download language model
pushd models/lm > /dev/null
# The helper scripts rely on bash features (`source`), so run them with bash
# explicitly instead of whatever `sh` happens to be on this machine.
bash download_lm_en.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# download well-trained model
pushd models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# start demo server
CUDA_VISIBLE_DEVICES=0 \
python -u deploy/demo_server.py \
--host_ip='localhost' \
--host_port=8086 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--speech_save_dir='demo_cache' \
--warmup_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--specgram_type='linear'

if [ $? -ne 0 ]; then
    echo "Failed in starting demo server!"
    exit 1
fi


exit 0
deep_speech_2/examples/tiny/run_data.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# download data, generate manifests
# download data, generate manifests
python data/
tiny/tiny
.py
\
python data/
librispeech/librispeech
.py
\
--manifest_prefix
=
'data/tiny/manifest'
\
--manifest_prefix
=
'data/tiny/manifest'
\
--target_dir
=
$HOME
'/.cache/paddle/dataset/speech/tiny'
--target_dir
=
'~/.cache/paddle/dataset/speech/libri'
\
--full_download
=
'False'
if
[
$?
-ne
0
]
;
then
if
[
$?
-ne
0
]
;
then
echo
"Prepare LibriSpeech failed. Terminated."
echo
"Prepare LibriSpeech failed. Terminated."
exit
1
exit
1
fi
fi
cat
data/tiny/manifest.dev-clean |
head
-n
32
>
data/tiny/manifest.train
head
-n
64 data/tiny/manifest.dev-clean
>
data/tiny/manifest.tiny
cat
data/tiny/manifest.dev-clean |
head
-n
48 |
tail
-n
16
>
data/tiny/manifest.dev
cat
data/tiny/manifest.dev-clean |
head
-n
64 |
tail
-n
16
>
data/tiny/manifest.test
# build vocabulary
# build vocabulary
python tools/build_vocab.py
\
python tools/build_vocab.py
\
--count_threshold
=
0
\
--count_threshold
=
0
\
--vocab_path
=
'data/tiny/vocab.txt'
\
--vocab_path
=
'data/tiny/vocab.txt'
\
--manifest_paths
=
'data/tiny/manifest.
train
'
--manifest_paths
=
'data/tiny/manifest.
dev
'
if
[
$?
-ne
0
]
;
then
if
[
$?
-ne
0
]
;
then
echo
"Build vocabulary failed. Terminated."
echo
"Build vocabulary failed. Terminated."
...
@@ -31,8 +30,8 @@ fi
...
@@ -31,8 +30,8 @@ fi
# compute mean and stddev for normalizer
# compute mean and stddev for normalizer
python tools/compute_mean_std.py
\
python tools/compute_mean_std.py
\
--manifest_path
=
'data/tiny/manifest.t
rain
'
\
--manifest_path
=
'data/tiny/manifest.t
iny
'
\
--num_samples
=
32
\
--num_samples
=
64
\
--specgram_type
=
'linear'
\
--specgram_type
=
'linear'
\
--output_path
=
'data/tiny/mean_std.npz'
--output_path
=
'data/tiny/mean_std.npz'
...
@@ -43,3 +42,4 @@ fi
...
@@ -43,3 +42,4 @@ fi
echo
"Tiny data preparation done."
echo
"Tiny data preparation done."
exit
0
deep_speech_2/examples/tiny/run_infer.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# download language model
pushd
models/lm
>
/dev/null
sh download_lm_en.sh
if
[
$?
-ne
0
]
;
then
exit
1
fi
popd
>
/dev/null
# infer
CUDA_VISIBLE_DEVICES
=
0
\
CUDA_VISIBLE_DEVICES
=
0
\
python
-u
infer.py
\
python
-u
infer.py
\
--num_samples
=
4
\
--num_samples
=
10
\
--trainer_count
=
1
\
--trainer_count
=
1
\
--beam_size
=
500
\
--beam_size
=
500
\
--num_proc_bsearch
=
12
\
--num_proc_bsearch
=
8
\
--num_conv_layers
=
2
\
--num_conv_layers
=
2
\
--num_rnn_layers
=
3
\
--num_rnn_layers
=
3
\
--rnn_layer_size
=
2048
\
--rnn_layer_size
=
2048
\
...
@@ -17,11 +27,19 @@ python -u infer.py \
...
@@ -17,11 +27,19 @@ python -u infer.py \
--use_gru
=
False
\
--use_gru
=
False
\
--use_gpu
=
True
\
--use_gpu
=
True
\
--share_rnn_weights
=
True
\
--share_rnn_weights
=
True
\
--infer_manifest
=
'data/tiny/manifest.t
rain
'
\
--infer_manifest
=
'data/tiny/manifest.t
iny
'
\
--mean_std_path
=
'data/tiny/mean_std.npz'
\
--mean_std_path
=
'data/tiny/mean_std.npz'
\
--vocab_path
=
'data/tiny/vocab.txt'
\
--vocab_path
=
'data/tiny/vocab.txt'
\
--model_path
=
'checkpoints/
params.pass-14
.tar.gz'
\
--model_path
=
'checkpoints/
tiny/params.pass-19
.tar.gz'
\
--lang_model_path
=
'models/lm/common_crawl_00.prune01111.trie.klm'
\
--lang_model_path
=
'models/lm/common_crawl_00.prune01111.trie.klm'
\
--decoding_method
=
'ctc_beam_search'
\
--decoding_method
=
'ctc_beam_search'
\
--error_rate_type
=
'wer'
\
--error_rate_type
=
'wer'
\
--specgram_type
=
'linear'
--specgram_type
=
'linear'
if
[
$?
-ne
0
]
;
then
echo
"Failed in inference!"
exit
1
fi
exit
0
deep_speech_2/examples/tiny/run_infer_golden.sh
0 → 100644
浏览文件 @
f329ecda
#!/usr/bin/env bash

# Decode a few utterances of the tiny example data with the released
# pre-trained LibriSpeech model.
# All paths are relative, so launch this script from its own directory.

pushd ../.. > /dev/null

# download language model
pushd models/lm > /dev/null
# The helper scripts rely on bash features (`source`), so run them with bash
# explicitly instead of whatever `sh` happens to be on this machine.
bash download_lm_en.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# download well-trained model
pushd models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# infer
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

if [ $? -ne 0 ]; then
    echo "Failed in inference!"
    exit 1
fi


exit 0
deep_speech_2/examples/tiny/run_test.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# download language model
pushd
models/lm
>
/dev/null
sh download_lm_en.sh
if
[
$?
-ne
0
]
;
then
exit
1
fi
popd
>
/dev/null
# evaluate model
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
python
-u
evaluate
.py
\
python
-u
test
.py
\
--batch_size
=
1
28
\
--batch_size
=
1
6
\
--trainer_count
=
8
\
--trainer_count
=
8
\
--beam_size
=
500
\
--beam_size
=
500
\
--num_proc_bsearch
=
12
\
--num_proc_bsearch
=
8
\
--num_proc_data
=
12
\
--num_proc_data
=
4
\
--num_conv_layers
=
2
\
--num_conv_layers
=
2
\
--num_rnn_layers
=
3
\
--num_rnn_layers
=
3
\
--rnn_layer_size
=
2048
\
--rnn_layer_size
=
2048
\
...
@@ -18,11 +28,19 @@ python -u evaluate.py \
...
@@ -18,11 +28,19 @@ python -u evaluate.py \
--use_gru
=
False
\
--use_gru
=
False
\
--use_gpu
=
True
\
--use_gpu
=
True
\
--share_rnn_weights
=
True
\
--share_rnn_weights
=
True
\
--test_manifest
=
'data/
librispeech/manifest.test-clean
'
\
--test_manifest
=
'data/
tiny/manifest.tiny
'
\
--mean_std_path
=
'data/
librispeech
/mean_std.npz'
\
--mean_std_path
=
'data/
tiny
/mean_std.npz'
\
--vocab_path
=
'data/
librispeech/eng_
vocab.txt'
\
--vocab_path
=
'data/
tiny/
vocab.txt'
\
--model_path
=
'checkpoints/params.
latest
.tar.gz'
\
--model_path
=
'checkpoints/params.
pass-19
.tar.gz'
\
--lang_model_path
=
'
lm/data
/common_crawl_00.prune01111.trie.klm'
\
--lang_model_path
=
'
models/lm
/common_crawl_00.prune01111.trie.klm'
\
--decoding_method
=
'ctc_beam_search'
\
--decoding_method
=
'ctc_beam_search'
\
--error_rate_type
=
'wer'
\
--error_rate_type
=
'wer'
\
--specgram_type
=
'linear'
--specgram_type
=
'linear'
if
[
$?
-ne
0
]
;
then
echo
"Failed in evaluation!"
exit
1
fi
exit
0
deep_speech_2/examples/tiny/run_test_golden.sh
0 → 100644
浏览文件 @
f329ecda
#!/usr/bin/env bash

# Evaluate the released pre-trained LibriSpeech model on the tiny example's
# test-clean manifest.
# All paths are relative, so launch this script from its own directory.

pushd ../.. > /dev/null

# download language model
pushd models/lm > /dev/null
# The helper scripts rely on bash features (`source`), so run them with bash
# explicitly instead of whatever `sh` happens to be on this machine.
bash download_lm_en.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# download well-trained model
pushd models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
    exit 1
fi
popd > /dev/null


# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python -u test.py \
--batch_size=128 \
--trainer_count=8 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_proc_data=4 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--test_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'

if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
    exit 1
fi


exit 0
deep_speech_2/examples/tiny/run_train.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
CUDA_VISIBLE_DEVICES
=
0,1
\
# train model
CUDA_VISIBLE_DEVICES
=
0,1,2,3
\
python
-u
train.py
\
python
-u
train.py
\
--batch_size
=
2
\
--batch_size
=
16
\
--trainer_count
=
1
\
--trainer_count
=
4
\
--num_passes
=
1
0
\
--num_passes
=
2
0
\
--num_proc_data
=
1
\
--num_proc_data
=
1
\
--num_conv_layers
=
2
\
--num_conv_layers
=
2
\
--num_rnn_layers
=
3
\
--num_rnn_layers
=
3
\
--rnn_layer_size
=
2048
\
--rnn_layer_size
=
2048
\
--num_iter_print
=
100
\
--num_iter_print
=
100
\
--learning_rate
=
5
e-5
\
--learning_rate
=
1
e-5
\
--max_duration
=
27.0
\
--max_duration
=
27.0
\
--min_duration
=
0.0
\
--min_duration
=
0.0
\
--use_sortagrad
=
True
\
--use_sortagrad
=
True
\
...
@@ -20,11 +21,19 @@ python -u train.py \
...
@@ -20,11 +21,19 @@ python -u train.py \
--use_gpu
=
True
\
--use_gpu
=
True
\
--is_local
=
True
\
--is_local
=
True
\
--share_rnn_weights
=
True
\
--share_rnn_weights
=
True
\
--train_manifest
=
'data/tiny/manifest.t
rain
'
\
--train_manifest
=
'data/tiny/manifest.t
iny
'
\
--dev_manifest
=
'data/tiny/manifest.t
rain
'
\
--dev_manifest
=
'data/tiny/manifest.t
iny
'
\
--mean_std_path
=
'data/tiny/mean_std.npz'
\
--mean_std_path
=
'data/tiny/mean_std.npz'
\
--vocab_path
=
'data/tiny/vocab.txt'
\
--vocab_path
=
'data/tiny/vocab.txt'
\
--output_model_dir
=
'./checkpoints'
\
--output_model_dir
=
'./checkpoints
/tiny
'
\
--augment_conf_path
=
'conf/augmentation.config'
\
--augment_conf_path
=
'conf/augmentation.config'
\
--specgram_type
=
'linear'
\
--specgram_type
=
'linear'
\
--shuffle_method
=
'batch_shuffle_clipped'
--shuffle_method
=
'batch_shuffle_clipped'
if
[
$?
-ne
0
]
;
then
echo
"Fail to do inference!"
exit
1
fi
exit
0
deep_speech_2/examples/tiny/run_tune.sh
浏览文件 @
f329ecda
#! /usr/bin/bash
#! /usr/bin/bash
pushd
../..
pushd
../..
>
/dev/null
# grid-search for hyper-parameters in language model
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
\
python
-u
tools/tune.py
\
python
-u
tools/tune.py
\
--num_samples
=
100
\
--num_samples
=
100
\
...
@@ -21,10 +22,18 @@ python -u tools/tune.py \
...
@@ -21,10 +22,18 @@ python -u tools/tune.py \
--use_gru
=
False
\
--use_gru
=
False
\
--use_gpu
=
True
\
--use_gpu
=
True
\
--share_rnn_weights
=
True
\
--share_rnn_weights
=
True
\
--tune_manifest
=
'data/
librispeech/manifest.dev-clean
'
\
--tune_manifest
=
'data/
tiny/manifest.tiny
'
\
--mean_std_path
=
'data/
librispeech
/mean_std.npz'
\
--mean_std_path
=
'data/
tiny
/mean_std.npz'
\
--vocab_path
=
'data/
librispeech/eng_
vocab.txt'
\
--vocab_path
=
'data/
tiny/
vocab.txt'
\
--model_path
=
'checkpoints/params.
latest
.tar.gz'
\
--model_path
=
'checkpoints/params.
pass-9
.tar.gz'
\
--lang_model_path
=
'
lm/data
/common_crawl_00.prune01111.trie.klm'
\
--lang_model_path
=
'
models/lm
/common_crawl_00.prune01111.trie.klm'
\
--error_rate_type
=
'wer'
\
--error_rate_type
=
'wer'
\
--specgram_type
=
'linear'
--specgram_type
=
'linear'
if
[
$?
-ne
0
]
;
then
echo
"Failed in tuning!"
exit
1
fi
exit
0
deep_speech_2/models/librispeech/download_model.sh
0 → 100644
浏览文件 @
f329ecda
#! /usr/bin/bash

# Download the pre-trained LibriSpeech model archive and unpack it in place.
# The helper library and the target are addressed relative to the working
# directory, so run this from the directory that contains this script.

# Load the shared download() helper. The POSIX `.` builtin is used instead of
# `source` because the example scripts start this file with `sh`, which is
# not bash on every system.
. ../../utils/utility.sh

# TODO: add urls
URL='to-be-added'
MD5=5b4af224b26c1dc4dd972b7d32f2f52a
TARGET=./librispeech_model.tar.gz


echo "Download LibriSpeech model ..."
download "$URL" "$MD5" "$TARGET"
if [ $? -ne 0 ]; then
    echo "Fail to download LibriSpeech model!"
    exit 1
fi

# Unpack the archive; report a broken archive instead of exiting with 0.
tar -zxvf "$TARGET"
if [ $? -ne 0 ]; then
    echo "Fail to unpack LibriSpeech model!"
    exit 1
fi


exit 0
deep_speech_2/models/lm/download_en.sh
已删除
100644 → 0
浏览文件 @
d28ee3fc
# Download the English language model into ./data and verify its MD5 checksum.

echo "Downloading language model ..."

# -p: do not complain when ./data is left over from an earlier run.
mkdir -p data

LM=common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"

wget -c http://paddlepaddle.bj.bcebos.com/model_zoo/speech/$LM -P ./data

echo "Checking md5sum ..."
md5_tmp=`md5sum ./data/$LM | awk '{print $1}'`

# Quote both operands: when the download failed the computed checksum is
# empty, and an unquoted empty operand turns the test into a syntax error
# that skips the failure branch.
if [ "$MD5" != "$md5_tmp" ]; then
    echo "Fail to download the language model!"
    exit 1
fi
deep_speech_2/models/lm/download_lm_en.sh
0 → 100644
浏览文件 @
f329ecda
#! /usr/bin/bash

# Download the English language model next to this script and verify it.
# The helper library and the target are addressed relative to the working
# directory, so run this from the directory that contains this script.

# Load the shared download() helper. The POSIX `.` builtin is used instead of
# `source` because the example scripts start this file with `sh`, which is
# not bash on every system.
. ../../utils/utility.sh

URL=http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
TARGET=./common_crawl_00.prune01111.trie.klm


echo "Download language model ..."
download "$URL" "$MD5" "$TARGET"
if [ $? -ne 0 ]; then
    echo "Fail to download the language model!"
    exit 1
fi


exit 0
deep_speech_2/utils/utility.sh
0 → 100644
浏览文件 @
f329ecda
# download URL MD5 TARGET
#
# Fetch URL into the directory of TARGET unless TARGET already exists with
# the expected MD5 checksum, then verify the checksum of the result.
#
# Arguments:
#   $1  URL to fetch with wget
#   $2  expected MD5 checksum (hex string) of the file
#   $3  path where the file is expected after the download
# Returns:
#   0 if TARGET exists with the expected checksum (already present or freshly
#   downloaded), 1 if the checksum does not match after the download attempt.
# Note: URL, MD5, TARGET and md5_result are left as global variables, as
# before, so scripts that read them after the call keep working.
download() {
    URL=$1
    MD5=$2
    TARGET=$3

    # Skip the download when a verified copy is already on disk.
    if [ -e "$TARGET" ]; then
        md5_result=$(md5sum "$TARGET" | awk '{print $1}')
        # `=` rather than `==`: the latter is a bash extension and this file
        # is also loaded by scripts that are run with plain `sh`.
        if [ "$MD5" = "$md5_result" ]; then
            echo "$TARGET already exists, download skipped."
            return 0
        fi
    fi

    wget -c "$URL" -P "$(dirname "$TARGET")"

    # Verify the result. The operands are quoted so that a missing file
    # (empty checksum) counts as a mismatch instead of breaking the test,
    # and the comparison is `!=`: the failure branch must run when the
    # checksums differ, not when they agree.
    md5_result=$(md5sum "$TARGET" | awk '{print $1}')
    if [ "$MD5" != "$md5_result" ]; then
        echo "Fail to download $TARGET: MD5 checksum mismatch!"
        return 1
    fi
    return 0
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录