Commit f329ecda, authored by Xinghai Sun

Update example scripts and README.md for DS2.

Parent: d28ee3fc
(This diff is collapsed.)
'
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
...
@@ -19,8 +19,6 @@ import json
 import codecs
 from paddle.v2.dataset.common import md5file
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 URL_ROOT = "http://www.openslr.org/resources/12"
 URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
 URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
@@ -41,7 +39,7 @@ MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
     "--target_dir",
-    default=DATA_HOME + "/libri",
+    default='~/.cache/paddle/dataset/speech/libri',
     type=str,
     help="Directory to save the dataset. (default: %(default)s)")
 parser.add_argument(
@@ -60,14 +58,14 @@ args = parser.parse_args()
 def download(url, md5sum, target_dir):
-    """
-    Download file from url to target_dir, and check md5sum.
+    """Download file from url to target_dir, and check md5sum.
     """
     if not os.path.exists(target_dir): os.makedirs(target_dir)
     filepath = os.path.join(target_dir, url.split("/")[-1])
     if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
         print("Downloading %s ..." % url)
-        os.system("wget -c " + url + " -P " + target_dir)
+        ret = os.system("wget -c " + url + " -P " + target_dir)
+        print(ret)
         print("\nMD5 Chesksum %s ..." % filepath)
         if not md5file(filepath) == md5sum:
             raise RuntimeError("MD5 checksum failed.")
@@ -77,8 +75,7 @@ def download(url, md5sum, target_dir):
 def unpack(filepath, target_dir):
-    """
-    Unpack the file to the target_dir.
+    """Unpack the file to the target_dir.
     """
     print("Unpacking %s ..." % filepath)
     tar = tarfile.open(filepath)
@@ -87,8 +84,7 @@ def unpack(filepath, target_dir):
 def create_manifest(data_dir, manifest_path):
-    """
-    Create a manifest json file summarizing the data set, with each line
+    """Create a manifest json file summarizing the data set, with each line
     containing the meta data (i.e. audio filepath, transcription text, audio
     duration) of each audio file within the data set.
     """
@@ -119,8 +115,7 @@ def create_manifest(data_dir, manifest_path):
 def prepare_dataset(url, md5sum, target_dir, manifest_path):
-    """
-    Download, unpack and create summmary manifest file.
+    """Download, unpack and create summmary manifest file.
     """
     if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
         # download
@@ -135,6 +130,8 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path):
 def main():
+    args.target_dir = os.path.expanduser(args.target_dir)
     prepare_dataset(
         url=URL_TEST_CLEAN,
         md5sum=MD5_TEST_CLEAN,
@@ -145,12 +142,12 @@ def main():
         md5sum=MD5_DEV_CLEAN,
         target_dir=os.path.join(args.target_dir, "dev-clean"),
         manifest_path=args.manifest_prefix + ".dev-clean")
-    prepare_dataset(
-        url=URL_TRAIN_CLEAN_100,
-        md5sum=MD5_TRAIN_CLEAN_100,
-        target_dir=os.path.join(args.target_dir, "train-clean-100"),
-        manifest_path=args.manifest_prefix + ".train-clean-100")
     if args.full_download:
+        prepare_dataset(
+            url=URL_TRAIN_CLEAN_100,
+            md5sum=MD5_TRAIN_CLEAN_100,
+            target_dir=os.path.join(args.target_dir, "train-clean-100"),
+            manifest_path=args.manifest_prefix + ".train-clean-100")
         prepare_dataset(
             url=URL_TEST_OTHER,
             md5sum=MD5_TEST_OTHER,
...
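Note: each manifest produced by `create_manifest` is a JSON-lines file, one object per audio clip with its filepath, duration, and transcription. A minimal sketch of the format (the key names and the `entries` input are illustrative, not the repo's exact code):

```python
import json
import wave

def write_manifest(entries, manifest_path):
    """entries: hypothetical list of (wav_path, transcript) pairs."""
    with open(manifest_path, 'w') as out:
        for wav_path, text in entries:
            w = wave.open(wav_path, 'rb')
            duration = w.getnframes() / float(w.getframerate())
            w.close()
            # one JSON object per line, as the training scripts expect
            out.write(json.dumps({'audio_filepath': wav_path,
                                  'duration': duration,
                                  'text': text}) + '\n')
```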
...
@@ -11,7 +11,7 @@ import wave
 import paddle.v2 as paddle
 import _init_paths
 from data_utils.data import DataGenerator
-from models.model import DeepSpeech2Model
+from model_utils.model import DeepSpeech2Model
 from data_utils.utils import read_manifest
 from utils.utility import add_arguments, print_arguments
...
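This hunk reflects the package rename from `models` to `model_utils`. Downstream scripts that still use the old path can bridge both layouts with a fallback import; a sketch, assuming nothing beyond the rename itself:

```python
try:
    from model_utils.model import DeepSpeech2Model  # layout after this commit
except ImportError:
    from models.model import DeepSpeech2Model       # pre-rename layout
```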
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
 # download data, generate manifests
 python data/librispeech/librispeech.py \
 --manifest_prefix='data/librispeech/manifest' \
---full_download='True' \
---target_dir='~/.cache/paddle/dataset/speech/Libri'
+--target_dir='~/.cache/paddle/dataset/speech/Libri' \
+--full_download='True'
 if [ $? -ne 0 ]; then
     echo "Prepare LibriSpeech failed. Terminated."
     exit 1
 fi
-cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train
+cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train
-# build vocabulary (can be skipped for English, as already provided)
-# python tools/build_vocab.py \
-# --count_threshold=0 \
-# --vocab_path='data/librispeech/eng_vocab.txt' \
-# --manifest_paths='data/librispeech/manifeset.train'
+# build vocabulary
+python tools/build_vocab.py \
+--count_threshold=0 \
+--vocab_path='data/librispeech/vocab.txt' \
+--manifest_paths='data/librispeech/manifest.train'
+if [ $? -ne 0 ]; then
+    echo "Build vocabulary failed. Terminated."
+    exit 1
+fi
 # compute mean and stddev for normalizer
@@ -37,3 +42,4 @@ fi
 echo "LibriSpeech Data preparation done."
+exit 0
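The vocabulary is no longer a checked-in `eng_vocab.txt`; it is generated by `tools/build_vocab.py` from the training manifest. At its core that is character counting over the transcripts; roughly (the `'text'` key matches the manifest sketch above, other details are assumptions):

```python
import json
from collections import Counter

def build_vocab(manifest_paths, vocab_path, count_threshold=0):
    counter = Counter()
    for path in manifest_paths:
        with open(path) as f:
            for line in f:
                counter.update(json.loads(line)['text'])
    with open(vocab_path, 'w') as out:
        # keep every character seen more than count_threshold times
        for char in sorted(c for c, n in counter.items() if n > count_threshold):
            out.write(char + '\n')
```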
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
+# download language model
+pushd models/lm > /dev/null
+sh download_lm_en.sh
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+popd > /dev/null
+# infer
 CUDA_VISIBLE_DEVICES=0 \
 python -u infer.py \
 --num_samples=10 \
 --trainer_count=1 \
 --beam_size=500 \
---num_proc_bsearch=12 \
+--num_proc_bsearch=8 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=2048 \
@@ -17,11 +27,19 @@ python -u infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
---infer_manifest='data/librispeech/manifest.dev-clean' \
+--infer_manifest='data/librispeech/manifest.test-clean' \
 --mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
---model_path='checkpoints/params.latest.tar.gz' \
---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
+--vocab_path='data/librispeech/vocab.txt' \
+--model_path='checkpoints/libri/params.latest.tar.gz' \
+--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --decoding_method='ctc_beam_search' \
 --error_rate_type='wer' \
 --specgram_type='linear'
+if [ $? -ne 0 ]; then
+    echo "Failed in inference!"
+    exit 1
+fi
+exit 0
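`--error_rate_type='wer'` selects word error rate: the word-level edit distance between hypothesis and reference, divided by the reference length. A self-contained reference implementation of that metric (not the repo's code):

```python
def wer(reference, hypothesis):
    ref, hyp = reference.split(), hypothesis.split()
    # prev[j] = edit distance between the ref prefix so far and hyp[:j]
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        cur = [i]
        for j, h in enumerate(hyp, 1):
            cur.append(min(prev[j] + 1,               # delete r
                           cur[j - 1] + 1,            # insert h
                           prev[j - 1] + (r != h)))   # substitute
        prev = cur
    return float(prev[-1]) / len(ref)

print(wer("the quick brown fox", "the quick brown box"))  # 0.25
```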
...
#! /usr/bin/bash
pushd ../.. > /dev/null
# download language model
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# download well-trained model
pushd models/librispeech > /dev/null
sh download_model.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
if [ $? -ne 0 ]; then
echo "Failed in inference!"
exit 1
fi
exit 0
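In these scripts `--alpha` and `--beta` weight the external language model and a word-insertion bonus during CTC beam search; the usual prefix score is log p_ctc + alpha * log p_lm + beta * word_count. Schematically (all inputs hypothetical):

```python
import math

def prefix_score(log_p_ctc, p_lm, word_count, alpha=0.36, beta=0.25):
    """Combined score used to rank beam-search prefixes (schematic)."""
    return log_p_ctc + alpha * math.log(p_lm) + beta * word_count
```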
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
+# download language model
+pushd models/lm > /dev/null
+sh download_lm_en.sh
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+popd > /dev/null
+# evaluate model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python -u evaluate.py \
+python -u test.py \
 --batch_size=128 \
 --trainer_count=8 \
 --beam_size=500 \
---num_proc_bsearch=12 \
---num_proc_data=12 \
+--num_proc_bsearch=8 \
+--num_proc_data=4 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=2048 \
@@ -20,9 +30,17 @@ python -u evaluate.py \
 --share_rnn_weights=True \
 --test_manifest='data/librispeech/manifest.test-clean' \
 --mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
---model_path='checkpoints/params.latest.tar.gz' \
---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
+--vocab_path='data/librispeech/vocab.txt' \
+--model_path='checkpoints/libri/params.latest.tar.gz' \
+--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --decoding_method='ctc_beam_search' \
 --error_rate_type='wer' \
 --specgram_type='linear'
+if [ $? -ne 0 ]; then
+    echo "Failed in evaluation!"
+    exit 1
+fi
+exit 0
...
#! /usr/bin/bash
pushd ../.. > /dev/null
# download language model
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# download well-trained model
pushd models/librispeech > /dev/null
sh download_model.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python -u test.py \
--batch_size=128 \
--trainer_count=8 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_proc_data=4 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--test_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
exit 1
fi
exit 0
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
+# train model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
 python -u train.py \
---batch_size=256 \
+--batch_size=512 \
 --trainer_count=8 \
 --num_passes=50 \
 --num_proc_data=12 \
@@ -23,8 +24,16 @@ python -u train.py \
 --train_manifest='data/librispeech/manifest.train' \
 --dev_manifest='data/librispeech/manifest.dev' \
 --mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
---output_model_dir='./checkpoints' \
+--vocab_path='data/librispeech/vocab.txt' \
+--output_model_dir='./checkpoints/libri' \
 --augment_conf_path='conf/augmentation.config' \
 --specgram_type='linear' \
 --shuffle_method='batch_shuffle_clipped'
+if [ $? -ne 0 ]; then
+    echo "Failed in training!"
+    exit 1
+fi
+exit 0
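`--use_sortagrad=True` together with `--min_duration`/`--max_duration` means the first training pass sees utterances ordered short-to-long, and clips outside the duration window are dropped. A sketch of that policy over manifest entries (the `'duration'` field as in the manifest sketch above):

```python
import random

def epoch_order(manifest, pass_id, min_duration=0.0, max_duration=27.0):
    kept = [m for m in manifest
            if min_duration <= m['duration'] <= max_duration]
    if pass_id == 0:
        # SortaGrad: short utterances first on the initial pass
        return sorted(kept, key=lambda m: m['duration'])
    random.shuffle(kept)
    return kept
```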
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
+# grid-search for hyper-parameters in language model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
 python -u tools/tune.py \
 --num_samples=100 \
@@ -23,8 +24,16 @@ python -u tools/tune.py \
 --share_rnn_weights=True \
 --tune_manifest='data/librispeech/manifest.dev-clean' \
 --mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
---model_path='checkpoints/params.latest.tar.gz' \
---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
+--vocab_path='data/librispeech/vocab.txt' \
+--model_path='checkpoints/libri/params.latest.tar.gz' \
+--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --error_rate_type='wer' \
 --specgram_type='linear'
+if [ $? -ne 0 ]; then
+    echo "Failed in tuning!"
+    exit 1
+fi
+exit 0
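`tools/tune.py` grid-searches the decoder weights alpha and beta against a dev manifest. Stripped of the model plumbing it is a plain sweep; `evaluate_wer` below is a hypothetical stand-in for one decode-and-score run:

```python
import itertools

def tune(evaluate_wer, alphas, betas):
    """Return the (alpha, beta) pair with the lowest dev-set WER."""
    return min(itertools.product(alphas, betas),
               key=lambda ab: evaluate_wer(*ab))

# e.g. tune(evaluate_wer, [0.1, 0.2, 0.36, 0.5], [0.05, 0.25, 0.45])
```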
...
#! /usr/bin/bash
pushd ../.. > /dev/null
# start demo client
CUDA_VISIBLE_DEVICES=0 \
python -u deploy/demo_client.py \
--host_ip='localhost' \
--host_port=8086
if [ $? -ne 0 ]; then
echo "Failed in starting demo client!"
exit 1
fi
exit 0
...
#! /usr/bin/bash
# TODO: replace the model with a mandarin model
pushd ../.. > /dev/null
# download language model
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# download well-trained model
pushd models/librispeech > /dev/null
sh download_model.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# start demo server
CUDA_VISIBLE_DEVICES=0 \
python -u deploy/demo_server.py \
--host_ip='localhost' \
--host_port=8086 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--speech_save_dir='demo_cache' \
--warmup_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--specgram_type='linear'
if [ $? -ne 0 ]; then
echo "Failed in starting demo server!"
exit 1
fi
exit 0
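The demo pairs a server (loads the model, warms up on `warmup_manifest`, listens on `host_ip:host_port`) with a client that records speech and sends it over. The actual wire format is internal to `demo_server.py`/`demo_client.py`; purely to illustrate the host/port plumbing, a hypothetical client that ships a WAV file over TCP:

```python
import socket

def send_wav(wav_path, host='localhost', port=8086):
    # hypothetical framing: the real demo client records from the
    # microphone and uses its own protocol
    sock = socket.create_connection((host, port))
    try:
        with open(wav_path, 'rb') as f:
            sock.sendall(f.read())
        print(sock.recv(4096))  # server's transcription reply
    finally:
        sock.close()
```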
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
 # download data, generate manifests
-python data/tiny/tiny.py \
+python data/librispeech/librispeech.py \
 --manifest_prefix='data/tiny/manifest' \
---target_dir=$HOME'/.cache/paddle/dataset/speech/tiny'
+--target_dir='~/.cache/paddle/dataset/speech/libri' \
+--full_download='False'
 if [ $? -ne 0 ]; then
     echo "Prepare LibriSpeech failed. Terminated."
     exit 1
 fi
-cat data/tiny/manifest.dev-clean | head -n 32 > data/tiny/manifest.train
-cat data/tiny/manifest.dev-clean | head -n 48 | tail -n 16 > data/tiny/manifest.dev
-cat data/tiny/manifest.dev-clean | head -n 64 | tail -n 16 > data/tiny/manifest.test
+head -n 64 data/tiny/manifest.dev-clean > data/tiny/manifest.tiny
 # build vocabulary
 python tools/build_vocab.py \
 --count_threshold=0 \
 --vocab_path='data/tiny/vocab.txt' \
---manifest_paths='data/tiny/manifest.train'
+--manifest_paths='data/tiny/manifest.dev'
 if [ $? -ne 0 ]; then
     echo "Build vocabulary failed. Terminated."
@@ -31,8 +30,8 @@ fi
 # compute mean and stddev for normalizer
 python tools/compute_mean_std.py \
---manifest_path='data/tiny/manifest.train' \
---num_samples=32 \
+--manifest_path='data/tiny/manifest.tiny' \
+--num_samples=64 \
 --specgram_type='linear' \
 --output_path='data/tiny/mean_std.npz'
@@ -43,3 +42,4 @@ fi
 echo "Tiny data preparation done."
+exit 0
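`tools/compute_mean_std.py` now estimates the feature normalizer from 64 utterances sampled out of `manifest.tiny`. Conceptually it is a per-dimension mean and standard deviation over stacked spectrogram frames; a numpy sketch (`features` is a hypothetical list of per-utterance arrays):

```python
import numpy as np

def compute_mean_std(features):
    """features: list of (num_frames, feature_dim) spectrogram arrays."""
    frames = np.concatenate(features, axis=0)
    return frames.mean(axis=0), frames.std(axis=0)

# mean, std = compute_mean_std(features)
# np.savez('data/tiny/mean_std.npz', mean=mean, std=std)  # key names illustrative
```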
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
+# download language model
+pushd models/lm > /dev/null
+sh download_lm_en.sh
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+popd > /dev/null
+# infer
 CUDA_VISIBLE_DEVICES=0 \
 python -u infer.py \
---num_samples=4 \
+--num_samples=10 \
 --trainer_count=1 \
 --beam_size=500 \
---num_proc_bsearch=12 \
+--num_proc_bsearch=8 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=2048 \
@@ -17,11 +27,19 @@ python -u infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
---infer_manifest='data/tiny/manifest.train' \
+--infer_manifest='data/tiny/manifest.tiny' \
 --mean_std_path='data/tiny/mean_std.npz' \
 --vocab_path='data/tiny/vocab.txt' \
---model_path='checkpoints/params.pass-14.tar.gz' \
+--model_path='checkpoints/tiny/params.pass-19.tar.gz' \
 --lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --decoding_method='ctc_beam_search' \
 --error_rate_type='wer' \
 --specgram_type='linear'
+if [ $? -ne 0 ]; then
+    echo "Failed in inference!"
+    exit 1
+fi
+exit 0
...
#! /usr/bin/bash
pushd ../.. > /dev/null
# download language model
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# download well-trained model
pushd models/librispeech > /dev/null
sh download_model.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
if [ $? -ne 0 ]; then
echo "Failed in inference!"
exit 1
fi
exit 0
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
+# download language model
+pushd models/lm > /dev/null
+sh download_lm_en.sh
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+popd > /dev/null
+# evaluate model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python -u evaluate.py \
---batch_size=128 \
+python -u test.py \
+--batch_size=16 \
 --trainer_count=8 \
 --beam_size=500 \
---num_proc_bsearch=12 \
---num_proc_data=12 \
+--num_proc_bsearch=8 \
+--num_proc_data=4 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=2048 \
@@ -18,11 +28,19 @@ python -u evaluate.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
---test_manifest='data/librispeech/manifest.test-clean' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
---model_path='checkpoints/params.latest.tar.gz' \
---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
+--test_manifest='data/tiny/manifest.tiny' \
+--mean_std_path='data/tiny/mean_std.npz' \
+--vocab_path='data/tiny/vocab.txt' \
+--model_path='checkpoints/params.pass-19.tar.gz' \
+--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --decoding_method='ctc_beam_search' \
 --error_rate_type='wer' \
 --specgram_type='linear'
+if [ $? -ne 0 ]; then
+    echo "Failed in evaluation!"
+    exit 1
+fi
+exit 0
...
#! /usr/bin/bash
pushd ../.. > /dev/null
# download language model
pushd models/lm > /dev/null
sh download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# download well-trained model
pushd models/librispeech > /dev/null
sh download_model.sh
if [ $? -ne 0 ]; then
exit 1
fi
popd > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python -u test.py \
--batch_size=128 \
--trainer_count=8 \
--beam_size=500 \
--num_proc_bsearch=8 \
--num_proc_data=4 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--alpha=0.36 \
--beta=0.25 \
--cutoff_prob=0.99 \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--test_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech/params.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
exit 1
fi
exit 0
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
-CUDA_VISIBLE_DEVICES=0,1 \
+# train model
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
 python -u train.py \
---batch_size=2 \
---trainer_count=1 \
---num_passes=10 \
+--batch_size=16 \
+--trainer_count=4 \
+--num_passes=20 \
 --num_proc_data=1 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=2048 \
 --num_iter_print=100 \
---learning_rate=5e-5 \
+--learning_rate=1e-5 \
 --max_duration=27.0 \
 --min_duration=0.0 \
 --use_sortagrad=True \
@@ -20,11 +21,19 @@ python -u train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=True \
---train_manifest='data/tiny/manifest.train' \
---dev_manifest='data/tiny/manifest.train' \
+--train_manifest='data/tiny/manifest.tiny' \
+--dev_manifest='data/tiny/manifest.tiny' \
 --mean_std_path='data/tiny/mean_std.npz' \
 --vocab_path='data/tiny/vocab.txt' \
---output_model_dir='./checkpoints' \
+--output_model_dir='./checkpoints/tiny' \
 --augment_conf_path='conf/augmentation.config' \
 --specgram_type='linear' \
 --shuffle_method='batch_shuffle_clipped'
+if [ $? -ne 0 ]; then
+    echo "Failed in training!"
+    exit 1
+fi
+exit 0
...
 #! /usr/bin/bash
-pushd ../..
+pushd ../.. > /dev/null
+# grid-search for hyper-parameters in language model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
 python -u tools/tune.py \
 --num_samples=100 \
@@ -21,10 +22,18 @@ python -u tools/tune.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
---tune_manifest='data/librispeech/manifest.dev-clean' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
---model_path='checkpoints/params.latest.tar.gz' \
---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
+--tune_manifest='data/tiny/manifest.tiny' \
+--mean_std_path='data/tiny/mean_std.npz' \
+--vocab_path='data/tiny/vocab.txt' \
+--model_path='checkpoints/params.pass-9.tar.gz' \
+--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --error_rate_type='wer' \
 --specgram_type='linear'
+if [ $? -ne 0 ]; then
+    echo "Failed in tuning!"
+    exit 1
+fi
+exit 0
...
#! /usr/bin/bash
source ../../utils/utility.sh
# TODO: add urls
URL='to-be-added'
MD5=5b4af224b26c1dc4dd972b7d32f2f52a
TARGET=./librispeech_model.tar.gz
echo "Download LibriSpeech model ..."
download $URL $MD5 $TARGET
if [ $? -ne 0 ]; then
echo "Fail to download LibriSpeech model!"
exit 1
fi
tar -zxvf $TARGET
exit 0
echo "Downloading language model ..."
mkdir data
LM=common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
wget -c http://paddlepaddle.bj.bcebos.com/model_zoo/speech/$LM -P ./data
echo "Checking md5sum ..."
md5_tmp=`md5sum ./data/$LM | awk -F[' '] '{print $1}'`
if [ $MD5 != $md5_tmp ]; then
echo "Fail to download the language model!"
exit 1
fi
#! /usr/bin/bash
source ../../utils/utility.sh
URL=http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
TARGET=./common_crawl_00.prune01111.trie.klm
echo "Download language model ..."
download $URL $MD5 $TARGET
if [ $? -ne 0 ]; then
echo "Fail to download the language model!"
exit 1
fi
exit 0
...
download() {
    URL=$1
    MD5=$2
    TARGET=$3
    if [ -e $TARGET ]; then
        md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'`
        if [ $MD5 == $md5_result ]; then
            echo "$TARGET already exists, download skipped."
            return 0
        fi
    fi
    wget -c $URL -P `dirname "$TARGET"`
    md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'`
    if [ $MD5 != $md5_result ]; then
        echo "Fail to download $TARGET!"
        return 1
    fi
}
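`utils/utility.sh` centralizes the download-then-verify pattern that every `download_*.sh` above sources. The same logic in Python, for comparison only (a sketch, not part of the repo):

```python
import hashlib
import os
import urllib  # Python 2 style, matching the rest of the codebase

def md5file(path):
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            md5.update(chunk)
    return md5.hexdigest()

def download(url, md5sum, target):
    if os.path.exists(target) and md5file(target) == md5sum:
        print("%s already exists, download skipped." % target)
        return
    urllib.urlretrieve(url, target)  # urllib.request.urlretrieve on Python 3
    if md5file(target) != md5sum:
        raise RuntimeError("MD5 checksum failed for %s" % target)
```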