提交 ae7ef792 编写于 作者: X Xinghai Sun

Rename some folders and update examples.

上级 a00a436b
......@@ -41,7 +41,7 @@ MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--target_dir",
default=DATA_HOME + "/Libri",
default=DATA_HOME + "/libri",
type=str,
help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
......
"""Prepare Librispeech ASR datasets.
Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import os
import sys
import tarfile
import argparse
import soundfile
import json
import codecs
from paddle.v2.dataset.common import md5file
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = "http://www.openslr.org/resources/12"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1"
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--target_dir",
default=DATA_HOME + "/tiny",
type=str,
help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
"--manifest_prefix",
default="manifest",
type=str,
help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()
def download(url, md5sum, target_dir):
"""
Download file from url to target_dir, and check md5sum.
"""
if not os.path.exists(target_dir): os.makedirs(target_dir)
filepath = os.path.join(target_dir, url.split("/")[-1])
if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
print("Downloading %s ..." % url)
os.system("wget -c " + url + " -P " + target_dir)
print("\nMD5 Chesksum %s ..." % filepath)
if not md5file(filepath) == md5sum:
raise RuntimeError("MD5 checksum failed.")
else:
print("File exists, skip downloading. (%s)" % filepath)
return filepath
def unpack(filepath, target_dir):
"""
Unpack the file to the target_dir.
"""
print("Unpacking %s ..." % filepath)
tar = tarfile.open(filepath)
tar.extractall(target_dir)
tar.close()
def create_manifest(data_dir, manifest_path):
"""
Create a manifest json file summarizing the data set, with each line
containing the meta data (i.e. audio filepath, transcription text, audio
duration) of each audio file within the data set.
"""
print("Creating manifest %s ..." % manifest_path)
json_lines = []
for subfolder, _, filelist in sorted(os.walk(data_dir)):
text_filelist = [
filename for filename in filelist if filename.endswith('trans.txt')
]
if len(text_filelist) > 0:
text_filepath = os.path.join(data_dir, subfolder, text_filelist[0])
for line in open(text_filepath):
segments = line.strip().split()
text = ' '.join(segments[1:]).lower()
audio_filepath = os.path.join(data_dir, subfolder,
segments[0] + '.flac')
audio_data, samplerate = soundfile.read(audio_filepath)
duration = float(len(audio_data)) / samplerate
json_lines.append(
json.dumps({
'audio_filepath': audio_filepath,
'duration': duration,
'text': text
}))
with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
for line in json_lines:
out_file.write(line + '\n')
def prepare_dataset(url, md5sum, target_dir, manifest_path):
"""
Download, unpack and create summmary manifest file.
"""
if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
# download
filepath = download(url, md5sum, target_dir)
# unpack
unpack(filepath, target_dir)
else:
print("Skip downloading and unpacking. Data already exists in %s." %
target_dir)
# create manifest json file
create_manifest(target_dir, manifest_path)
def main():
prepare_dataset(
url=URL_DEV_CLEAN,
md5sum=MD5_DEV_CLEAN,
target_dir=os.path.join(args.target_dir, "dev-clean"),
manifest_path=args.manifest_prefix + ".dev-clean")
if __name__ == '__main__':
main()
......@@ -16,7 +16,7 @@ fi
cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train
# build vocabulary (for English data, we can just skip this)
# build vocabulary (can be skipped for English, as already provided)
# python tools/build_vocab.py \
# --count_threshold=0 \
# --vocab_path='data/librispeech/eng_vocab.txt' \
......
......@@ -3,32 +3,38 @@
pushd ../..
# download data, generate manifests
python data/librispeech/librispeech.py \
--manifest_prefix='data/librispeech/manifest' \
--full_download='True' \
--target_dir='~/.cache/paddle/dataset/speech/Libri'
python data/tiny/tiny.py \
--manifest_prefix='data/tiny/manifest' \
--target_dir=$HOME'/.cache/paddle/dataset/speech/tiny'
if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
fi
cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train
cat data/tiny/manifest.dev-clean | head -n 32 > data/tiny/manifest.train
cat data/tiny/manifest.dev-clean | head -n 48 | tail -n 16 > data/tiny/manifest.dev
cat data/tiny/manifest.dev-clean | head -n 64 | tail -n 16 > data/tiny/manifest.test
# build vocabulary (for English data, we can just skip this)
# python tools/build_vocab.py \
# --count_threshold=0 \
# --vocab_path='data/librispeech/eng_vocab.txt' \
# --manifest_paths='data/librispeech/manifeset.train'
# build vocabulary
python tools/build_vocab.py \
--count_threshold=0 \
--vocab_path='data/tiny/vocab.txt' \
--manifest_paths='data/tiny/manifest.train'
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
fi
# compute mean and stddev for normalizer
python tools/compute_mean_std.py \
--manifest_path='data/librispeech/manifest.train' \
--num_samples=2000 \
--manifest_path='data/tiny/manifest.train' \
--num_samples=32 \
--specgram_type='linear' \
--output_path='data/librispeech/mean_std.npz'
--output_path='data/tiny/mean_std.npz'
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
......@@ -36,4 +42,4 @@ if [ $? -ne 0 ]; then
fi
echo "LibriSpeech Data preparation done."
echo "Tiny data preparation done."
......@@ -4,7 +4,7 @@ pushd ../..
CUDA_VISIBLE_DEVICES=0 \
python -u infer.py \
--num_samples=10 \
--num_samples=4 \
--trainer_count=1 \
--beam_size=500 \
--num_proc_bsearch=12 \
......@@ -17,11 +17,11 @@ python -u infer.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.dev-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/eng_vocab.txt' \
--model_path='checkpoints/params.latest.tar.gz' \
--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
--infer_manifest='data/tiny/manifest.train' \
--mean_std_path='data/tiny/mean_std.npz' \
--vocab_path='data/tiny/vocab.txt' \
--model_path='checkpoints/params.pass-14.tar.gz' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
......@@ -2,17 +2,17 @@
pushd ../..
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
CUDA_VISIBLE_DEVICES=0,1 \
python -u train.py \
--batch_size=256 \
--trainer_count=8 \
--num_passes=50 \
--num_proc_data=12 \
--batch_size=2 \
--trainer_count=1 \
--num_passes=10 \
--num_proc_data=1 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=2048 \
--num_iter_print=100 \
--learning_rate=5e-4 \
--learning_rate=5e-5 \
--max_duration=27.0 \
--min_duration=0.0 \
--use_sortagrad=True \
......@@ -20,10 +20,10 @@ python -u train.py \
--use_gpu=True \
--is_local=True \
--share_rnn_weights=True \
--train_manifest='data/librispeech/manifest.train' \
--dev_manifest='data/librispeech/manifest.dev' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/eng_vocab.txt' \
--train_manifest='data/tiny/manifest.train' \
--dev_manifest='data/tiny/manifest.train' \
--mean_std_path='data/tiny/mean_std.npz' \
--vocab_path='data/tiny/vocab.txt' \
--output_model_dir='./checkpoints' \
--augment_conf_path='conf/augmentation.config' \
--specgram_type='linear' \
......
......@@ -7,7 +7,7 @@ import argparse
import functools
import paddle.v2 as paddle
from data_utils.data import DataGenerator
from models.model import DeepSpeech2Model
from model_utils.model import DeepSpeech2Model
from utils.error_rate import wer, cer
from utils.utility import add_arguments, print_arguments
......@@ -35,10 +35,10 @@ add_arg('mean_std_path', str,
'data/librispeech/mean_std.npz',
"Filepath of normalizer's mean & std.")
add_arg('vocab_path', str,
'data/librispeech/eng_vocab.txt',
'data/librispeech/vocab.txt',
"Filepath of vocabulary.")
add_arg('lang_model_path', str,
'lm/data/common_crawl_00.prune01111.trie.klm',
'model_zoo/lm/common_crawl_00.prune01111.trie.klm',
"Filepath for language model.")
add_arg('model_path', str,
'./checkpoints/params.latest.tar.gz',
......
......@@ -180,6 +180,8 @@ def ctc_beam_search_decoder(probs_seq,
prob = prob * ext_scoring_func(result)
log_prob = log(prob)
beam_result.append((log_prob, result))
else:
beam_result.append((float('-inf'), ''))
## output top beam_size decoding results
beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True)
......
......@@ -8,9 +8,10 @@ import os
import time
import gzip
import paddle.v2 as paddle
from lm.lm_scorer import LmScorer
from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder
from models.network import deep_speech_v2_network
from model_utils.lm_scorer import LmScorer
from model_utils.decoder import ctc_greedy_decoder, ctc_beam_search_decoder
from model_utils.decoder import ctc_beam_search_decoder_batch
from model_utils.network import deep_speech_v2_network
class DeepSpeech2Model(object):
......
......@@ -4,7 +4,7 @@ from __future__ import division
from __future__ import print_function
import unittest
from models import decoder
from model_utils import decoder
class TestDecoders(unittest.TestCase):
......
......@@ -14,6 +14,3 @@ if [ $MD5 != $md5_tmp ]; then
echo "Fail to download the language model!"
exit 1
fi
......@@ -7,7 +7,7 @@ import argparse
import functools
import paddle.v2 as paddle
from data_utils.data import DataGenerator
from models.model import DeepSpeech2Model
from model_utils.model import DeepSpeech2Model
from utils.error_rate import wer, cer
from utils.utility import add_arguments, print_arguments
......@@ -36,14 +36,14 @@ add_arg('mean_std_path', str,
'data/librispeech/mean_std.npz',
"Filepath of normalizer's mean & std.")
add_arg('vocab_path', str,
'data/librispeech/eng_vocab.txt',
'data/librispeech/vocab.txt',
"Filepath of vocabulary.")
add_arg('model_path', str,
'./checkpoints/params.latest.tar.gz',
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model.")
add_arg('lang_model_path', str,
'lm/data/common_crawl_00.prune01111.trie.klm',
'model_zoo/lm/common_crawl_00.prune01111.trie.klm',
"Filepath for language model.")
add_arg('decoding_method', str,
'ctc_beam_search',
......
......@@ -21,10 +21,8 @@ add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('count_threshold', int, 0, "Truncation threshold for char counts.")
add_arg('vocab_path', str,
None,
"Filepath to write the vocabulary.",
nargs='+',
required=True)
'data/librispeech/vocab.txt',
"Filepath to write the vocabulary.")
add_arg('manifest_paths', str,
None,
"Filepaths of manifests for building vocabulary. "
......
......@@ -9,7 +9,7 @@ import functools
import paddle.v2 as paddle
import _init_paths
from data_utils.data import DataGenerator
from models.model import DeepSpeech2Model
from model_utils.model import DeepSpeech2Model
from utils.error_rate import wer
from utils.utility import add_arguments, print_arguments
......@@ -41,10 +41,10 @@ add_arg('mean_std_path', str,
'data/librispeech/mean_std.npz',
"Filepath of normalizer's mean & std.")
add_arg('vocab_path', str,
'data/librispeech/eng_vocab.txt',
'data/librispeech/vocab.txt',
"Filepath of vocabulary.")
add_arg('lang_model_path', str,
'lm/data/common_crawl_00.prune01111.trie.klm',
'model_zoo/lm/common_crawl_00.prune01111.trie.klm',
"Filepath for language model.")
add_arg('model_path', str,
'./checkpoints/params.latest.tar.gz',
......
......@@ -6,7 +6,7 @@ from __future__ import print_function
import argparse
import functools
import paddle.v2 as paddle
from models.model import DeepSpeech2Model
from model_utils.model import DeepSpeech2Model
from data_utils.data import DataGenerator
from utils.utility import add_arguments, print_arguments
......@@ -41,7 +41,7 @@ add_arg('mean_std_path', str,
'data/librispeech/mean_std.npz',
"Filepath of normalizer's mean & std.")
add_arg('vocab_path', str,
'data/librispeech/eng_vocab.txt',
'data/librispeech/vocab.txt',
"Filepath of vocabulary.")
add_arg('init_model_path', str,
None,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册