From f7ba027331ede48eb52545ce4482e239204ca851 Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Tue, 11 Jul 2017 13:32:35 +0800
Subject: [PATCH] upload the language model

---
 deep_speech_2/README.md   | 38 ++++++++++++++++++++++++++++++++++++--
 deep_speech_2/evaluate.py |  3 ++-
 deep_speech_2/infer.py    |  2 +-
 deep_speech_2/lm/run.sh   | 20 ++++++++++++++++++--
 deep_speech_2/tune.py     |  2 +-
 5 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/deep_speech_2/README.md b/deep_speech_2/README.md
index 41acf102..48f4b0db 100644
--- a/deep_speech_2/README.md
+++ b/deep_speech_2/README.md
@@ -66,12 +66,36 @@ More help for arguments:
 python train.py --help
 ```
 
-### Inferencing
+### Preparing language model
+
+The following steps (inference, parameter tuning and evaluation) require a language model during decoding.
+A compressed language model is provided and can be downloaded by running
+
+```
+cd ./lm
+sh run.sh
+```
+
+After the download completes, return to the project root:
+
+```
+cd ..
+```
+
+### Inference
+
+For GPU inference
 
 ```
 CUDA_VISIBLE_DEVICES=0 python infer.py
 ```
 
+For CPU inference
+
+```
+python infer.py --use_gpu=False
+```
+
 More help for arguments:
 
 ```
@@ -92,14 +116,24 @@ python evaluate.py --help
 
 ### Parameters tuning
 
-Parameters tuning for the CTC beam search decoder
+Usually, the parameters $\alpha$ and $\beta$ for the CTC [prefix beam search](https://arxiv.org/abs/1408.2873) decoder need to be tuned after retraining the acoustic model.
+
+For GPU tuning
 
 ```
 CUDA_VISIBLE_DEVICES=0 python tune.py
 ```
 
+For CPU tuning
+
+```
+python tune.py --use_gpu=False
+```
+
 More help for arguments:
 
 ```
 python tune.py --help
 ```
+
+Then reset the parameters with the tuned values before running inference or evaluation.
diff --git a/deep_speech_2/evaluate.py b/deep_speech_2/evaluate.py
index a4f2a690..00516dcb 100644
--- a/deep_speech_2/evaluate.py
+++ b/deep_speech_2/evaluate.py
@@ -62,7 +62,7 @@ parser.add_argument(
 )
 parser.add_argument(
     "--language_model_path",
-    default="lm/data/1Billion.klm",
+    default="lm/data/common_crawl_00.prune01111.trie.klm",
     type=str,
     help="Path for language model. (default: %(default)s)")
 parser.add_argument(
@@ -139,6 +139,7 @@ def evaluate():
     batch_reader = data_generator.batch_reader_creator(
         manifest_path=args.decode_manifest_path,
         batch_size=args.batch_size,
+        min_batch_size=1,
         sortagrad=False,
         shuffle_method=None)
 
diff --git a/deep_speech_2/infer.py b/deep_speech_2/infer.py
index dc143080..bb81feac 100644
--- a/deep_speech_2/infer.py
+++ b/deep_speech_2/infer.py
@@ -89,7 +89,7 @@ parser.add_argument(
     help="Number of output per sample in beam search. (default: %(default)d)")
 parser.add_argument(
     "--language_model_path",
-    default="lm/data/1Billion.klm",
+    default="lm/data/common_crawl_00.prune01111.trie.klm",
     type=str,
     help="Path for language model. (default: %(default)s)")
 parser.add_argument(
diff --git a/deep_speech_2/lm/run.sh b/deep_speech_2/lm/run.sh
index bf523740..2108ea55 100644
--- a/deep_speech_2/lm/run.sh
+++ b/deep_speech_2/lm/run.sh
@@ -1,3 +1,19 @@
-echo "Downloading language model."
+echo "Downloading language model ..."
+
+mkdir data
+
+LM=common_crawl_00.prune01111.trie.klm
+MD5="099a601759d467cd0a8523ff939819c5"
+
+wget -c http://paddlepaddle.bj.bcebos.com/model_zoo/speech/$LM -P ./data
+
+echo "Checking md5sum ..."
+md5_tmp=`md5sum ./data/$LM | awk -F[' '] '{print $1}'`
+
+if [ "$MD5" != "$md5_tmp" ]; then
+    echo "Failed to download the language model!"
+    exit 1
+fi
+
+
 
-wget -c ftp://xxx/xxx/en.00.UNKNOWN.klm -P ./data
diff --git a/deep_speech_2/tune.py b/deep_speech_2/tune.py
index 4e9e268f..19a2d559 100644
--- a/deep_speech_2/tune.py
+++ b/deep_speech_2/tune.py
@@ -77,7 +77,7 @@ parser.add_argument(
     help="Width for beam search decoding. (default: %(default)d)")
 parser.add_argument(
     "--language_model_path",
-    default="lm/data/1Billion.klm",
+    default="lm/data/common_crawl_00.prune01111.trie.klm",
     type=str,
     help="Path for language model. (default: %(default)s)")
 parser.add_argument(
-- 
GitLab
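Note: the sketch below is not part of the patch. It is a minimal example of how the downloaded model might be re-verified and then passed to the decoder; only the file name, the MD5 value, the `--language_model_path` flag, and the `infer.py` invocation are taken from the patch above, and it assumes `lm/run.sh` has already been run from inside `lm/`.

```bash
#!/bin/bash
# Sketch: re-check the language model fetched by lm/run.sh and decode with it.
# Run from the deep_speech_2/ directory.
LM=lm/data/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"

# Recompute the checksum and compare it against the value expected by lm/run.sh.
if [ "$(md5sum "$LM" | awk '{print $1}')" != "$MD5" ]; then
    echo "Language model checksum mismatch; re-run lm/run.sh" >&2
    exit 1
fi

# GPU inference with the verified model, mirroring the README commands above.
CUDA_VISIBLE_DEVICES=0 python infer.py --language_model_path="$LM"
```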