diff --git a/.clang_format.hook b/.clang_format.hook
deleted file mode 100755
index 4cbc972bbd200d0dcb6d8ba404bb1286ee81736c..0000000000000000000000000000000000000000
--- a/.clang_format.hook
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-readonly VERSION="3.9"
-
-version=$(clang-format -version)
-
-if ! [[ $version == *"$VERSION"* ]]; then
- echo "clang-format version check failed."
- echo "a version contains '$VERSION' is needed, but get '$version'"
- echo "you can install the right version, and make an soft-link to '\$PATH' env"
- exit -1
-fi
-
-clang-format $@
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ede1c53a4aefc9cc144ea20f57193bc5dbb886ec..7aad026c56d3b9265aa8280e49a51c2f5dcb6fb8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,6 +30,12 @@
- id: clang-format
name: clang-format
description: Format files with ClangFormat
- entry: bash .clang_format.hook -i
+ entry: bash .pre-commit-hooks/clang-format.hook -i
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
+ - id: copyright_checker
+ name: copyright_checker
+ entry: python .pre-commit-hooks/copyright-check.hook
+ language: system
+ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+ exclude: (?=decoders/swig).*(\.cpp|\.h)$
\ No newline at end of file
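As an aside on the new `copyright_checker` entry: pre-commit treats `files` and `exclude` as Python regular expressions matched against each candidate path, so the lookahead in `exclude` skips C++ sources and headers under `decoders/swig` while still checking Python files there. A minimal sketch of how the two patterns interact (the paths are illustrative):

```python
import re

# Patterns copied verbatim from the copyright_checker entry above.
FILES = re.compile(r"\.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$")
EXCLUDE = re.compile(r"(?=decoders/swig).*(\.cpp|\.h)$")

def is_checked(path):
    """Return True if the copyright checker would run on this path."""
    return bool(FILES.search(path)) and not EXCLUDE.search(path)

for path in [
        "data_utils/audio.py",       # checked: .py and not excluded
        "decoders/swig/scorer.cpp",  # skipped: C++ source under decoders/swig
        "decoders/swig/setup.py",    # checked: exclude only covers .cpp/.h
        "README.md",                 # skipped: extension not in `files`
]:
    print(path, "->", "check" if is_checked(path) else "skip")
```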
diff --git a/.pre-commit-hooks/clang-format.hook b/.pre-commit-hooks/clang-format.hook
new file mode 100755
index 0000000000000000000000000000000000000000..ceb4a7ea21f122e76349a1d192cfe27538f320c7
--- /dev/null
+++ b/.pre-commit-hooks/clang-format.hook
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+set -e
+
+readonly VERSION="3.9"
+
+version=$(clang-format -version)
+
+# if ! [[ $version == *"$VERSION"* ]]; then
+#     echo "clang-format version check failed."
+#     echo "a version containing '$VERSION' is needed, but got '$version'"
+#     echo "you can install the right version and add a soft link to it in '\$PATH'"
+#     exit 1
+# fi
+
+clang-format "$@"
diff --git a/.pre-commit-hooks/copyright-check.hook b/.pre-commit-hooks/copyright-check.hook
new file mode 100644
index 0000000000000000000000000000000000000000..26044c29e4fdc827abb4ba2d415db66c780fd366
--- /dev/null
+++ b/.pre-commit-hooks/copyright-check.hook
@@ -0,0 +1,131 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import datetime
+import io
+import platform
+import re
+import sys
+
+COPYRIGHT = '''
+Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+LANG_COMMENT_MARK = None
+
+NEW_LINE_MARK = None
+
+COPYRIGHT_HEADER = None
+
+if platform.system() == "Windows":
+    NEW_LINE_MARK = "\r\n"
+else:
+    NEW_LINE_MARK = '\n'
+
+# The triple-quoted template always uses '\n' in source, so split on that
+# regardless of platform, then stamp the header with the current year.
+COPYRIGHT_HEADER = COPYRIGHT.split('\n')[1]
+p = re.search(r'(\d{4})', COPYRIGHT_HEADER).group(0)
+COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, str(datetime.date.today().year))
+
+
+def generate_copyright(template, lang='C'):
+    """Render the license template as a comment block for the given language."""
+    if lang == 'Python':
+        LANG_COMMENT_MARK = '#'
+    else:
+        LANG_COMMENT_MARK = "//"
+
+    # The template itself always uses '\n'; NEW_LINE_MARK only affects output.
+    lines = template.split('\n')
+    BLANK = " "
+    ans = LANG_COMMENT_MARK + BLANK + COPYRIGHT_HEADER + NEW_LINE_MARK
+    for lino, line in enumerate(lines):
+        # Skip the leading blank line, the header line (already emitted above)
+        # and the trailing blank line of the template.
+        if lino == 0 or lino == 1 or lino == len(lines) - 1:
+            continue
+        BLANK = "" if len(line) == 0 else " "
+        ans += LANG_COMMENT_MARK + BLANK + line + NEW_LINE_MARK
+
+    return ans + "\n"
+
+
+def lang_type(filename):
+    if filename.endswith(".py"):
+        return "Python"
+    elif filename.endswith((".h", ".c", ".hpp", ".cc", ".cpp", ".cu", ".cuh",
+                            ".go", ".proto")):
+        return "C"
+    else:
+        print("Unsupported filetype %s" % filename)
+        sys.exit(0)
+
+
+PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
+
+
+def main(argv=None):
+ parser = argparse.ArgumentParser(
+ description='Checker for copyright declaration.')
+ parser.add_argument('filenames', nargs='*', help='Filenames to check')
+ args = parser.parse_args(argv)
+
+ retv = 0
+    for filename in args.filenames:
+        with io.open(filename, encoding="utf-8") as fd:
+            first_line = fd.readline()
+            second_line = fd.readline()
+            fd.seek(0)
+            original_contents = fd.read()
+        # Skip files that already carry a copyright notice.
+        if "COPYRIGHT (C)" in first_line.upper():
+            continue
+        # Never insert the header above a shebang or a coding declaration.
+        if first_line.startswith("#!") or PYTHON_ENCODE.match(
+                second_line) is not None or PYTHON_ENCODE.match(
+                    first_line) is not None:
+            continue
+        new_contents = generate_copyright(
+            COPYRIGHT, lang_type(filename)) + original_contents
+        print('Auto Insert Copyright Header {}'.format(filename))
+        retv = 1
+        with io.open(filename, 'w', encoding="utf-8") as output_file:
+            output_file.write(new_contents)
+
+ return retv
+
+
+if __name__ == '__main__':
+    sys.exit(main())
\ No newline at end of file
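For a quick sense of what the hook emits, here is a condensed standalone sketch of the header rendering (a re-implementation for illustration only, not the hook's own code); it shows the comment mark chosen for Python versus C-family sources:

```python
# Condensed sketch of generate_copyright(): prefix each template line with the
# language's line-comment mark ('#' for Python, '//' for everything else).
TEMPLATE = """
Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
"""

def render(template, lang="C"):
    mark = "#" if lang == "Python" else "//"
    lines = [(mark + " " + line).rstrip()
             for line in template.strip("\n").split("\n")]
    return "\n".join(lines) + "\n"

print(render(TEMPLATE, lang="Python"))  # lines start with '# '
print(render(TEMPLATE, lang="C"))       # lines start with '// '
```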
diff --git a/README.md b/README.md
index 521351c37c0666f9c59d4a44263dfac57c3f06cf..7d99ef99b7d5a2a6e105cdaf78e5efdda7412bbc 100644
--- a/README.md
+++ b/README.md
@@ -6,21 +6,17 @@
## Table of Contents
- [Installation](#installation)
-- [Running in Docker Container](#running-in-docker-container)
- [Getting Started](#getting-started)
- [Data Preparation](#data-preparation)
- [Training a Model](#training-a-model)
-- [Data Augmentation Pipeline](#data-augmentation-pipeline)
- [Inference and Evaluation](#inference-and-evaluation)
- [Hyper-parameters Tuning](#hyper-parameters-tuning)
-- [Training for Mandarin Language](#training-for-mandarin-language)
- [Trying Live Demo with Your Own Voice](#trying-live-demo-with-your-own-voice)
-- [Released Models](#released-models)
- [Experiments and Benchmarks](#experiments-and-benchmarks)
+- [Released Models](#released-models)
- [Questions and Help](#questions-and-help)
-
## Installation
To avoid the trouble of environment setup, [running in Docker container](#running-in-docker-container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually.
@@ -105,41 +101,16 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org
```
Notice that this is only a toy example with a tiny sampled subset of LibriSpeech. If you would like to try with the complete dataset (would take several days for training), please go to `examples/librispeech` instead.
-- Prepare the data
-
- ```bash
- sh run_data.sh
- ```
-
- `run_data.sh` will download dataset, generate manifests, collect normalizer's statistics and build vocabulary. Once the data preparation is done, you will find the data (only part of LibriSpeech) downloaded in `./dataset/librispeech` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time you run this dataset and is reusable for all further experiments.
-- Train your own ASR model
-
- ```bash
- sh run_train.sh
- ```
-
- `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. These checkpoints could be used for training resuming, inference, evaluation and deployment.
-- Case inference with an existing model
-
- ```bash
- sh run_infer.sh
- ```
-
- `run_infer.sh` will show us some speech-to-text decoding results for several (default: 10) samples with the trained model. The performance might not be good now as the current model is only trained with a toy subset of LibriSpeech. To see the results with a better model, you can download a well-trained (trained for several days, with the complete LibriSpeech) model and do the inference:
-
+- Source the environment
+
```bash
- sh run_infer_golden.sh
+ source path.sh
```
-- Evaluate an existing model
+  Set `MAIN_ROOT` to the project directory.
+- Run the main entrypoint
```bash
- sh run_test.sh
- ```
-
- `run_test.sh` will evaluate the model with Word Error Rate (or Character Error Rate) measurement. Similarly, you can also download a well-trained model and test its performance:
-
- ```bash
- sh run_test_golden.sh
+ bash run.sh
```
More detailed information is provided in the following sections. We wish you a happy journey with the *DeepSpeech2 on PaddlePaddle* ASR engine!
@@ -158,7 +129,7 @@ More detailed information are provided in the following sections. Wish you a hap
To use your custom data, you only need to generate such manifest files to summarize the dataset. Given such summarized manifests, training, inference and all other modules can be aware of where to access the audio files, as well as their meta data including the transcription labels.
-For how to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which will download data and generate manifest files for LibriSpeech dataset.
+For how to generate such manifest files, please refer to `examples/librispeech/local/librispeech.py`, which will download data and generate manifest files for LibriSpeech dataset.
### Compute Mean & Stddev for Normalizer
@@ -168,11 +139,11 @@ To perform z-score normalization (zero-mean, unit stddev) upon audio features, w
python3 tools/compute_mean_std.py \
--num_samples 2000 \
--specgram_type linear \
---manifest_path data/librispeech/manifest.train \
---output_path data/librispeech/mean_std.npz
+--manifest_path examples/librispeech/data/manifest.train \
+--output_path examples/librispeech/data/mean_std.npz
```
-It will compute the mean and standard deviatio of power spectrum feature with 2000 random sampled audio clips listed in `data/librispeech/manifest.train` and save the results to `data/librispeech/mean_std.npz` for further usage.
+It will compute the mean and standard deviation of the power spectrum features with 2000 randomly sampled audio clips listed in `examples/librispeech/data/manifest.train` and save the results to `examples/librispeech/data/mean_std.npz` for further usage.
### Build Vocabulary
@@ -182,18 +153,18 @@ A vocabulary of possible characters is required to convert the transcription int
```bash
python3 tools/build_vocab.py \
--count_threshold 0 \
---vocab_path data/librispeech/eng_vocab.txt \
---manifest_paths data/librispeech/manifest.train
+--vocab_path examples/librispeech/data/eng_vocab.txt \
+--manifest_paths examples/librispeech/data/manifest.train
```
-It will write a vocabuary file `data/librispeeech/eng_vocab.txt` with all transcription text in `data/librispeech/manifest.train`, without vocabulary truncation (`--count_threshold 0`).
+It will write a vocabulary file `examples/librispeech/data/eng_vocab.txt` with all transcription text in `examples/librispeech/data/manifest.train`, without vocabulary truncation (`--count_threshold 0`).
### More Help
For more help on arguments:
```bash
-python3 data/librispeech/librispeech.py --help
+python3 examples/librispeech/local/librispeech.py --help
python3 tools/compute_mean_std.py --help
python3 tools/build_vocab.py --help
```
@@ -226,10 +197,10 @@ For more help on arguments:
```bash
python3 train.py --help
```
-or refer to `example/librispeech/run_train.sh`.
+or refer to `examples/librispeech/local/run_train.sh`.
-## Data Augmentation Pipeline
+### Data Augmentation Pipeline
Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perturbation (label-invariant transformation) added upon raw audios. You don't have to do the syntheses on your own, as it is already embedded into the data provider and is done on the fly, randomly for each epoch during training.
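For context, the pipeline is configured with a JSON list of augmentors, each applied to a training sample with a given probability. A minimal sketch of such a config; the `type` names mirror the augmentor modules touched later in this diff, but the parameter keys are assumptions for illustration:

```python
import json

# Hypothetical augmentation config (a JSON list of augmentor specs).
AUGMENTATION_CONFIG = json.dumps([
    {"type": "speed",
     "params": {"min_speed_rate": 0.95, "max_speed_rate": 1.05},
     "prob": 0.5},
    {"type": "shift",
     "params": {"min_shift_ms": -5, "max_shift_ms": 5},
     "prob": 1.0},
])

for spec in json.loads(AUGMENTATION_CONFIG):
    print("augmentor %(type)s applied with prob %(prob).2f" % spec)
```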
@@ -265,6 +236,12 @@ For other configuration examples, please refer to `conf/augmenatation.config.exa
Be careful when utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap.
+
+### Training for Mandarin Language
+
+The key steps of training for Mandarin are the same as those for English, and we have also provided an example of Mandarin training with Aishell in ```examples/aishell/local```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, testing and inference respectively. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. Notice that, unlike the English LM, the Mandarin LM is character-based; please run ```tools/tune.py``` to find an optimal setting.
+
+
## Inference and Evaluation
### Prepare Language Model
@@ -322,7 +299,7 @@ For more help on arguments:
```
python3 infer.py --help
```
-or refer to `example/librispeech/run_infer.sh`.
+or refer to `examples/librispeech/local/run_infer.sh`.
### Evaluate a Model
@@ -347,7 +324,7 @@ For more help on arguments:
```bash
python3 test.py --help
```
-or refer to `example/librispeech/run_test.sh`.
+or refer to `examples/librispeech/local/run_test.sh`.
## Hyper-parameters Tuning
@@ -387,11 +364,8 @@ After tuning, you can reset $\alpha$ and $\beta$ in the inference and evaluation
```bash
python3 tune.py --help
```
-or refer to `example/librispeech/run_tune.sh`.
+or refer to `examples/librispeech/local/run_tune.sh`.
-## Training for Mandarin Language
-
-The key steps of training for Mandarin language are same to that of English language and we have also provided an example for Mandarin training with Aishell in ```examples/aishell```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, testing and inference correspondingly. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. Notice that, different from English LM, the Mandarin LM is character-based and please run ```tools/tune.py``` to find an optimal setting.
## Trying Live Demo with Your Own Voice
@@ -429,7 +403,7 @@ Now, in the client console, press the `whitespace` key, hold, and start speaking
Notice that `deploy/demo_client.py` must be run on a machine with a microphone device, while `deploy/demo_server.py` could be run on one without any audio recording hardware, e.g. any remote server machine. Just be careful to set the `host_ip` and `host_port` argument with the actual accessible IP address and port, if the server and client are running with two separate machines. Nothing should be done if they are running on one single machine.
-Please also refer to `examples/deploy_demo/run_english_demo_server.sh`, which will first download a pre-trained English model (trained with 3000 hours of internal speech data) and then start the demo server with the model. With running `examples/mandarin/run_demo_client.sh`, you can speak English to test it. If you would like to try some other models, just update `--model_path` argument in the script.
+Please also refer to `examples/deploy_demo/run_english_demo_server.sh`, which will first download a pre-trained English model (trained with 3000 hours of internal speech data) and then start the demo server with the model. By running `examples/deploy_demo/run_demo_client.sh`, you can speak English to test it. If you would like to try some other models, just update the `--model_path` argument in the script.
For more help on arguments:
@@ -438,24 +412,6 @@ python3 deploy/demo_server.py --help
python3 deploy/demo_client.py --help
```
-## Released Models
-
-#### Speech Model Released
-
-Language | Model Name | Training Data | Hours of Speech
-:-----------: | :------------: | :----------: | -------:
-English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h
-English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h
-Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h
-Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h
-
-#### Language Model Released
-
-Language Model | Training Data | Token-based | Size | Descriptions
-:-------------:| :------------:| :-----: | -----: | :-----------------
-[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1; About 1.85 billion n-grams; 'trie' binary with '-a 22 -q 8 -b 8'
-[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4; About 0.13 billion n-grams; 'probing' binary with default settings
-[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning; About 3.7 billion n-grams; 'probing' binary with default settings
## Experiments and Benchmarks
@@ -494,6 +450,27 @@ We compare the training time with 1, 2, 4, 8 Tesla V100 GPUs (with a subset of L
`tools/profile.sh` provides such a profiling tool.
+
+## Released Models
+
+#### Speech Model Released
+
+Language | Model Name | Training Data | Hours of Speech
+:-----------: | :------------: | :----------: | -------:
+English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h
+English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h
+Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h
+Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h
+
+#### Language Model Released
+
+Language Model | Training Data | Token-based | Size | Descriptions
+:-------------:| :------------:| :-----: | -----: | :-----------------
+[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1; About 1.85 billion n-grams; 'trie' binary with '-a 22 -q 8 -b 8'
+[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4; About 0.13 billion n-grams; 'probing' binary with default settings
+[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning; About 3.7 billion n-grams; 'probing' binary with default settings
+
+
## Questions and Help
You are welcome to submit questions and bug reports in [Github Issues](https://github.com/PaddlePaddle/DeepSpeech/issues). You are also welcome to contribute to this project.
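A closing note on the manifest files referenced throughout this README: each manifest is a plain-text file with one JSON object per line describing a single audio clip. A minimal reader sketch; the field names shown in the comment (`audio_filepath`, `duration`, `text`) are an assumption based on typical DeepSpeech2 manifests, not something this diff specifies:

```python
import io
import json

def read_manifest(manifest_path):
    """Yield one dict per non-empty line of a JSON-lines manifest."""
    with io.open(manifest_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                yield json.loads(line)

# Assumed entry shape, e.g.:
#   {"audio_filepath": "dataset/librispeech/...", "duration": 2.98, "text": "..."}
# for entry in read_manifest("examples/librispeech/data/manifest.train"):
#     print(entry["audio_filepath"], entry["text"])
```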
diff --git a/README_cn.md b/README_cn.md
index 176f0cd770e0ae8aac202d020ffca09f1e20a694..94825cb96dcad71ece8d9e78f7a2308f7a5a8c51 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -7,17 +7,14 @@
## 目录
- [安装](#安装)
-- [在 Docker 容器上运行](#在Docker容器上运行)
- [开始](#开始)
- [数据准备](#数据准备)
- [训练模型](#训练模型)
-- [数据增强流水线](#数据增强流水线)
- [推断和评价](#推断和评价)
- [超参数调整](#超参数调整)
-- [训练汉语语言](#训练汉语语言)
- [用自己的声音尝试现场演示](#用自己的声音尝试现场演示)
-- [发布模型](#发布模型)
- [试验和基准](#试验和基准)
+- [发布模型](#发布模型)
- [问题和帮助](#问题和帮助)
## 安装
@@ -102,42 +99,16 @@ python3 -m pip install paddlepaddle-gpu==1.8.0.post107
cd examples/tiny
```
- 注意这仅仅是 LibriSpeech 一个小数据集的例子。如果你想尝试完整的数据集(可能需要花好几天来训练模型),请使用这个路径`examples/librispeech`。
-- 准备数据
-
- ```bash
- sh run_data.sh
- ```
-
- 运行`run_data.sh`脚本将会下载数据集,产出 manifests 文件,收集一些归一化需要的统计信息并建立词表。当数据准备完成之后,下载完的数据(仅有 LibriSpeech 一部分)在`dataset/librispeech`中;其对应的 manifest 文件,均值标准差和词表文件在`./data/tiny`中。在第一次执行的时候一定要执行这个脚本,在接下来所有的实验中我们都会用到这个数据集。
-- 训练你自己的 ASR 模型
-
- ```bash
- sh run_train.sh
- ```
-
- `run_train.sh`将会启动训练任务,训练日志会打印到终端,并且模型每个 epoch 的 checkpoint 都会保存到`./checkpoints/tiny`目录中。这些 checkpoint 可以用来恢复训练,推断,评价和部署。
-- 用已有的模型进行案例推断
-
- ```bash
- sh run_infer.sh
- ```
-
- `run_infer.sh`将会利用训练好的模型展现一些(默认 10 个)样本语音到文本的解码结果。由于当前模型只使用了 LibriSpeech 一部分数据集训练,因此性能可能不会太好。为了看到更好模型上的表现,你可以下载一个已训练好的模型(用完整的 LibriSpeech 训练了好几天)来做推断。
-
- ```bash
- sh run_infer_golden.sh
- ```
-- 评价一个已经存在的模型
+ 注意这仅仅是 LibriSpeech 一个小数据集的例子。如果你想尝试完整的数据集(可能需要花好几天来训练模型),请使用这个路径`examples/librispeech`。
+- 设置环境变量
```bash
- sh run_test.sh
+ source path.sh
```
-
- `run_test.sh`能够利用误字率(或字符错误率)来评价模型。类似的,你可以下载一个完全训练好的模型来测试它的性能:
+- 入口脚本
```bash
- sh run_test_golden.sh
+ bash run.sh
```
更多细节会在接下来的章节中阐述。祝你在*DeepSpeech2*ASR引擎学习中过得愉快!
@@ -156,7 +127,7 @@ python3 -m pip install paddlepaddle-gpu==1.8.0.post107
如果你要使用自定义数据,你只需要按照以上格式生成自己的 manifest 文件即可。给定 manifest 文件,训练、推断以及其它所有模块都能够访问到音频数据以及对应的时长和标签数据。
-关于如何生成 manifest 文件,请参考`data/librispeech/librispeech.py`。该脚本将会下载 LibriSpeech 数据集并生成 manifest 文件。
+关于如何生成 manifest 文件,请参考`examples/librispeech/local/librispeech.py`。该脚本将会下载 LibriSpeech 数据集并生成 manifest 文件。
### 计算均值和标准差用于归一化
@@ -166,11 +137,11 @@ python3 -m pip install paddlepaddle-gpu==1.8.0.post107
python3 tools/compute_mean_std.py \
--num_samples 2000 \
--specgram_type linear \
---manifest_path data/librispeech/manifest.train \
---output_path data/librispeech/mean_std.npz
+--manifest_path examples/librispeech/data/manifest.train \
+--output_path examples/librispeech/data/mean_std.npz
```
-以上这段代码会计算在`data/librispeech/manifest.train`路径中,2000 个随机采样的语音频谱特征的均值和标准差,并将结果保存在`data/librispeech/mean_std.npz`中,方便以后使用。
+以上这段代码会计算在`examples/librispeech/data/manifest.train`路径中,2000 个随机采样的语音频谱特征的均值和标准差,并将结果保存在`examples/librispeech/data/mean_std.npz`中,方便以后使用。
### 建立词表
@@ -179,22 +150,24 @@ python3 tools/compute_mean_std.py \
```bash
python3 tools/build_vocab.py \
--count_threshold 0 \
---vocab_path data/librispeech/eng_vocab.txt \
---manifest_paths data/librispeech/manifest.train
+--vocab_path examples/librispeech/data/eng_vocab.txt \
+--manifest_paths examples/librispeech/data/manifest.train
```
-它将`data/librispeech/manifest.train`目录中的所有录音文本写入词表文件`data/librispeeech/eng_vocab.txt`,并且没有词汇截断(`--count_threshold 0`)。
+它将`examples/librispeech/data/manifest.train`文件中的所有录音文本写入词表文件`examples/librispeech/data/eng_vocab.txt`,并且没有词汇截断(`--count_threshold 0`)。
### 更多帮助
获得更多帮助:
```bash
-python3 data/librispeech/librispeech.py --help
+python3 examples/librispeech/local/librispeech.py --help
python3 tools/compute_mean_std.py --help
python3 tools/build_vocab.py --help
```
+
+
## 训练模型
`train.py`是训练模块的主要调用者。使用示例如下。
@@ -224,10 +197,10 @@ python3 tools/build_vocab.py --help
```bash
python3 train.py --help
```
-或参考 `example/librispeech/run_train.sh`.
+或参考 `examples/librispeech/local/run_train.sh`.
-## 数据增强流水线
+### 数据增强流水线
数据增强是用来提升深度学习性能的非常有效的技术。我们通过在原始音频中添加小的随机扰动(标签不变转换)获得新音频来增强我们的语音数据。你不必自己合成,因为数据增强已经嵌入到数据生成器中并且能够即时完成,在训练模型的每个epoch中随机合成音频。
@@ -263,6 +236,12 @@ python3 train.py --help
使用数据增强技术时要小心,由于扩大了训练和测试集的差异,不恰当的增强会对训练模型不利,导致训练和预测的差距增大。
+### 训练普通话语言
+
+普通话语言训练与英语训练的关键步骤相同,我们提供了一个使用 Aishell 进行普通话训练的例子```examples/aishell/local```。如上所述,请执行```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh```和```sh run_infer.sh```做相应的数据准备,训练,测试和推断。我们还准备了一个预训练过的模型(执行./models/aishell/download_model.sh下载)供用户使用```run_infer_golden.sh```和```run_test_golden.sh```来尝试。请注意,与英语语言模型不同,普通话语言模型是基于汉字的,请运行```tools/tune.py```来查找最佳设置。
+
+
+
## 推断和评价
### 准备语言模型
@@ -321,7 +300,7 @@ bash download_lm_ch.sh
```
python3 infer.py --help
```
-或参考`example/librispeech/run_infer.sh`.
+或参考`examples/librispeech/local/run_infer.sh`.
### 评估模型
@@ -346,7 +325,9 @@ python3 infer.py --help
```bash
python3 test.py --help
```
-或参考`example/librispeech/run_test.sh`.
+或参考`examples/librispeech/local/run_test.sh`.
+
+
## 超参数调整
@@ -386,11 +367,8 @@ python3 test.py --help
```bash
python3 tune.py --help
```
-或参考`example/librispeech/run_tune.sh`.
-
-## 训练普通话语言
+或参考`examples/librispeech/local/run_tune.sh`.
-普通话语言训练与英语训练的关键步骤相同,我们提供了一个使用 Aishell 进行普通话训练的例子```examples/aishell```。如上所述,请执行```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh```和```sh run_infer.sh```做相应的数据准备,训练,测试和推断。我们还准备了一个预训练过的模型(执行./models/aishell/download_model.sh下载)供用户使用```run_infer_golden.sh```和```run_test_golden.sh```来。请注意,与英语语言模型不同,普通话语言模型是基于汉字的,请运行```tools/tune.py```来查找最佳设置。
## 用自己的声音尝试现场演示
@@ -428,7 +406,7 @@ python3 -u deploy/demo_client.py \
请注意,`deploy/demo_client.py`必须在带麦克风设备的机器上运行,而`deploy/demo_server.py`可以在没有任何录音硬件的情况下运行,例如任何远程服务器机器。如果服务器和客户端使用两台独立的机器运行,只需要注意将`host_ip`和`host_port`参数设置为实际可访问的IP地址和端口。如果它们在单台机器上运行,则不用作任何处理。
-请参考`examples/deploy_demo/run_english_demo_server.sh`,它将首先下载一个预先训练过的英语模型(用3000小时的内部语音数据训练),然后用模型启动演示服务器。通过运行`examples/mandarin/run_demo_client.sh`,你可以说英语来测试它。如果您想尝试其他模型,只需更新脚本中的`--model_path`参数即可。
+请参考`examples/deploy_demo/run_english_demo_server.sh`,它将首先下载一个预先训练过的英语模型(用3000小时的内部语音数据训练),然后用模型启动演示服务器。通过运行`examples/deploy_demo/run_demo_client.sh`,你可以说英语来测试它。如果您想尝试其他模型,只需更新脚本中的`--model_path`参数即可。
获得更多帮助:
@@ -437,24 +415,6 @@ python3 deploy/demo_server.py --help
python3 deploy/demo_client.py --help
```
-## 发布模型
-
-#### 语音模型发布
-
-语种 | 模型名 | 训练数据 | 语音时长
-:-----------: | :------------: | :----------: | -------:
-English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h
-English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h
-Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h
-Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h
-
-#### 语言模型发布
-
-语言模型 | 训练数据 | 基于的字符 | 大小 | 描述
-:-------------:| :------------:| :-----: | -----: | :-----------------
-[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1; About 1.85 billion n-grams; 'trie' binary with '-a 22 -q 8 -b 8'
-[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4; About 0.13 billion n-grams; 'probing' binary with default settings
-[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning; About 3.7 billion n-grams; 'probing' binary with default settings
## 实验和baseline
@@ -494,6 +454,27 @@ Baidu Internal Testset | 12.64
`tools/profile.sh`提供了上述分析工具.
+
+## 发布模型
+
+#### 语音模型发布
+
+语种 | 模型名 | 训练数据 | 语音时长
+:-----------: | :------------: | :----------: | -------:
+English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h
+English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h
+Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h
+Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h
+
+#### 语言模型发布
+
+语言模型 | 训练数据 | 基于的字符 | 大小 | 描述
+:-------------:| :------------:| :-----: | -----: | :-----------------
+[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1; About 1.85 billion n-grams; 'trie' binary with '-a 22 -q 8 -b 8'
+[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4; About 0.13 billion n-grams; 'probing' binary with default settings
+[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning; About 3.7 billion n-grams; 'probing' binary with default settings
+
+
## 问题和帮助
欢迎您在[Github问题](https://github.com/PaddlePaddle/models/issues)中提交问题和bug。也欢迎您为这个项目做出贡献。
diff --git a/data/noise/chime3_background.py b/data/noise/chime3_background.py
index 30a2e14e70d30bad81a491a1719b7cd5dae8a2a6..8db09204ec3f78e49a5784e7112b8e6072b0110e 100644
--- a/data/noise/chime3_background.py
+++ b/data/noise/chime3_background.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Prepare CHiME3 background data.
Download, unpack and create manifest files.
diff --git a/data/voxforge/voxforge.py b/data/voxforge/voxforge.py
index 8478166fbd3b7eca71c94901732812a303eab781..3fb0ded88c196be5fd0969e50ee2ff04b052d0e3 100644
--- a/data/voxforge/voxforge.py
+++ b/data/voxforge/voxforge.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Prepare VoxForge dataset
Download, unpack and create manifest files.
diff --git a/data_utils/__init__.py b/data_utils/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..185a92b8d94d3426d616c0624f0f2ee04339349e 100644
--- a/data_utils/__init__.py
+++ b/data_utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/data_utils/audio.py b/data_utils/audio.py
index d7b88952da0f2ba6da4598cf47c02b3598d92ebd..2ba7019a2d7bb08c15626eb361b6af7104517594 100644
--- a/data_utils/audio.py
+++ b/data_utils/audio.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the audio segment class."""
import numpy as np
diff --git a/data_utils/augmentor/__init__.py b/data_utils/augmentor/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..185a92b8d94d3426d616c0624f0f2ee04339349e 100644
--- a/data_utils/augmentor/__init__.py
+++ b/data_utils/augmentor/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py
index 37d6bc1dd4abf6bfd4319b4b71f9e38dabc50d8c..349cdc5642cca3b7daa7a5b9ec64f9df3e1782bf 100644
--- a/data_utils/augmentor/augmentation.py
+++ b/data_utils/augmentor/augmentation.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the data augmentation pipeline."""
import json
diff --git a/data_utils/augmentor/base.py b/data_utils/augmentor/base.py
index adeda6465d8c750fe2aae7957c091643fba36931..5b80be2fe0478b09968ab738b9c83c8c2e65b76a 100644
--- a/data_utils/augmentor/base.py
+++ b/data_utils/augmentor/base.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the abstract base class for augmentation models."""
from abc import ABCMeta, abstractmethod
diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py
index 937e578bdeec7110b6d8ea66fabb5187f7d1c62d..839c6a809b47366c60465329b72408a1b26b4db3 100644
--- a/data_utils/augmentor/impulse_response.py
+++ b/data_utils/augmentor/impulse_response.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the impulse response augmentation model."""
from data_utils.augmentor.base import AugmentorBase
diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py
index 952ca3fb97860f5dfbe091da63aca3313ca340d7..954d1b4193e5a475812767875f955890c0057e44 100644
--- a/data_utils/augmentor/noise_perturb.py
+++ b/data_utils/augmentor/noise_perturb.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the noise perturb augmentation model."""
from data_utils.augmentor.base import AugmentorBase
diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py
index deba9a77a53527e92866aac715c56ea45e6e147a..f5c7d99fd57891bc69c09c8cf975fb15138301b9 100644
--- a/data_utils/augmentor/online_bayesian_normalization.py
+++ b/data_utils/augmentor/online_bayesian_normalization.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contain the online bayesian normalization augmentation model."""
from data_utils.augmentor.base import AugmentorBase
diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py
index e26d06b46bd41d138d46573a6540d3c4a2c9a254..3732e09cd9822e0667609e748b2abd6929612007 100644
--- a/data_utils/augmentor/resample.py
+++ b/data_utils/augmentor/resample.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contain the resample augmentation model."""
from data_utils.augmentor.base import AugmentorBase
diff --git a/data_utils/augmentor/shift_perturb.py b/data_utils/augmentor/shift_perturb.py
index a9b732161d0d8d703b52e774bf16603551582eef..8b8e603621819058974b7ade112a75c848222aa5 100644
--- a/data_utils/augmentor/shift_perturb.py
+++ b/data_utils/augmentor/shift_perturb.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the volume perturb augmentation model."""
from data_utils.augmentor.base import AugmentorBase
diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py
index 78dfd0c657242e74939f40371c72111fbb74a38b..7b28f7ec61cd79e1b41fdb158da024eb3b8cf8e9 100644
--- a/data_utils/augmentor/speed_perturb.py
+++ b/data_utils/augmentor/speed_perturb.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contain the speech perturbation augmentation model."""
from data_utils.augmentor.base import AugmentorBase
diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py
index b1d8c190b1b25da7f3e41d53b4622459e1fd4936..b98c7a3b4574c72d79f3f582a337334c1ce1d9d4 100644
--- a/data_utils/augmentor/volume_perturb.py
+++ b/data_utils/augmentor/volume_perturb.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the volume perturb augmentation model."""
from data_utils.augmentor.base import AugmentorBase
diff --git a/data_utils/data.py b/data_utils/data.py
index ed65a0947e68f230c11ebad8c89b6edb31668617..125768898028308dc6be859fa8aa57be3bc00668 100644
--- a/data_utils/data.py
+++ b/data_utils/data.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains data generator for orgnaizing various audio data preprocessing
pipeline and offering data reader interface of PaddlePaddle requirements.
"""
diff --git a/data_utils/featurizer/__init__.py b/data_utils/featurizer/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..185a92b8d94d3426d616c0624f0f2ee04339349e 100644
--- a/data_utils/featurizer/__init__.py
+++ b/data_utils/featurizer/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py
index c5f20a16bf5412406719ed87041d265bb82a63f0..0afd19870cb7dceced6e8694350a3e15dedb6e13 100644
--- a/data_utils/featurizer/audio_featurizer.py
+++ b/data_utils/featurizer/audio_featurizer.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the audio featurizer class."""
import numpy as np
diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py
index a4484ae44110086264838c8eab5202fbdd17908e..2e1424fa4fcd8283cad4f80451d8f4268580d993 100644
--- a/data_utils/featurizer/speech_featurizer.py
+++ b/data_utils/featurizer/speech_featurizer.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the speech featurizer class."""
from data_utils.featurizer.audio_featurizer import AudioFeaturizer
diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py
index 0c48b38e032bb76c3609237c48e58992564e6c19..70aa10ead301dfa4f4c8338cb9e914d6a6794376 100644
--- a/data_utils/featurizer/text_featurizer.py
+++ b/data_utils/featurizer/text_featurizer.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the text featurizer class."""
import os
diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py
index 378714cc0e2a5393675f07e50962ef1e9cdf528e..83a008f1001a4a6085f0f6d0de77fbf02db618b6 100644
--- a/data_utils/normalizer.py
+++ b/data_utils/normalizer.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains feature normalizers."""
import numpy as np
diff --git a/data_utils/speech.py b/data_utils/speech.py
index 3ea6ec296394cbbadc48de7d8952da088f744e3e..01c1787a4825b7a25aa40c80d47f46acf3032cd8 100644
--- a/data_utils/speech.py
+++ b/data_utils/speech.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains the speech segment class."""
import numpy as np
diff --git a/data_utils/utility.py b/data_utils/utility.py
index 2377bc221c6155589f4b2524d21aebd5f2a9ce25..6cc1b2713ff685c94b150d4c88e36f5e4b94a43a 100644
--- a/data_utils/utility.py
+++ b/data_utils/utility.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains data helper functions."""
import json
diff --git a/decoders/__init__.py b/decoders/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..185a92b8d94d3426d616c0624f0f2ee04339349e 100644
--- a/decoders/__init__.py
+++ b/decoders/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/decoders/decoders_deprecated.py b/decoders/decoders_deprecated.py
index 0dad44c4a8e2ba1e157dd96c899d504aa98af902..99e14e49d2bf171ae49b67b597e73cb377337bfe 100644
--- a/decoders/decoders_deprecated.py
+++ b/decoders/decoders_deprecated.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains various CTC decoders."""
from itertools import groupby
diff --git a/decoders/scorer_deprecated.py b/decoders/scorer_deprecated.py
index 266df559bf873b261be10266e75d3e77258e09c7..919b02946824eade3c1c5a47f9a0bc9456d83e09 100644
--- a/decoders/scorer_deprecated.py
+++ b/decoders/scorer_deprecated.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""External Scorer for Beam Search Decoder."""
import os
diff --git a/decoders/swig/__init__.py b/decoders/swig/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..185a92b8d94d3426d616c0624f0f2ee04339349e 100644
--- a/decoders/swig/__init__.py
+++ b/decoders/swig/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/decoders/swig/_init_paths.py b/decoders/swig/_init_paths.py
index 3bb2fd19722c6bf65fffc25f9d9f0b253125e4b8..c4b28c6433bad914acdddbe11a32536cfeebc8d7 100644
--- a/decoders/swig/_init_paths.py
+++ b/decoders/swig/_init_paths.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Set up paths for DS2"""
import os.path
diff --git a/decoders/swig/setup.py b/decoders/swig/setup.py
index c5a7c4ca61d4c5e66bbbc3dd04fe5b7ee25e5296..0fcb24b5080612eb14de1b86a0caa011a8ec0097 100644
--- a/decoders/swig/setup.py
+++ b/decoders/swig/setup.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Script to build and install decoder package."""
from setuptools import setup, Extension, distutils
diff --git a/decoders/swig_wrapper.py b/decoders/swig_wrapper.py
index b32893b92d0c64040b5302da8c619b21a5787701..0a0579ad090774fb3e76d7b67a75e85aac4ce4a9 100644
--- a/decoders/swig_wrapper.py
+++ b/decoders/swig_wrapper.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Wrapper for various CTC decoders in SWIG."""
import swig_decoders
diff --git a/decoders/tests/test_decoders.py b/decoders/tests/test_decoders.py
index 878417432ace35338f38cb0e79d533ba7b3058b6..9c4b1c8eb49e303b2b1b37944ed0528a225c7dcf 100644
--- a/decoders/tests/test_decoders.py
+++ b/decoders/tests/test_decoders.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Test decoders."""
import unittest
diff --git a/deploy/_init_paths.py b/deploy/_init_paths.py
index 3bb2fd19722c6bf65fffc25f9d9f0b253125e4b8..c4b28c6433bad914acdddbe11a32536cfeebc8d7 100644
--- a/deploy/_init_paths.py
+++ b/deploy/_init_paths.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Set up paths for DS2"""
import os.path
diff --git a/deploy/demo_client.py b/deploy/demo_client.py
index 45a9d319ab827fee17d7e1b0ee5fbebb8ef9876f..b4aa50e8e03825dcf6aa8eb9217e99013ee2b12d 100644
--- a/deploy/demo_client.py
+++ b/deploy/demo_client.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Client-end for the ASR demo."""
import keyboard
import struct
diff --git a/deploy/demo_server.py b/deploy/demo_server.py
index 88c2e9ef9545a0b5b6bb57bb4833dc4b9226d624..bfc48c9f13c3308dd6d6f9e9cced5e2b2cf18eb0 100644
--- a/deploy/demo_server.py
+++ b/deploy/demo_server.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Server-end for the ASR demo."""
import os
import time
diff --git a/data/aishell/aishell.py b/examples/aishell/local/aishell.py
similarity index 86%
rename from data/aishell/aishell.py
rename to examples/aishell/local/aishell.py
index 6290712f2c717b623cd6382c678fd0efcef69084..ba59b744d5c02a8967ad5bff990a37c05b6ca0db 100644
--- a/data/aishell/aishell.py
+++ b/examples/aishell/local/aishell.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Prepare Aishell mandarin dataset
Download, unpack and create manifest files.
diff --git a/examples/aishell/run_data.sh b/examples/aishell/local/run_data.sh
similarity index 50%
rename from examples/aishell/run_data.sh
rename to examples/aishell/local/run_data.sh
index fc002fcd2884de64e8aa7c06f0214afe44f9193b..b874b2df8680c9273608842c8b74fb5518dff344 100644
--- a/examples/aishell/run_data.sh
+++ b/examples/aishell/local/run_data.sh
@@ -1,11 +1,11 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
+mkdir -p data
# download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 data/aishell/aishell.py \
---manifest_prefix='data/aishell/manifest' \
---target_dir='./dataset/aishell'
+PYTHONPATH=.:$PYTHONPATH python3 local/aishell.py \
+--manifest_prefix="data/manifest" \
+--target_dir="${MAIN_ROOT}/dataset/aishell"
if [ $? -ne 0 ]; then
echo "Prepare Aishell failed. Terminated."
@@ -14,10 +14,10 @@ fi
# build vocabulary
-python3 tools/build_vocab.py \
+python3 ${MAIN_ROOT}/tools/build_vocab.py \
--count_threshold=0 \
---vocab_path='data/aishell/vocab.txt' \
---manifest_paths 'data/aishell/manifest.train' 'data/aishell/manifest.dev'
+--vocab_path="data/vocab.txt" \
+--manifest_paths "data/manifest.train" "data/manifest.dev"
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
@@ -26,11 +26,11 @@ fi
# compute mean and stddev for normalizer
-python3 tools/compute_mean_std.py \
---manifest_path='data/aishell/manifest.train' \
+python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+--manifest_path="data/manifest.train" \
--num_samples=2000 \
---specgram_type='linear' \
---output_path='data/aishell/mean_std.npz'
+--specgram_type="linear" \
+--output_path="data/mean_std.npz"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
diff --git a/examples/aishell/run_infer.sh b/examples/aishell/local/run_infer.sh
similarity index 55%
rename from examples/aishell/run_infer.sh
rename to examples/aishell/local/run_infer.sh
index ef382e310ebe0dba8317585c755444c834022bcd..90be581bedb01220bc8fd42eeeefafbc851718d6 100644
--- a/examples/aishell/run_infer.sh
+++ b/examples/aishell/local/run_infer.sh
@@ -1,9 +1,8 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_ch.sh
if [ $? -ne 0 ]; then
exit 1
@@ -13,7 +12,7 @@ cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
--num_samples=10 \
--beam_size=300 \
--num_proc_bsearch=8 \
@@ -27,14 +26,14 @@ python3 -u infer.py \
--use_gru=True \
--use_gpu=True \
--share_rnn_weights=False \
---infer_manifest='data/aishell/manifest.test' \
---mean_std_path='data/aishell/mean_std.npz' \
---vocab_path='data/aishell/vocab.txt' \
---model_path='checkpoints/aishell/step_final' \
---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='cer' \
---specgram_type='linear'
+--infer_manifest="data/manifest.test" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in inference!"
diff --git a/examples/aishell/run_infer_golden.sh b/examples/aishell/local/run_infer_golden.sh
similarity index 55%
rename from examples/aishell/run_infer_golden.sh
rename to examples/aishell/local/run_infer_golden.sh
index dabdc0c68f5a295d5d0517a51de5c26889431511..296c0d5b4acac3691363462075e923320a72644a 100644
--- a/examples/aishell/run_infer_golden.sh
+++ b/examples/aishell/local/run_infer_golden.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_ch.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +10,7 @@ cd - > /dev/null
# download well-trained model
-cd models/aishell > /dev/null
+cd ${MAIN_ROOT}/models/aishell > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +20,7 @@ cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
--num_samples=10 \
--beam_size=300 \
--num_proc_bsearch=8 \
@@ -36,14 +34,14 @@ python3 -u infer.py \
--use_gru=True \
--use_gpu=False \
--share_rnn_weights=False \
---infer_manifest='data/aishell/manifest.test' \
---mean_std_path='models/aishell/mean_std.npz' \
---vocab_path='models/aishell/vocab.txt' \
---model_path='models/aishell' \
---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='cer' \
---specgram_type='linear'
+--infer_manifest="data/manifest.test" \
+--mean_std_path="${MAIN_ROOT}/models/aishell/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/aishell/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/aishell" \
+--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in inference!"
diff --git a/examples/aishell/run_test.sh b/examples/aishell/local/run_test.sh
similarity index 56%
rename from examples/aishell/run_test.sh
rename to examples/aishell/local/run_test.sh
index b56d164b99cabd8fe319d1456b0e6b106e9feef7..d2dbfb4f040eb1765682fa913a6dbb6e8bdcf529 100644
--- a/examples/aishell/run_test.sh
+++ b/examples/aishell/local/run_test.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_ch.sh
if [ $? -ne 0 ]; then
exit 1
@@ -13,7 +11,7 @@ cd - > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
--batch_size=128 \
--beam_size=300 \
--num_proc_bsearch=8 \
@@ -27,14 +25,14 @@ python3 -u test.py \
--use_gru=True \
--use_gpu=True \
--share_rnn_weights=False \
---test_manifest='data/aishell/manifest.test' \
---mean_std_path='data/aishell/mean_std.npz' \
---vocab_path='data/aishell/vocab.txt' \
---model_path='checkpoints/aishell/step_final' \
---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='cer' \
---specgram_type='linear'
+--test_manifest="data/manifest.test" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
diff --git a/examples/aishell/run_test_golden.sh b/examples/aishell/local/run_test_golden.sh
similarity index 56%
rename from examples/aishell/run_test_golden.sh
rename to examples/aishell/local/run_test_golden.sh
index 2f79cc64797231c04899b9c726af53eab8f1c246..062a1b99bf65fee845d3580a9ad8c3d136477719 100644
--- a/examples/aishell/run_test_golden.sh
+++ b/examples/aishell/local/run_test_golden.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_ch.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +10,7 @@ cd - > /dev/null
# download well-trained model
-cd models/aishell > /dev/null
+cd ${MAIN_ROOT}/models/aishell > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +20,7 @@ cd - > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
--batch_size=128 \
--beam_size=300 \
--num_proc_bsearch=8 \
@@ -36,14 +34,14 @@ python3 -u test.py \
--use_gru=True \
--use_gpu=True \
--share_rnn_weights=False \
---test_manifest='data/aishell/manifest.test' \
---mean_std_path='models/aishell/mean_std.npz' \
---vocab_path='models/aishell/vocab.txt' \
---model_path='models/aishell' \
---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='cer' \
---specgram_type='linear'
+--test_manifest="data/manifest.test" \
+--mean_std_path="${MAIN_ROOT}/models/aishell/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/aishell/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/aishell" \
+--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
diff --git a/examples/aishell/run_train.sh b/examples/aishell/local/run_train.sh
similarity index 61%
rename from examples/aishell/run_train.sh
rename to examples/aishell/local/run_train.sh
index 889e1904735a88b8b2a63c2160da54eff9ae5bbe..5bde13721bc48291b666b5ae7fc29d0026b8f3fd 100644
--- a/examples/aishell/run_train.sh
+++ b/examples/aishell/local/run_train.sh
@@ -1,12 +1,10 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# train model
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
export FLAGS_sync_nccl_allreduce=0
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u train.py \
+python3 -u ${MAIN_ROOT}/train.py \
--batch_size=64 \
--num_epoch=50 \
--num_conv_layers=2 \
@@ -24,14 +22,14 @@ python3 -u train.py \
--use_gpu=True \
--is_local=True \
--share_rnn_weights=False \
---train_manifest='data/aishell/manifest.train' \
---dev_manifest='data/aishell/manifest.dev' \
---mean_std_path='data/aishell/mean_std.npz' \
---vocab_path='data/aishell/vocab.txt' \
---output_model_dir='./checkpoints/aishell' \
---augment_conf_path='conf/augmentation.config' \
---specgram_type='linear' \
---shuffle_method='batch_shuffle_clipped' \
+--train_manifest="data/manifest.train" \
+--dev_manifest="data/manifest.dev" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--output_model_dir="./checkpoints" \
+--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
+--specgram_type="linear" \
+--shuffle_method="batch_shuffle_clipped" \
if [ $? -ne 0 ]; then
echo "Failed in training!"
diff --git a/examples/aishell/path.sh b/examples/aishell/path.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fd1cebba8ea9313ca30ee8bece2f6ddb0f29112d
--- /dev/null
+++ b/examples/aishell/path.sh
@@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
diff --git a/examples/aishell/run.sh b/examples/aishell/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..93bf86388714b6dae7d14bbf05dc357799b5fb49
--- /dev/null
+++ b/examples/aishell/run.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+source path.sh
+
+# prepare data
+bash ./local/run_data.sh
+
+# test pretrained model
+bash ./local/run_test_golden.sh
+
+# infer with the pretrained model
+bash ./local/run_infer_golden.sh
+
+# train model
+bash ./local/run_train.sh
+
+# test model
+bash ./local/run_test.sh
+
+# infer model
+bash ./local/run_infer.sh
diff --git a/examples/baidu_en8k/path.sh b/examples/baidu_en8k/path.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fd1cebba8ea9313ca30ee8bece2f6ddb0f29112d
--- /dev/null
+++ b/examples/baidu_en8k/path.sh
@@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
diff --git a/examples/baidu_en8k/run_infer_golden.sh b/examples/baidu_en8k/run_infer_golden.sh
index 85fea5f479cebd01a9cc615ab059738f953acbcc..11d7541eec9e309fe999a8c9956c6176015ae2b0 100644
--- a/examples/baidu_en8k/run_infer_golden.sh
+++ b/examples/baidu_en8k/run_infer_golden.sh
@@ -1,9 +1,9 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
+source path.sh
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +12,7 @@ cd - > /dev/null
# download well-trained model
-cd models/baidu_en8k > /dev/null
+cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +22,7 @@ cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
--num_samples=10 \
--beam_size=500 \
--num_proc_bsearch=5 \
@@ -36,14 +36,14 @@ python3 -u infer.py \
--use_gru=True \
--use_gpu=False \
--share_rnn_weights=False \
---infer_manifest='data/librispeech/manifest.test-clean' \
---mean_std_path='models/baidu_en8k/mean_std.npz' \
---vocab_path='models/baidu_en8k/vocab.txt' \
---model_path='models/baidu_en8k' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--infer_manifest="${MAIN_ROOT}/examples/librispeech/data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/baidu_en8k" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in inference!"
diff --git a/examples/baidu_en8k/run_test_golden.sh b/examples/baidu_en8k/run_test_golden.sh
index 1ed63362477e9746665b0beb547ef6ebad10c311..10c61a0960f7a80c66695fcd6458fdd34e99fd4c 100644
--- a/examples/baidu_en8k/run_test_golden.sh
+++ b/examples/baidu_en8k/run_test_golden.sh
@@ -1,9 +1,9 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
+source path.sh
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +12,7 @@ cd - > /dev/null
# download well-trained model
-cd models/baidu_en8k > /dev/null
+cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +22,7 @@ cd - > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
--batch_size=128 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -37,14 +37,14 @@ python3 -u test.py \
--use_gru=True \
--use_gpu=False \
--share_rnn_weights=False \
---test_manifest='data/librispeech/manifest.test-clean' \
---mean_std_path='models/baidu_en8k/mean_std.npz' \
---vocab_path='models/baidu_en8k/vocab.txt' \
---model_path='models/baidu_en8k' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/baidu_en8k" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
diff --git a/examples/deploy_demo/path.sh b/examples/deploy_demo/path.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fd1cebba8ea9313ca30ee8bece2f6ddb0f29112d
--- /dev/null
+++ b/examples/deploy_demo/path.sh
@@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
diff --git a/examples/deploy_demo/run_demo_client.sh b/examples/deploy_demo/run_demo_client.sh
index 7c2609511afc5760865fcd49ab2d515b1b2cc5d3..60581c6615d078da237fc45cf8263b269c5d3b06 100644
--- a/examples/deploy_demo/run_demo_client.sh
+++ b/examples/deploy_demo/run_demo_client.sh
@@ -1,11 +1,11 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
+source path.sh
# start demo client
CUDA_VISIBLE_DEVICES=0 \
-python3 -u deploy/demo_client.py \
---host_ip='localhost' \
+python3 -u ${MAIN_ROOT}/deploy/demo_client.py \
+--host_ip="localhost" \
--host_port=8086 \
if [ $? -ne 0 ]; then
diff --git a/examples/deploy_demo/run_english_demo_server.sh b/examples/deploy_demo/run_english_demo_server.sh
index 5d86ab6e10f6bb08b993ee1ef961c7985ff90057..ae092dbce81626e1228992603d303df5148111d0 100644
--- a/examples/deploy_demo/run_english_demo_server.sh
+++ b/examples/deploy_demo/run_english_demo_server.sh
@@ -1,10 +1,10 @@
#! /usr/bin/env bash
# TODO: replace the model with a Mandarin model
-cd ../.. > /dev/null
+source path.sh
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -13,7 +13,7 @@ cd - > /dev/null
# download well-trained model
-cd models/baidu_en8k > /dev/null
+cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -23,8 +23,8 @@ cd - > /dev/null
# start demo server
CUDA_VISIBLE_DEVICES=0 \
-python3 -u deploy/demo_server.py \
---host_ip='localhost' \
+python3 -u ${MAIN_ROOT}/deploy/demo_server.py \
+--host_ip="localhost" \
--host_port=8086 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
@@ -36,14 +36,14 @@ python3 -u deploy/demo_server.py \
--use_gru=True \
--use_gpu=True \
--share_rnn_weights=False \
---speech_save_dir='demo_cache' \
---warmup_manifest='data/tiny/manifest.test-clean' \
---mean_std_path='models/baidu_en8k/mean_std.npz' \
---vocab_path='models/baidu_en8k/vocab.txt' \
---model_path='models/baidu_en8k' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---specgram_type='linear'
+--speech_save_dir="demo_cache" \
+--warmup_manifest="${MAIN_ROOT}/examples/tiny/data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/baidu_en8k" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in starting demo server!"
diff --git a/data/librispeech/librispeech.py b/examples/librispeech/local/librispeech.py
similarity index 90%
rename from data/librispeech/librispeech.py
rename to examples/librispeech/local/librispeech.py
index 8a136453b6c61cb84cdd65bc97f003a50edbefd8..ae1bae2defee069b0a228c08deebeb9d54548c67 100644
--- a/data/librispeech/librispeech.py
+++ b/examples/librispeech/local/librispeech.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Prepare Librispeech ASR datasets.
Download, unpack and create manifest files.
diff --git a/examples/librispeech/local/run_data.sh b/examples/librispeech/local/run_data.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cbcad7b8dba00fa36ce5fb28eb18938de1e3634d
--- /dev/null
+++ b/examples/librispeech/local/run_data.sh
@@ -0,0 +1,45 @@
+#! /usr/bin/env bash
+
+mkdir -p data
+
+# download data, generate manifests
+PYTHONPATH=.:$PYTHONPATH python3 local/librispeech.py \
+--manifest_prefix="data/manifest" \
+--target_dir="${MAIN_ROOT}/dataset/librispeech" \
+--full_download="True"
+
+if [ $? -ne 0 ]; then
+ echo "Prepare LibriSpeech failed. Terminated."
+ exit 1
+fi
+
+cat data/manifest.train-* | shuf > data/manifest.train
+
+
+# build vocabulary
+python3 ${MAIN_ROOT}/tools/build_vocab.py \
+--count_threshold=0 \
+--vocab_path="data/vocab.txt" \
+--manifest_paths="data/manifest.train"
+
+if [ $? -ne 0 ]; then
+ echo "Build vocabulary failed. Terminated."
+ exit 1
+fi
+
+
+# compute mean and stddev for normalizer
+python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+--manifest_path="data/manifest.train" \
+--num_samples=2000 \
+--specgram_type="linear" \
+--output_path="data/mean_std.npz"
+
+if [ $? -ne 0 ]; then
+ echo "Compute mean and stddev failed. Terminated."
+ exit 1
+fi
+
+
+echo "LibriSpeech Data preparation done."
+exit 0
diff --git a/examples/tiny/run_infer.sh b/examples/librispeech/local/run_infer.sh
similarity index 54%
rename from examples/tiny/run_infer.sh
rename to examples/librispeech/local/run_infer.sh
index d88f4526a4a8c13f7a946fc01f644c1111c4d5b1..33959b38107e9f2e49ee1288c82204d2b2dfef0b 100644
--- a/examples/tiny/run_infer.sh
+++ b/examples/librispeech/local/run_infer.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -13,7 +11,7 @@ cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
--num_samples=10 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -27,14 +25,14 @@ python3 -u infer.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---infer_manifest='data/tiny/manifest.test-clean' \
---mean_std_path='data/tiny/mean_std.npz' \
---vocab_path='data/tiny/vocab.txt' \
---model_path='./checkpoints/tiny/step_final' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--infer_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in inference!"
diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/local/run_infer_golden.sh
similarity index 54%
rename from examples/librispeech/run_infer_golden.sh
rename to examples/librispeech/local/run_infer_golden.sh
index 6004c6af6d8c3911487f3c9c2fefa11c04c73b14..21663681ba1813c797aedac24bb9a7eb74f0d0c2 100644
--- a/examples/librispeech/run_infer_golden.sh
+++ b/examples/librispeech/local/run_infer_golden.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +10,7 @@ cd - > /dev/null
# download well-trained model
-cd models/librispeech > /dev/null
+cd ${MAIN_ROOT}/models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +20,7 @@ cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
--num_samples=10 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -36,14 +34,14 @@ python3 -u infer.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---infer_manifest='data/librispeech/manifest.test-clean' \
---mean_std_path='models/librispeech/mean_std.npz' \
---vocab_path='models/librispeech/vocab.txt' \
---model_path='models/librispeech' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--infer_manifest="data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/librispeech" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in inference!"
diff --git a/examples/tiny/run_test.sh b/examples/librispeech/local/run_test.sh
similarity index 56%
rename from examples/tiny/run_test.sh
rename to examples/librispeech/local/run_test.sh
index 81eafe236f368ac7f1ed95dc96ec6fcf3040ed47..cd8c07542d387c9ff04a568dec2c24f11355a369 100644
--- a/examples/tiny/run_test.sh
+++ b/examples/librispeech/local/run_test.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -13,7 +11,7 @@ cd - > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
--batch_size=128 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -27,14 +25,14 @@ python3 -u test.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---test_manifest='data/tiny/manifest.test-clean' \
---mean_std_path='data/tiny/mean_std.npz' \
---vocab_path='data/tiny/vocab.txt' \
---model_path='checkpoints/tiny/step_final' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
diff --git a/examples/tiny/run_test_golden.sh b/examples/librispeech/local/run_test_golden.sh
similarity index 55%
rename from examples/tiny/run_test_golden.sh
rename to examples/librispeech/local/run_test_golden.sh
index d82865f420f174cd2e8f69701e007caffe39edde..54ec6ad03bc0c41f30703ae6d8bdab198d58b7fa 100644
--- a/examples/tiny/run_test_golden.sh
+++ b/examples/librispeech/local/run_test_golden.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +10,7 @@ cd - > /dev/null
# download well-trained model
-cd models/librispeech > /dev/null
+cd ${MAIN_ROOT}/models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +20,7 @@ cd - > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
--batch_size=128 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -36,14 +34,14 @@ python3 -u test.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---test_manifest='data/tiny/manifest.test-clean' \
---mean_std_path='models/librispeech/mean_std.npz' \
---vocab_path='models/librispeech/vocab.txt' \
---model_path='models/librispeech' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/librispeech" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/local/run_train.sh
similarity index 60%
rename from examples/librispeech/run_train.sh
rename to examples/librispeech/local/run_train.sh
index 6b8982d78ecdc7674191e7de2e5c6f9c82f9cf56..32aa2657b725ddae546f4a3c090a1d27399a61e5 100644
--- a/examples/librispeech/run_train.sh
+++ b/examples/librispeech/local/run_train.sh
@@ -1,13 +1,11 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# train model
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
export FLAGS_sync_nccl_allreduce=0
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u train.py \
+python3 -u ${MAIN_ROOT}/train.py \
--batch_size=20 \
--num_epoch=50 \
--num_conv_layers=2 \
@@ -25,14 +23,14 @@ python3 -u train.py \
--use_gpu=True \
--is_local=True \
--share_rnn_weights=True \
---train_manifest='data/librispeech/manifest.train' \
---dev_manifest='data/librispeech/manifest.dev-clean' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/vocab.txt' \
---output_model_dir='./checkpoints/libri' \
---augment_conf_path='conf/augmentation.config' \
---specgram_type='linear' \
---shuffle_method='batch_shuffle_clipped' \
+--train_manifest="data/manifest.train" \
+--dev_manifest="data/manifest.dev-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--output_model_dir="./checkpoints/libri" \
+--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
+--specgram_type="linear" \
+--shuffle_method="batch_shuffle_clipped" \
if [ $? -ne 0 ]; then
echo "Failed in training!"
diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/local/run_tune.sh
similarity index 59%
rename from examples/librispeech/run_tune.sh
rename to examples/librispeech/local/run_tune.sh
index 834f060fa4ef175ca7482d96da60ab31fe308a56..848f0b8f99e236a09fb334afbf99de3a2ec1709c 100644
--- a/examples/librispeech/run_tune.sh
+++ b/examples/librispeech/local/run_tune.sh
@@ -1,10 +1,8 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# grid-search for hyper-parameters in language model
CUDA_VISIBLE_DEVICES=0,1,2,3 \
-python3 -u tools/tune.py \
+python3 -u ${MAIN_ROOT}/tools/tune.py \
--num_batches=-1 \
--batch_size=128 \
--beam_size=500 \
@@ -23,13 +21,13 @@ python3 -u tools/tune.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---tune_manifest='data/librispeech/manifest.dev-clean' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='models/librispeech/vocab.txt' \
---model_path='models/librispeech' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---error_rate_type='wer' \
---specgram_type='linear'
+--tune_manifest="data/manifest.dev-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/librispeech" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in tuning!"
diff --git a/examples/librispeech/path.sh b/examples/librispeech/path.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fd1cebba8ea9313ca30ee8bece2f6ddb0f29112d
--- /dev/null
+++ b/examples/librispeech/path.sh
@@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
diff --git a/examples/librispeech/run.sh b/examples/librispeech/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c8e58913900677679a40210c1f2dae658c435db6
--- /dev/null
+++ b/examples/librispeech/run.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+source path.sh
+
+# prepare data
+bash ./local/run_data.sh
+
+# test pretrained model
+bash ./local/run_test_golden.sh
+
+# infer with the pretrained model
+bash ./local/run_infer_golden.sh
+
+# train model
+bash ./local/run_train.sh
+
+# test model
+bash ./local/run_test.sh
+
+# infer model
+bash ./local/run_infer.sh
+
+# tune model
+bash ./local/run_tune.sh
diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh
deleted file mode 100644
index 788b415de41909c7f60e0b29e9002d860ad160e2..0000000000000000000000000000000000000000
--- a/examples/librispeech/run_data.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#! /usr/bin/env bash
-
-cd ../.. > /dev/null
-
-# download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \
---manifest_prefix='data/librispeech/manifest' \
---target_dir='./dataset/librispeech' \
---full_download='True'
-
-if [ $? -ne 0 ]; then
- echo "Prepare LibriSpeech failed. Terminated."
- exit 1
-fi
-
-cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train
-
-
-# build vocabulary
-python3 tools/build_vocab.py \
---count_threshold=0 \
---vocab_path='data/librispeech/vocab.txt' \
---manifest_paths='data/librispeech/manifest.train'
-
-if [ $? -ne 0 ]; then
- echo "Build vocabulary failed. Terminated."
- exit 1
-fi
-
-
-# compute mean and stddev for normalizer
-python3 tools/compute_mean_std.py \
---manifest_path='data/librispeech/manifest.train' \
---num_samples=2000 \
---specgram_type='linear' \
---output_path='data/librispeech/mean_std.npz'
-
-if [ $? -ne 0 ]; then
- echo "Compute mean and stddev failed. Terminated."
- exit 1
-fi
-
-
-echo "LibriSpeech Data preparation done."
-exit 0
diff --git a/examples/tiny/README.md b/examples/tiny/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d7361b263c2ab8ac8dddc7013b87e0e58ae33f88
--- /dev/null
+++ b/examples/tiny/README.md
@@ -0,0 +1,62 @@
+# Tiny Example
+
+1. `source path.sh`
+2. `bash run.sh`
+
+## Steps
+- Prepare the data
+
+ ```bash
+ sh local/run_data.sh
+ ```
+
+ `run_data.sh` downloads the dataset, generates the manifests, collects the normalizer's statistics and builds the vocabulary. Once data preparation is done, you will find the data (only a small part of LibriSpeech) downloaded in `${MAIN_ROOT}/dataset/librispeech`, with the corresponding manifest files, a mean-stddev file and a vocabulary file generated in `${PWD}/data`. This step only has to be run the first time you use this dataset; its outputs are reused by all further experiments. A sample manifest entry is sketched below.
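+
+ Each manifest is a line-delimited JSON file with one utterance per line. A minimal sketch of an entry, with an illustrative path and duration (the exact fields are defined by the data preparation scripts):
+
+ ```json
+ {"audio_filepath": "/path/to/dataset/librispeech/dev-clean/1272/128104/1272-128104-0000.flac", "duration": 5.86, "text": "mister quilter is the apostle of the middle classes"}
+ ```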
+- Train your own ASR model
+
+ ```bash
+ sh local/run_train.sh
+ ```
+
+ `run_train.sh` starts a training job, with training logs printed to stdout and a model checkpoint saved to `${PWD}/checkpoints` after every pass/epoch. These checkpoints can be used to resume training, as well as for inference, evaluation and deployment; see the resume sketch below.
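+
+ To resume from a saved checkpoint, uncomment and set the `--init_from_pretrained_model` flag mentioned in `local/run_train.sh`. A sketch, assuming an earlier run already produced a checkpoint (the path is illustrative):
+
+ ```bash
+ # sketch: resume training from an earlier checkpoint (path illustrative);
+ # append the remaining flags from local/run_train.sh
+ python3 -u ${MAIN_ROOT}/train.py \
+ --init_from_pretrained_model="./checkpoints/step_final" \
+ --batch_size=4
+ ```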
+- Case inference with an existing model
+
+ ```bash
+ sh local/run_infer.sh
+ ```
+
+ `run_infer.sh` shows speech-to-text decoding results for several (default: 10) samples with the trained model. The performance may be poor at this point, since the current model is trained on only a toy subset of LibriSpeech. To see results from a better model, download a well-trained one (trained for several days on the complete LibriSpeech) and run inference with it:
+
+ ```bash
+ sh local/run_infer_golden.sh
+ ```
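+
+ The download scripts place the language model and the well-trained acoustic model under `${MAIN_ROOT}/models/` (`models/lm` and `models/librispeech` respectively); `run_infer_golden.sh` points its `--lang_model_path` and `--model_path` flags there.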
+- Evaluate an existing model
+
+ ```bash
+ sh local/run_test.sh
+ ```
+
+ `run_test.sh` evaluates the model by Word Error Rate (or Character Error Rate). Similarly, you can also download a well-trained model and test its performance:
+
+ ```bash
+ sh local/run_test_golden.sh
+ ```
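+
+ Evaluation reports the Word Error Rate here (selected via `--error_rate_type="wer"` in `local/run_test.sh`); Mandarin setups such as the aishell example use `"cer"` instead.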
\ No newline at end of file
diff --git a/examples/tiny/local/run_data.sh b/examples/tiny/local/run_data.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3ad387dbcc966468dedf0b0935f030089e4209e5
--- /dev/null
+++ b/examples/tiny/local/run_data.sh
@@ -0,0 +1,47 @@
+#! /usr/bin/env bash
+
+# prepare folder
+if [ ! -e data ]; then
+ mkdir data
+fi
+
+# download data, generate manifests
+PYTHONPATH=.:$PYTHONPATH python3 ../librispeech/local/librispeech.py \
+--manifest_prefix="data/manifest" \
+--target_dir="${MAIN_ROOT}/dataset/librispeech" \
+--full_download="False"
+
+if [ $? -ne 0 ]; then
+ echo "Prepare LibriSpeech failed. Terminated."
+ exit 1
+fi
+
+head -n 64 data/manifest.dev-clean > data/manifest.tiny
+
+# build vocabulary
+python3 ${MAIN_ROOT}/tools/build_vocab.py \
+--count_threshold=0 \
+--vocab_path="data/vocab.txt" \
+--manifest_paths="data/manifest.tiny"
+
+if [ $? -ne 0 ]; then
+ echo "Build vocabulary failed. Terminated."
+ exit 1
+fi
+
+
+# compute mean and stddev for normalizer
+python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+--manifest_path="data/manifest.tiny" \
+--num_samples=64 \
+--specgram_type="linear" \
+--output_path="data/mean_std.npz"
+
+if [ $? -ne 0 ]; then
+ echo "Compute mean and stddev failed. Terminated."
+ exit 1
+fi
+
+
+echo "LibriSpeech Data preparation done."
+exit 0
diff --git a/examples/librispeech/run_infer.sh b/examples/tiny/local/run_infer.sh
similarity index 53%
rename from examples/librispeech/run_infer.sh
rename to examples/tiny/local/run_infer.sh
index 2452b45e3da7460ddce9477afa9e01e3bc974172..bbaa094e9d52fe960250b2130d01a3b504acc17f 100644
--- a/examples/librispeech/run_infer.sh
+++ b/examples/tiny/local/run_infer.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd $MAIN_ROOT/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -13,7 +11,7 @@ cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u $MAIN_ROOT/infer.py \
--num_samples=10 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -27,14 +25,14 @@ python3 -u infer.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---infer_manifest='data/librispeech/manifest.test-clean' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/vocab.txt' \
---model_path='checkpoints/libri/step_final' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--infer_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in inference!"
diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/local/run_infer_golden.sh
similarity index 54%
rename from examples/tiny/run_infer_golden.sh
rename to examples/tiny/local/run_infer_golden.sh
index d18c21f540794dc7c234ae0000c1d36a0e880943..21663681ba1813c797aedac24bb9a7eb74f0d0c2 100644
--- a/examples/tiny/run_infer_golden.sh
+++ b/examples/tiny/local/run_infer_golden.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +10,7 @@ cd - > /dev/null
# download well-trained model
-cd models/librispeech > /dev/null
+cd ${MAIN_ROOT}/models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +20,7 @@ cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
--num_samples=10 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -36,14 +34,14 @@ python3 -u infer.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---infer_manifest='data/tiny/manifest.test-clean' \
---mean_std_path='models/librispeech/mean_std.npz' \
---vocab_path='models/librispeech/vocab.txt' \
---model_path='models/librispeech' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--infer_manifest="data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/librispeech" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in inference!"
diff --git a/examples/librispeech/run_test.sh b/examples/tiny/local/run_test.sh
similarity index 55%
rename from examples/librispeech/run_test.sh
rename to examples/tiny/local/run_test.sh
index e8e65c39ed1730ca029d3a7b56009f179fc9a669..ef1fa5a2dd094ae14e3781318d869bb135ad0ad8 100644
--- a/examples/librispeech/run_test.sh
+++ b/examples/tiny/local/run_test.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd $MAIN_ROOT/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -13,7 +11,7 @@ cd - > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u $MAIN_ROOT/test.py \
--batch_size=128 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -27,14 +25,14 @@ python3 -u test.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---test_manifest='data/librispeech/manifest.test-clean' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/vocab.txt' \
---model_path='checkpoints/libri/step_final' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
diff --git a/examples/librispeech/run_test_golden.sh b/examples/tiny/local/run_test_golden.sh
similarity index 55%
rename from examples/librispeech/run_test_golden.sh
rename to examples/tiny/local/run_test_golden.sh
index 1db36758a3e9546e68fb7c78e5b1c5d47183f792..9983fade8e7e40a29f4423e126aabda1fab469f4 100644
--- a/examples/librispeech/run_test_golden.sh
+++ b/examples/tiny/local/run_test_golden.sh
@@ -1,9 +1,7 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# download language model
-cd models/lm > /dev/null
+cd $MAIN_ROOT/models/lm > /dev/null
bash download_lm_en.sh
if [ $? -ne 0 ]; then
exit 1
@@ -12,7 +10,7 @@ cd - > /dev/null
# download well-trained model
-cd models/librispeech > /dev/null
+cd $MAIN_ROOT/models/librispeech > /dev/null
bash download_model.sh
if [ $? -ne 0 ]; then
exit 1
@@ -22,7 +20,7 @@ cd - > /dev/null
# evaluate model
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u $MAIN_ROOT/test.py \
--batch_size=128 \
--beam_size=500 \
--num_proc_bsearch=8 \
@@ -36,14 +34,14 @@ python3 -u test.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---test_manifest='data/librispeech/manifest.test-clean' \
---mean_std_path='models/librispeech/mean_std.npz' \
---vocab_path='models/librispeech/vocab.txt' \
---model_path='models/librispeech' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---decoding_method='ctc_beam_search' \
---error_rate_type='wer' \
---specgram_type='linear'
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="$MAIN_ROOT/models/librispeech/mean_std.npz" \
+--vocab_path="$MAIN_ROOT/models/librispeech/vocab.txt" \
+--model_path="$MAIN_ROOT/models/librispeech" \
+--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"
diff --git a/examples/tiny/run_train.sh b/examples/tiny/local/run_train.sh
similarity index 62%
rename from examples/tiny/run_train.sh
rename to examples/tiny/local/run_train.sh
index fe5b620301853de5defa7aa5d5a39ba20aded966..de9dcbd74138f6e81bfd950ec7389ca34d8152ac 100644
--- a/examples/tiny/run_train.sh
+++ b/examples/tiny/local/run_train.sh
@@ -1,12 +1,10 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# train model
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
export FLAGS_sync_nccl_allreduce=0
CUDA_VISIBLE_DEVICES=0,1,2,3 \
-python3 -u train.py \
+python3 -u ${MAIN_ROOT}/train.py \
--batch_size=4 \
--num_epoch=20 \
--num_conv_layers=2 \
@@ -24,14 +22,14 @@ python3 -u train.py \
--use_gpu=True \
--is_local=True \
--share_rnn_weights=True \
---train_manifest='data/tiny/manifest.tiny' \
---dev_manifest='data/tiny/manifest.tiny' \
---mean_std_path='data/tiny/mean_std.npz' \
---vocab_path='data/tiny/vocab.txt' \
---output_model_dir='./checkpoints/tiny' \
---augment_conf_path='conf/augmentation.config' \
---specgram_type='linear' \
---shuffle_method='batch_shuffle_clipped' \
+--train_manifest="data/manifest.tiny" \
+--dev_manifest="data/manifest.tiny" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--output_model_dir="./checkpoints/" \
+--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
+--specgram_type="linear" \
+--shuffle_method="batch_shuffle_clipped" \
if [ $? -ne 0 ]; then
echo "Failed in training!"
diff --git a/examples/tiny/run_tune.sh b/examples/tiny/local/run_tune.sh
similarity index 61%
rename from examples/tiny/run_tune.sh
rename to examples/tiny/local/run_tune.sh
index bec71111a131f6747396cea0a151d7b3d5945945..b5cc4d6a1b49ab770b87c08a0dd83cc736228285 100644
--- a/examples/tiny/run_tune.sh
+++ b/examples/tiny/local/run_tune.sh
@@ -1,10 +1,8 @@
#! /usr/bin/env bash
-cd ../.. > /dev/null
-
# grid-search for hyper-parameters in language model
CUDA_VISIBLE_DEVICES=0,1,2,3 \
-python3 -u tools/tune.py \
+python3 -u $MAIN_ROOT/tools/tune.py \
--num_batches=-1 \
--batch_size=128 \
--beam_size=500 \
@@ -23,13 +21,13 @@ python3 -u tools/tune.py \
--use_gru=False \
--use_gpu=True \
--share_rnn_weights=True \
---tune_manifest='data/tiny/manifest.dev-clean' \
---mean_std_path='data/tiny/mean_std.npz' \
---vocab_path='data/tiny/vocab.txt' \
---model_path='models/librispeech' \
---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
---error_rate_type='wer' \
---specgram_type='linear'
+--tune_manifest="data/manifest.dev-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="$MAIN_ROOT/models/librispeech" \
+--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
+--error_rate_type="wer" \
+--specgram_type="linear"
if [ $? -ne 0 ]; then
echo "Failed in tuning!"
diff --git a/examples/tiny/path.sh b/examples/tiny/path.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fd1cebba8ea9313ca30ee8bece2f6ddb0f29112d
--- /dev/null
+++ b/examples/tiny/path.sh
@@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
diff --git a/examples/tiny/run.sh b/examples/tiny/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c8e58913900677679a40210c1f2dae658c435db6
--- /dev/null
+++ b/examples/tiny/run.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+source path.sh
+
+# prepare data
+bash ./local/run_data.sh
+
+# test pretrained model
+bash ./local/run_test_golden.sh
+
+# infer with the pretrained model
+bash ./local/run_infer_golden.sh
+
+# train model
+bash ./local/run_train.sh
+
+# test model
+bash ./local/run_test.sh
+
+# infer model
+bash ./local/run_infer.sh
+
+# tune model
+bash ./local/run_tune.sh
diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh
deleted file mode 100644
index dd75ddadca17d9e4610d5841967c3032f6e5fe7f..0000000000000000000000000000000000000000
--- a/examples/tiny/run_data.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#! /usr/bin/env bash
-
-cd ../.. > /dev/null
-
-# prepare folder
-if [ ! -e data/tiny ]; then
- mkdir data/tiny
-fi
-
-# download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \
---manifest_prefix='data/tiny/manifest' \
---target_dir='./dataset/librispeech' \
---full_download='False'
-
-if [ $? -ne 0 ]; then
- echo "Prepare LibriSpeech failed. Terminated."
- exit 1
-fi
-
-head -n 64 data/tiny/manifest.dev-clean > data/tiny/manifest.tiny
-
-# build vocabulary
-python3 tools/build_vocab.py \
---count_threshold=0 \
---vocab_path='data/tiny/vocab.txt' \
---manifest_paths='data/tiny/manifest.tiny'
-
-if [ $? -ne 0 ]; then
- echo "Build vocabulary failed. Terminated."
- exit 1
-fi
-
-
-# compute mean and stddev for normalizer
-python3 tools/compute_mean_std.py \
---manifest_path='data/tiny/manifest.tiny' \
---num_samples=64 \
---specgram_type='linear' \
---output_path='data/tiny/mean_std.npz'
-
-if [ $? -ne 0 ]; then
- echo "Compute mean and stddev failed. Terminated."
- exit 1
-fi
-
-
-echo "LibriSpeech Data preparation done."
-exit 0
diff --git a/infer.py b/infer.py
index 910140282a7e2aa0659697a14dcee07e6a201b2f..ffcb48eb6f8b904f59a24dda045e6f39c45a4f7e 100644
--- a/infer.py
+++ b/infer.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Inferer for DeepSpeech2 model."""
import sys
diff --git a/model_utils/__init__.py b/model_utils/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..185a92b8d94d3426d616c0624f0f2ee04339349e 100644
--- a/model_utils/__init__.py
+++ b/model_utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/model_utils/model.py b/model_utils/model.py
index 68b963cdf4ff3d6ec4a0741076e8707d24b98e37..f4555bd69b8eb98c5fdec781b19812df791ab549 100644
--- a/model_utils/model.py
+++ b/model_utils/model.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains DeepSpeech2 model."""
import sys
diff --git a/model_utils/network.py b/model_utils/network.py
index b8812e609114e0d0e9ff81831822367fc8cb6818..19f9d887cef2dde2dd3fdfb4d6bb923e38233535 100644
--- a/model_utils/network.py
+++ b/model_utils/network.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import collections
import paddle.fluid as fluid
import numpy as np
diff --git a/test.py b/test.py
index 053a43acdb2eb40cdfc91ab00ade0675e025588c..d3b601e98bbc41c41ff91b953fae5c80bcfd8bbb 100644
--- a/test.py
+++ b/test.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Evaluation for DeepSpeech2 model."""
import argparse
import functools
diff --git a/tools/_init_paths.py b/tools/_init_paths.py
index 3bb2fd19722c6bf65fffc25f9d9f0b253125e4b8..c4b28c6433bad914acdddbe11a32536cfeebc8d7 100644
--- a/tools/_init_paths.py
+++ b/tools/_init_paths.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Set up paths for DS2"""
import os.path
import sys
diff --git a/tools/build_vocab.py b/tools/build_vocab.py
index 13ab843c41055cbcaca75fa66be090c68a3d7dee..77fd1fb637ffb6a54259ed8850d55fa9bff90f16 100644
--- a/tools/build_vocab.py
+++ b/tools/build_vocab.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Build vocabulary from manifest files.
Each item in vocabulary file is a character.
"""
diff --git a/tools/compute_mean_std.py b/tools/compute_mean_std.py
index 203f83a2c3cdadf6e344bc6430defc0596da6255..d40739f32ca8ac17fc25505332181c90b74604b5 100644
--- a/tools/compute_mean_std.py
+++ b/tools/compute_mean_std.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Compute mean and std for feature normalizer, and save to file."""
import argparse
import functools
diff --git a/tools/tune.py b/tools/tune.py
index 74007f55adb9d5e433b11a105ccbaadc1683f52c..36443e28bb61fab52ddce96a0909f4d57d368297 100644
--- a/tools/tune.py
+++ b/tools/tune.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Beam search parameters tuning for DeepSpeech2 model."""
import sys
diff --git a/train.py b/train.py
index caa7c266e64812d90e866b4b241595c8eb4486c1..067f6d7863c667af376fe8dd00c6f0e1780addf8 100644
--- a/train.py
+++ b/train.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Trainer for DeepSpeech2 model."""
import argparse
import functools
diff --git a/utils/__init__.py b/utils/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..185a92b8d94d3426d616c0624f0f2ee04339349e 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/utils/error_rate.py b/utils/error_rate.py
index 628a84cb4c9d2d488ad6e5044d4dfc75796e6041..d80546ee2d45c6c1bdeb102897d721e05d923026 100644
--- a/utils/error_rate.py
+++ b/utils/error_rate.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""This module provides functions to calculate error rate in different level.
e.g. wer for word-level, cer for char-level.
"""
diff --git a/utils/tests/test_error_rate.py b/utils/tests/test_error_rate.py
index efa04b827ae86e5268b5503888391c6222581713..80c5b192a4ed01802e88d59cf78d277e8374e685 100644
--- a/utils/tests/test_error_rate.py
+++ b/utils/tests/test_error_rate.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Test error rate."""
import unittest
diff --git a/utils/utility.py b/utils/utility.py
index 543f3ebce9ffa97c1eda494d7856083c885aceb6..cd716659319376f30a3d4ecddc0ae14b769a4ff5 100644
--- a/utils/utility.py
+++ b/utils/utility.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Contains common utility functions."""
import distutils.util