未验证 提交 4ce08040 编写于 作者: H Hui Zhang 提交者: GitHub

Merge pull request #711 from PaddlePaddle/ds2-en

libri s0 w/ spec-aug result
...@@ -34,6 +34,8 @@ URL_ROOT = 'http://www.openslr.org/resources/33' ...@@ -34,6 +34,8 @@ URL_ROOT = 'http://www.openslr.org/resources/33'
# URL_ROOT = 'https://openslr.magicdatatech.com/resources/33' # URL_ROOT = 'https://openslr.magicdatatech.com/resources/33'
DATA_URL = URL_ROOT + '/data_aishell.tgz' DATA_URL = URL_ROOT + '/data_aishell.tgz'
MD5_DATA = '2f494334227864a8a8fec932999db9d8' MD5_DATA = '2f494334227864a8a8fec932999db9d8'
RESOURCE_URL = URL_ROOT + '/resource_aishell.tgz'
MD5_RESOURCE = '957d480a0fcac85fc18e550756f624e5'
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument( parser.add_argument(
...@@ -110,7 +112,7 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -110,7 +112,7 @@ def create_manifest(data_dir, manifest_path_prefix):
print(f"{total_sec / total_num} sec/utt", file=f) print(f"{total_sec / total_num} sec/utt", file=f)
def prepare_dataset(url, md5sum, target_dir, manifest_path): def prepare_dataset(url, md5sum, target_dir, manifest_path=None):
"""Download, unpack and create manifest file.""" """Download, unpack and create manifest file."""
data_dir = os.path.join(target_dir, 'data_aishell') data_dir = os.path.join(target_dir, 'data_aishell')
if not os.path.exists(data_dir): if not os.path.exists(data_dir):
...@@ -124,7 +126,9 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path): ...@@ -124,7 +126,9 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path):
else: else:
print("Skip downloading and unpacking. Data already exists in %s." % print("Skip downloading and unpacking. Data already exists in %s." %
target_dir) target_dir)
create_manifest(data_dir, manifest_path)
if manifest_path:
create_manifest(data_dir, manifest_path)
def main(): def main():
...@@ -137,6 +141,12 @@ def main(): ...@@ -137,6 +141,12 @@ def main():
target_dir=args.target_dir, target_dir=args.target_dir,
manifest_path=args.manifest_prefix) manifest_path=args.manifest_prefix)
prepare_dataset(
url=RESOURCE_URL,
md5sum=MD5_RESOURCE,
target_dir=args.target_dir,
manifest_path=None)
print("Data download and manifest prepare done!") print("Data download and manifest prepare done!")
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
| Model | Params | release | Config | Test set | Loss | WER | | Model | Params | release | Config | Test set | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- | --- |
| DeepSpeech2 | 42.96M | 2.2.0 | conf/deepspeech2.yaml + spec_aug | test-clean | 14.49190807 | 0.067283 |
| DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | test-clean | 15.184467315673828 | 0.072154 | | DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | test-clean | 15.184467315673828 | 0.072154 |
| DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | test-clean | - | 0.073973 | | DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | test-clean | - | 0.073973 |
| DeepSpeech2 | 42.96M | 1.8.5 | - | test-clean | - | 0.074939 | | DeepSpeech2 | 42.96M | 1.8.5 | - | test-clean | - | 0.074939 |
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册