From 69c36a56a86c83fa5e317734bfbea03bfd3639f6 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Tue, 28 Jun 2022 06:29:54 +0000 Subject: [PATCH] fix url in librispeech.py --- dataset/aidatatang_200zh/README.md | 2 +- dataset/aishell/README.md | 2 +- dataset/aishell/aishell.py | 2 +- dataset/librispeech/librispeech.py | 2 +- dataset/magicdata/README.md | 2 +- dataset/mini_librispeech/mini_librispeech.py | 2 +- dataset/musan/musan.py | 2 +- dataset/primewords/README.md | 2 +- dataset/rir_noise/rir_noise.py | 2 +- dataset/st-cmds/README.md | 2 +- dataset/thchs30/README.md | 2 +- dataset/thchs30/thchs30.py | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dataset/aidatatang_200zh/README.md b/dataset/aidatatang_200zh/README.md index e6f1eefb..addc323a 100644 --- a/dataset/aidatatang_200zh/README.md +++ b/dataset/aidatatang_200zh/README.md @@ -1,4 +1,4 @@ -# [Aidatatang_200zh](http://www.openslr.org/62/) +# [Aidatatang_200zh](http://openslr.elda.org/62/) Aidatatang_200zh is a free Chinese Mandarin speech corpus provided by Beijing DataTang Technology Co., Ltd under Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License. The contents and the corresponding descriptions of the corpus include: diff --git a/dataset/aishell/README.md b/dataset/aishell/README.md index 6770cd20..a7dd0cf3 100644 --- a/dataset/aishell/README.md +++ b/dataset/aishell/README.md @@ -1,3 +1,3 @@ -# [Aishell1](http://www.openslr.org/33/) +# [Aishell1](http://openslr.elda.org/33/) This Open Source Mandarin Speech Corpus, AISHELL-ASR0009-OS1, is 178 hours long. It is a part of AISHELL-ASR0009, of which utterance contains 11 domains, including smart home, autonomous driving, and industrial production. The whole recording was put in quiet indoor environment, using 3 different devices at the same time: high fidelity microphone (44.1kHz, 16-bit,); Android-system mobile phone (16kHz, 16-bit), iOS-system mobile phone (16kHz, 16-bit). Audios in high fidelity were re-sampled to 16kHz to build AISHELL- ASR0009-OS1. 400 speakers from different accent areas in China were invited to participate in the recording. The manual transcription accuracy rate is above 95%, through professional speech annotation and strict quality inspection. The corpus is divided into training, development and testing sets. ( This database is free for academic research, not in the commerce, if without permission. ) diff --git a/dataset/aishell/aishell.py b/dataset/aishell/aishell.py index 7431fc08..ec43104d 100644 --- a/dataset/aishell/aishell.py +++ b/dataset/aishell/aishell.py @@ -31,7 +31,7 @@ from utils.utility import unpack DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = 'http://www.openslr.org/resources/33' +URL_ROOT = 'http://openslr.elda.org/resources/33' # URL_ROOT = 'https://openslr.magicdatatech.com/resources/33' DATA_URL = URL_ROOT + '/data_aishell.tgz' MD5_DATA = '2f494334227864a8a8fec932999db9d8' diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py index 65cab249..2d6f1763 100644 --- a/dataset/librispeech/librispeech.py +++ b/dataset/librispeech/librispeech.py @@ -31,7 +31,7 @@ import soundfile from utils.utility import download from utils.utility import unpack -URL_ROOT = "http://www.openslr.org/resources/12" +URL_ROOT = "http://openslr.elda.org/resources/12" #URL_ROOT = "https://openslr.magicdatatech.com/resources/12" URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz" URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz" diff --git a/dataset/magicdata/README.md b/dataset/magicdata/README.md index 083aee97..4641a21d 100644 --- a/dataset/magicdata/README.md +++ b/dataset/magicdata/README.md @@ -1,4 +1,4 @@ -# [MagicData](http://www.openslr.org/68/) +# [MagicData](http://openslr.elda.org/68/) MAGICDATA Mandarin Chinese Read Speech Corpus was developed by MAGIC DATA Technology Co., Ltd. and freely published for non-commercial use. The contents and the corresponding descriptions of the corpus include: diff --git a/dataset/mini_librispeech/mini_librispeech.py b/dataset/mini_librispeech/mini_librispeech.py index 730c73a8..0eb80bf8 100644 --- a/dataset/mini_librispeech/mini_librispeech.py +++ b/dataset/mini_librispeech/mini_librispeech.py @@ -30,7 +30,7 @@ import soundfile from utils.utility import download from utils.utility import unpack -URL_ROOT = "http://www.openslr.org/resources/31" +URL_ROOT = "http://openslr.elda.org/resources/31" URL_TRAIN_CLEAN = URL_ROOT + "/train-clean-5.tar.gz" URL_DEV_CLEAN = URL_ROOT + "/dev-clean-2.tar.gz" diff --git a/dataset/musan/musan.py b/dataset/musan/musan.py index 2ac701be..ae3430b2 100644 --- a/dataset/musan/musan.py +++ b/dataset/musan/musan.py @@ -34,7 +34,7 @@ from utils.utility import unpack DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = 'https://www.openslr.org/resources/17' +URL_ROOT = 'https://openslr.elda.org/resources/17' DATA_URL = URL_ROOT + '/musan.tar.gz' MD5_DATA = '0c472d4fc0c5141eca47ad1ffeb2a7df' diff --git a/dataset/primewords/README.md b/dataset/primewords/README.md index a4f1ed65..dba51cec 100644 --- a/dataset/primewords/README.md +++ b/dataset/primewords/README.md @@ -1,4 +1,4 @@ -# [Primewords](http://www.openslr.org/47/) +# [Primewords](http://openslr.elda.org/47/) This free Chinese Mandarin speech corpus set is released by Shanghai Primewords Information Technology Co., Ltd. The corpus is recorded by smart mobile phones from 296 native Chinese speakers. The transcription accuracy is larger than 98%, at the confidence level of 95%. It is free for academic use. diff --git a/dataset/rir_noise/rir_noise.py b/dataset/rir_noise/rir_noise.py index 009175e5..c409ce21 100644 --- a/dataset/rir_noise/rir_noise.py +++ b/dataset/rir_noise/rir_noise.py @@ -34,7 +34,7 @@ from utils.utility import unzip DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = '--no-check-certificate http://www.openslr.org/resources/28' +URL_ROOT = '--no-check-certificate http://openslr.elda.org/resources/28' DATA_URL = URL_ROOT + '/rirs_noises.zip' MD5_DATA = 'e6f48e257286e05de56413b4779d8ffb' diff --git a/dataset/st-cmds/README.md b/dataset/st-cmds/README.md index c7ae50e5..bbf85c3e 100644 --- a/dataset/st-cmds/README.md +++ b/dataset/st-cmds/README.md @@ -1 +1 @@ -# [FreeST](http://www.openslr.org/38/) +# [FreeST](http://openslr.elda.org/38/) diff --git a/dataset/thchs30/README.md b/dataset/thchs30/README.md index 6b59d663..b488a355 100644 --- a/dataset/thchs30/README.md +++ b/dataset/thchs30/README.md @@ -1,4 +1,4 @@ -# [THCHS30](http://www.openslr.org/18/) +# [THCHS30](http://openslr.elda.org/18/) This is the *data part* of the `THCHS30 2015` acoustic data & scripts dataset. diff --git a/dataset/thchs30/thchs30.py b/dataset/thchs30/thchs30.py index cdfc0a75..d41c0e17 100644 --- a/dataset/thchs30/thchs30.py +++ b/dataset/thchs30/thchs30.py @@ -32,7 +32,7 @@ from utils.utility import unpack DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = 'http://www.openslr.org/resources/18' +URL_ROOT = 'http://openslr.elda.org/resources/18' # URL_ROOT = 'https://openslr.magicdatatech.com/resources/18' DATA_URL = URL_ROOT + '/data_thchs30.tgz' TEST_NOISE_URL = URL_ROOT + '/test-noise.tgz' -- GitLab