diff --git a/dataset/aidatatang_200zh/README.md b/dataset/aidatatang_200zh/README.md index e6f1eefbd1f9f885bb36b075f79e3855bfc4b834..addc323a6c5e1dff621a0acf6fc8a1c6d39feae9 100644 --- a/dataset/aidatatang_200zh/README.md +++ b/dataset/aidatatang_200zh/README.md @@ -1,4 +1,4 @@ -# [Aidatatang_200zh](http://www.openslr.org/62/) +# [Aidatatang_200zh](http://openslr.elda.org/62/) Aidatatang_200zh is a free Chinese Mandarin speech corpus provided by Beijing DataTang Technology Co., Ltd under Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License. The contents and the corresponding descriptions of the corpus include: diff --git a/dataset/aishell/README.md b/dataset/aishell/README.md index 6770cd20777c441601e174f77d2801f7559ee767..a7dd0cf326ad51dc49fc83207a89fe9adc457dbf 100644 --- a/dataset/aishell/README.md +++ b/dataset/aishell/README.md @@ -1,3 +1,3 @@ -# [Aishell1](http://www.openslr.org/33/) +# [Aishell1](http://openslr.elda.org/33/) This Open Source Mandarin Speech Corpus, AISHELL-ASR0009-OS1, is 178 hours long. It is a part of AISHELL-ASR0009, of which utterance contains 11 domains, including smart home, autonomous driving, and industrial production. The whole recording was put in quiet indoor environment, using 3 different devices at the same time: high fidelity microphone (44.1kHz, 16-bit,); Android-system mobile phone (16kHz, 16-bit), iOS-system mobile phone (16kHz, 16-bit). Audios in high fidelity were re-sampled to 16kHz to build AISHELL- ASR0009-OS1. 400 speakers from different accent areas in China were invited to participate in the recording. The manual transcription accuracy rate is above 95%, through professional speech annotation and strict quality inspection. The corpus is divided into training, development and testing sets. ( This database is free for academic research, not in the commerce, if without permission. ) diff --git a/dataset/aishell/aishell.py b/dataset/aishell/aishell.py index 7431fc08369546f372c93dc923f50300f1da10a3..ec43104dbc9dc4efc693c6b97b5fb004bc14ce1e 100644 --- a/dataset/aishell/aishell.py +++ b/dataset/aishell/aishell.py @@ -31,7 +31,7 @@ from utils.utility import unpack DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = 'http://www.openslr.org/resources/33' +URL_ROOT = 'http://openslr.elda.org/resources/33' # URL_ROOT = 'https://openslr.magicdatatech.com/resources/33' DATA_URL = URL_ROOT + '/data_aishell.tgz' MD5_DATA = '2f494334227864a8a8fec932999db9d8' diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py index 65cab2490305762b84a06408b6d302517caea182..2d6f1763d9eccda4d8208538e0b76174c4c8445d 100644 --- a/dataset/librispeech/librispeech.py +++ b/dataset/librispeech/librispeech.py @@ -31,7 +31,7 @@ import soundfile from utils.utility import download from utils.utility import unpack -URL_ROOT = "http://www.openslr.org/resources/12" +URL_ROOT = "http://openslr.elda.org/resources/12" #URL_ROOT = "https://openslr.magicdatatech.com/resources/12" URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz" URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz" diff --git a/dataset/magicdata/README.md b/dataset/magicdata/README.md index 083aee97b9fec0bd916fdd1fc125319881894c0c..4641a21d6cdfb765605440a66a091d35c6daee38 100644 --- a/dataset/magicdata/README.md +++ b/dataset/magicdata/README.md @@ -1,4 +1,4 @@ -# [MagicData](http://www.openslr.org/68/) +# [MagicData](http://openslr.elda.org/68/) MAGICDATA Mandarin Chinese Read Speech Corpus was developed by MAGIC DATA Technology Co., Ltd. and freely published for non-commercial use. The contents and the corresponding descriptions of the corpus include: diff --git a/dataset/mini_librispeech/mini_librispeech.py b/dataset/mini_librispeech/mini_librispeech.py index 730c73a8b4dc44691351717de1bfe918f3b957ac..0eb80bf8f52a6ea23e114948ebaec30c5ad8d4cd 100644 --- a/dataset/mini_librispeech/mini_librispeech.py +++ b/dataset/mini_librispeech/mini_librispeech.py @@ -30,7 +30,7 @@ import soundfile from utils.utility import download from utils.utility import unpack -URL_ROOT = "http://www.openslr.org/resources/31" +URL_ROOT = "http://openslr.elda.org/resources/31" URL_TRAIN_CLEAN = URL_ROOT + "/train-clean-5.tar.gz" URL_DEV_CLEAN = URL_ROOT + "/dev-clean-2.tar.gz" diff --git a/dataset/musan/musan.py b/dataset/musan/musan.py index 2ac701bed0c9c24be1d1dffbd0482b6f4ce3f473..ae3430b2a3e5bc57631de668279b3ec3c9225b44 100644 --- a/dataset/musan/musan.py +++ b/dataset/musan/musan.py @@ -34,7 +34,7 @@ from utils.utility import unpack DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = 'https://www.openslr.org/resources/17' +URL_ROOT = 'https://openslr.elda.org/resources/17' DATA_URL = URL_ROOT + '/musan.tar.gz' MD5_DATA = '0c472d4fc0c5141eca47ad1ffeb2a7df' diff --git a/dataset/primewords/README.md b/dataset/primewords/README.md index a4f1ed65d01cc5db123f68beba3f69c7ef8be8ae..dba51cec7998fe3411613d58ce7571c2ddd47220 100644 --- a/dataset/primewords/README.md +++ b/dataset/primewords/README.md @@ -1,4 +1,4 @@ -# [Primewords](http://www.openslr.org/47/) +# [Primewords](http://openslr.elda.org/47/) This free Chinese Mandarin speech corpus set is released by Shanghai Primewords Information Technology Co., Ltd. The corpus is recorded by smart mobile phones from 296 native Chinese speakers. The transcription accuracy is larger than 98%, at the confidence level of 95%. It is free for academic use. diff --git a/dataset/rir_noise/rir_noise.py b/dataset/rir_noise/rir_noise.py index 009175e5bcce158b427cdf676540f2d1a7464032..c409ce2117c101431aa76f7e48c5cfcebd80c455 100644 --- a/dataset/rir_noise/rir_noise.py +++ b/dataset/rir_noise/rir_noise.py @@ -34,7 +34,7 @@ from utils.utility import unzip DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = '--no-check-certificate http://www.openslr.org/resources/28' +URL_ROOT = '--no-check-certificate http://openslr.elda.org/resources/28' DATA_URL = URL_ROOT + '/rirs_noises.zip' MD5_DATA = 'e6f48e257286e05de56413b4779d8ffb' diff --git a/dataset/st-cmds/README.md b/dataset/st-cmds/README.md index c7ae50e59d206e47cecbe19fe42d3f35004f603a..bbf85c3e7ef6c5f1194622686b9941a743d013bd 100644 --- a/dataset/st-cmds/README.md +++ b/dataset/st-cmds/README.md @@ -1 +1 @@ -# [FreeST](http://www.openslr.org/38/) +# [FreeST](http://openslr.elda.org/38/) diff --git a/dataset/thchs30/README.md b/dataset/thchs30/README.md index 6b59d663a2d94fef01f42e9c7d8191ec10b4b43a..b488a3551a81751b883f1b6311e8e3424094aba4 100644 --- a/dataset/thchs30/README.md +++ b/dataset/thchs30/README.md @@ -1,4 +1,4 @@ -# [THCHS30](http://www.openslr.org/18/) +# [THCHS30](http://openslr.elda.org/18/) This is the *data part* of the `THCHS30 2015` acoustic data & scripts dataset. diff --git a/dataset/thchs30/thchs30.py b/dataset/thchs30/thchs30.py index cdfc0a75c0aacfdf89492d2f83642cb7f5decea8..d41c0e175c7ccd2a8252592908b4cbaf89bade72 100644 --- a/dataset/thchs30/thchs30.py +++ b/dataset/thchs30/thchs30.py @@ -32,7 +32,7 @@ from utils.utility import unpack DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') -URL_ROOT = 'http://www.openslr.org/resources/18' +URL_ROOT = 'http://openslr.elda.org/resources/18' # URL_ROOT = 'https://openslr.magicdatatech.com/resources/18' DATA_URL = URL_ROOT + '/data_thchs30.tgz' TEST_NOISE_URL = URL_ROOT + '/test-noise.tgz'