未验证 提交 afe49c5f 编写于 作者: Hui Zhang 提交者: GitHub

Merge pull request #517 from PaddlePaddle/1.8

update data source and remove useless code
...@@ -19,6 +19,7 @@ from data_utils.utility import download, unpack ...@@ -19,6 +19,7 @@ from data_utils.utility import download, unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = 'http://www.openslr.org/resources/33' URL_ROOT = 'http://www.openslr.org/resources/33'
URL_ROOT = 'https://openslr.magicdatatech.com/resources/33'
DATA_URL = URL_ROOT + '/data_aishell.tgz' DATA_URL = URL_ROOT + '/data_aishell.tgz'
MD5_DATA = '2f494334227864a8a8fec932999db9d8' MD5_DATA = '2f494334227864a8a8fec932999db9d8'
......
...@@ -20,6 +20,7 @@ import io ...@@ -20,6 +20,7 @@ import io
from data_utils.utility import download, unpack from data_utils.utility import download, unpack
URL_ROOT = "http://www.openslr.org/resources/12" URL_ROOT = "http://www.openslr.org/resources/12"
URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz" URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz" URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz" URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
......
...@@ -11,6 +11,7 @@ from __future__ import print_function ...@@ -11,6 +11,7 @@ from __future__ import print_function
import os import os
import codecs import codecs
import datetime
import soundfile import soundfile
import json import json
import argparse import argparse
......
...@@ -276,8 +276,8 @@ class DataGenerator(object): ...@@ -276,8 +276,8 @@ class DataGenerator(object):
def reader(): def reader():
for instance in manifest: for instance in manifest:
inst = self.process_utterance(instance["audio_filepath"], inst = self.process_utterance(instance["audio_filepath"],
instance["text"]), instance["text"])
yield inst[0] yield inst
return reader return reader
......
...@@ -3,6 +3,7 @@ from __future__ import absolute_import ...@@ -3,6 +3,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np
from data_utils.audio import AudioSegment from data_utils.audio import AudioSegment
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册