未验证 提交 afe49c5f 编写于 作者: H Hui Zhang 提交者: GitHub

Merge pull request #517 from PaddlePaddle/1.8

Update dataset download URLs (OpenSLR mirror) and remove useless code
......@@ -19,6 +19,7 @@ from data_utils.utility import download, unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = 'http://www.openslr.org/resources/33'
+URL_ROOT = 'https://openslr.magicdatatech.com/resources/33'
DATA_URL = URL_ROOT + '/data_aishell.tgz'
MD5_DATA = '2f494334227864a8a8fec932999db9d8'
......
......@@ -20,6 +20,7 @@ import io
from data_utils.utility import download, unpack
URL_ROOT = "http://www.openslr.org/resources/12"
+URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
......
......@@ -11,6 +11,7 @@ from __future__ import print_function
import os
import codecs
import datetime
import soundfile
import json
import argparse
......
......@@ -276,8 +276,8 @@ class DataGenerator(object):
def reader():
for instance in manifest:
inst = self.process_utterance(instance["audio_filepath"],
-instance["text"]),
-yield inst[0]
+instance["text"])
+yield inst
return reader
......
......@@ -3,6 +3,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from data_utils.audio import AudioSegment
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册