提交 b7a809a1 编写于 作者: Y Yancey1989

Add download API for datasets

上级 c1feccdc
...@@ -75,3 +75,8 @@ def test10(): ...@@ -75,3 +75,8 @@ def test10():
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch') 'test_batch')
def download():
    """Download the CIFAR-10 and CIFAR-100 archives.

    Each archive is requested through ``paddle.v2.dataset.common.download``
    under the ``'cifar'`` module name, together with its MD5 checksum.
    """
    archives = (
        (CIFAR10_URL, CIFAR10_MD5),
        (CIFAR100_URL, CIFAR100_MD5), )
    for archive_url, archive_md5 in archives:
        paddle.v2.dataset.common.download(archive_url, 'cifar', archive_md5)
...@@ -196,3 +196,11 @@ def test(): ...@@ -196,3 +196,11 @@ def test():
words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
return reader_creator(reader, word_dict, verb_dict, label_dict) return reader_creator(reader, word_dict, verb_dict, label_dict)
def download():
    """Download every file used by the conll05st dataset.

    The word, verb and target dictionaries, the embedding file and the data
    archive are requested, in that order, through
    ``paddle.v2.dataset.common.download`` under the ``'conll05st'`` module
    name.
    """
    resources = (
        (WORDDICT_URL, WORDDICT_MD5),
        (VERBDICT_URL, VERBDICT_MD5),
        (TRGDICT_URL, TRGDICT_MD5),
        (EMB_URL, EMB_MD5),
        (DATA_URL, DATA_MD5), )
    for resource_url, resource_md5 in resources:
        paddle.v2.dataset.common.download(resource_url, 'conll05st',
                                          resource_md5)
...@@ -123,3 +123,7 @@ def test(word_idx): ...@@ -123,3 +123,7 @@ def test(word_idx):
def word_dict():
    """Return the result of ``build_dict`` for the aclImdb review files.

    The pattern selects ``.txt`` paths under the train/test and pos/neg
    directories of ``aclImdb``. The second argument (150) is forwarded to
    ``build_dict`` unchanged -- presumably a frequency cutoff; confirm
    against ``build_dict``.
    """
    review_pattern = re.compile(
        "aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$")
    return build_dict(review_pattern, 150)
def download():
    """Download the IMDB archive at ``URL`` under the ``'imdb'`` module name."""
    paddle.v2.dataset.common.download(
        url=URL, module_name='imdb', md5sum=MD5)
...@@ -89,3 +89,7 @@ def train(word_idx, n): ...@@ -89,3 +89,7 @@ def train(word_idx, n):
def test(word_idx, n):
    """Create a reader over the PTB validation text.

    ``word_idx`` and ``n`` are handed to ``reader_creator`` unchanged,
    together with the path of ``ptb.valid.txt`` inside the archive.
    """
    valid_path = './simple-examples/data/ptb.valid.txt'
    return reader_creator(valid_path, word_idx, n)
def download():
    """Download the archive at ``URL`` under the ``"imikolov"`` module name."""
    fetch = paddle.v2.dataset.common.download
    fetch(URL, "imikolov", MD5)
...@@ -106,3 +106,8 @@ def test(): ...@@ -106,3 +106,8 @@ def test():
TEST_IMAGE_MD5), TEST_IMAGE_MD5),
paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
TEST_LABEL_MD5), 100) TEST_LABEL_MD5), 100)
def download():
    """Download all four MNIST files: train and test images and labels.

    The previous version fetched only the training image/label files, so a
    later call to ``test()`` still had to download the test files on demand.
    Every file is requested through ``paddle.v2.dataset.common.download``
    under the ``'mnist'`` module name with its MD5 checksum.
    """
    # NOTE(review): TEST_IMAGE_URL is assumed to be defined next to
    # TEST_LABEL_URL / TEST_IMAGE_MD5, which test() already uses -- confirm.
    files = (
        (TRAIN_IMAGE_URL, TRAIN_IMAGE_MD5),
        (TRAIN_LABEL_URL, TRAIN_LABEL_MD5),
        (TEST_IMAGE_URL, TEST_IMAGE_MD5),
        (TEST_LABEL_URL, TEST_LABEL_MD5), )
    for file_url, file_md5 in files:
        paddle.v2.dataset.common.download(file_url, 'mnist', file_md5)
...@@ -30,6 +30,9 @@ __all__ = [ ...@@ -30,6 +30,9 @@ __all__ = [
age_table = [1, 18, 25, 35, 45, 50, 56] age_table = [1, 18, 25, 35, 45, 50, 56]
# Location of the MovieLens ml-1m.zip archive and the MD5 checksum that is
# passed alongside it to the download helper.
URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
MD5 = 'c4d9eecfca2ab87c1945afe126590906'
class MovieInfo(object): class MovieInfo(object):
def __init__(self, index, categories, title): def __init__(self, index, categories, title):
...@@ -77,10 +80,7 @@ USER_INFO = None ...@@ -77,10 +80,7 @@ USER_INFO = None
def __initialize_meta_info__(): def __initialize_meta_info__():
fn = download( fn = download(URL, "movielens", MD5)
url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
module_name='movielens',
md5sum='c4d9eecfca2ab87c1945afe126590906')
global MOVIE_INFO global MOVIE_INFO
if MOVIE_INFO is None: if MOVIE_INFO is None:
pattern = re.compile(r'^(.*)\((\d+)\)$') pattern = re.compile(r'^(.*)\((\d+)\)$')
...@@ -205,5 +205,9 @@ def unittest(): ...@@ -205,5 +205,9 @@ def unittest():
print train_count, test_count print train_count, test_count
def download(url=None, module_name="movielens", md5sum=None):
    """Download a dataset file, defaulting to the MovieLens archive.

    Called with no arguments this fetches ``URL`` under the ``"movielens"``
    module name, exactly as before.

    The optional parameters exist because this module-level definition
    rebinds the bare name ``download`` that ``__initialize_meta_info__``
    calls as ``download(URL, "movielens", MD5)``. A zero-argument function
    would make that call raise ``TypeError``. Accepting and forwarding the
    same three arguments keeps both call styles working.

    :param url: address to fetch; ``None`` selects the module-level ``URL``.
    :param module_name: module name forwarded to the shared downloader.
    :param md5sum: expected checksum; ``None`` selects the module-level
        ``MD5``.
    :return: whatever ``paddle.v2.dataset.common.download`` returns (the
        caller in ``__initialize_meta_info__`` stores it as ``fn``).
    """
    # Imported locally so this function does not depend on how the module
    # imported the shared helper at file level (it may only have the bare
    # name ``download``, which this definition replaces).
    import paddle.v2.dataset.common

    if url is None:
        url = URL
    if md5sum is None:
        md5sum = MD5
    return paddle.v2.dataset.common.download(url, module_name, md5sum)
if __name__ == '__main__': if __name__ == '__main__':
unittest() unittest()
...@@ -125,3 +125,7 @@ def test(): ...@@ -125,3 +125,7 @@ def test():
""" """
data_set = load_sentiment_data() data_set = load_sentiment_data()
return reader_creator(data_set[NUM_TRAINING_INSTANCES:]) return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
def download():
    """Download the NLTK ``movie_reviews`` corpus into ``common.DATA_HOME``."""
    target_dir = common.DATA_HOME
    nltk.download('movie_reviews', download_dir=target_dir)
...@@ -89,3 +89,7 @@ def test(): ...@@ -89,3 +89,7 @@ def test():
yield d[:-1], d[-1:] yield d[:-1], d[-1:]
return reader return reader
def download():
    """Download the file at ``URL`` under the ``'uci_housing'`` module name."""
    request = (URL, 'uci_housing', MD5)
    paddle.v2.dataset.common.download(*request)
...@@ -103,3 +103,7 @@ def test(dict_size): ...@@ -103,3 +103,7 @@ def test(dict_size):
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'test/test', dict_size) 'test/test', dict_size)
def download():
    """Download the archive at ``URL_TRAIN`` under the ``'wmt14'`` module name."""
    paddle.v2.dataset.common.download(
        url=URL_TRAIN, module_name='wmt14', md5sum=MD5_TRAIN)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册