From 14eb5b8e104d89bdb82be40e31acb5a0f7bc79e0 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Fri, 24 Mar 2017 10:51:12 +0800 Subject: [PATCH] rename fetch_all to fetch; add fetch_all function --- python/paddle/v2/dataset/cifar.py | 21 ++++++++------------- python/paddle/v2/dataset/common.py | 12 ++++++++++++ python/paddle/v2/dataset/conll05.py | 12 ++++++------ python/paddle/v2/dataset/imdb.py | 2 +- python/paddle/v2/dataset/imikolov.py | 2 +- python/paddle/v2/dataset/mnist.py | 4 +++- python/paddle/v2/dataset/movielens.py | 4 ++-- python/paddle/v2/dataset/sentiment.py | 4 ++-- python/paddle/v2/dataset/uci_housing.py | 4 ++-- python/paddle/v2/dataset/wmt14.py | 12 +++++------- 10 files changed, 42 insertions(+), 35 deletions(-) diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index c38caf7a897..3a8b98b8f04 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -20,7 +20,7 @@ TODO(yuyang18): Complete the comments. import cPickle import itertools import numpy -import paddle.v2.dataset.common +from common import download import tarfile __all__ = ['train100', 'test100', 'train10', 'test10'] @@ -55,28 +55,23 @@ def reader_creator(filename, sub_name): def train100(): return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), - 'train') + download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') def test100(): - return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), - 'test') + return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') def train10(): return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'data_batch') + download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') def test10(): return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'test_batch') + download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') -def fetch_data(): - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) +def fetch(): + download(CIFAR10_URL, 'cifar', CIFAR10_MD5) + download(CIFAR100_URL, 'cifar', CIFAR100_MD5) diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 3021b68ddb0..7021a6da05d 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -17,6 +17,8 @@ import hashlib import os import shutil import sys +import importlib +import paddle.v2.dataset __all__ = ['DATA_HOME', 'download', 'md5file'] @@ -69,3 +71,13 @@ def dict_add(a_dict, ele): a_dict[ele] += 1 else: a_dict[ele] = 1 + + +def fetch_all(): + for module_name in filter(lambda x: not x.startswith("__"), + dir(paddle.v2.dataset)): + if "fetch" in dir( + importlib.import_module("paddle.v2.dataset.%s" % module_name)): + getattr( + importlib.import_module("paddle.v2.dataset.%s" % module_name), + "fetch")() diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index dede47cbd39..f1b0ce16f21 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -198,9 +198,9 @@ def test(): return reader_creator(reader, word_dict, verb_dict, label_dict) -def fetch_data(): - paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) - paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) - paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) - paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) - paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) +def fetch(): + download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) + download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) + download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) + download(EMB_URL, 'conll05st', EMB_MD5) + download(DATA_URL, 'conll05st', DATA_MD5) diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 05e67f39d14..5284017ce08 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -125,5 +125,5 @@ def word_dict(): re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) -def fetch_data(): +def fetch(): paddle.v2.dataset.common.download(URL, 'imdb', MD5) diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index b89628cea5b..2931d06e7eb 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -91,5 +91,5 @@ def test(word_idx, n): return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n) -def fetch_data(): +def fetch(): paddle.v2.dataset.common.download(URL, "imikolov", MD5) diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 8f1ce4df25e..48a39b5493a 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -108,6 +108,8 @@ def test(): TEST_LABEL_MD5), 100) -def fetch_data(): +def fetch(): paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) + paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index cebf8a13ec2..e148ddeca03 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -205,8 +205,8 @@ def unittest(): print train_count, test_count -def fetch_data(): - paddle.v2.dataset.common.download(URL, "movielens", MD5) +def fetch(): + download(URL, "movielens", MD5) if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 3183288a7ae..0eeb6d5affd 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -26,7 +26,7 @@ from itertools import chain import nltk from nltk.corpus import movie_reviews -import paddle.v2.dataset.common +import common __all__ = ['train', 'test', 'get_word_dict'] NUM_TRAINING_INSTANCES = 1600 @@ -127,5 +127,5 @@ def test(): return reader_creator(data_set[NUM_TRAINING_INSTANCES:]) -def fetch_data(): +def fetch(): nltk.download('movie_reviews', download_dir=common.DATA_HOME) diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index 5066c0886ba..dab8620441c 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -91,5 +91,5 @@ def test(): return reader -def fetch_data(): - paddle.v2.dataset.common.download(URL, 'uci_housing', MD5) +def fetch(): + download(URL, 'uci_housing', MD5) diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 759c77ccfd0..ee63a93f5ad 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -16,7 +16,7 @@ wmt14 dataset """ import tarfile -import paddle.v2.dataset.common +from paddle.v2.dataset.common import download __all__ = ['train', 'test', 'build_dict'] @@ -95,15 +95,13 @@ def reader_creator(tar_file, file_name, dict_size): def train(dict_size): return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'train/train', dict_size) + download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) def test(dict_size): return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'test/test', dict_size) + download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) -def fetch_data(): - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) +def fetch(): + download(URL_TRAIN, 'wmt14', MD5_TRAIN) -- GitLab