diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py
index d9f7a830ee60a331b55a1e218923e690103e1c5b..f8b97f7c1fb400cc5c3cd9d9c1c4e179f81f22fc 100644
--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -75,3 +75,9 @@ def test10():
     return reader_creator(
         paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
         'test_batch')
+
+
+def download():
+    """Download the CIFAR-10 and CIFAR-100 archives."""
+    paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
+    paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py
index 9eab49ee39325c1c60fc511e0bd834e83aa987f0..d39f81e4125d15e1b9363ddc051b4ac4321fcb83 100644
--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -196,3 +196,12 @@ def test():
         words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
         props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
     return reader_creator(reader, word_dict, verb_dict, label_dict)
+
+
+def download():
+    """Download the conll05st dictionary, embedding and data files."""
+    paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)
+    paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)
+    paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)
+    paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
+    paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index 76019d9f54020ff6f02c17eb6047cbd014a8ccf2..c4c6d738ed7588fabd4af41082487298703b482b 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -123,3 +123,8 @@ def test(word_idx):
 def word_dict():
     return build_dict(
         re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
+
+
+def download():
+    """Download the archive at URL for the 'imdb' dataset module."""
+    paddle.v2.dataset.common.download(URL, 'imdb', MD5)
diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py
index 97c160f111d09d61eb860c7f02552e635f2400a7..db83361d2fb2fba9efb73c295044dd702e05b250 100644
--- a/python/paddle/v2/dataset/imikolov.py
+++ b/python/paddle/v2/dataset/imikolov.py
@@ -89,3 +89,8 @@ def train(word_idx, n):
 
 def test(word_idx, n):
     return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
+
+
+def download():
+    """Download the archive at URL for the 'imikolov' dataset module."""
+    paddle.v2.dataset.common.download(URL, "imikolov", MD5)
diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py
index 16f2fcb99de4cb1971a7375a97b5daa209ee95ef..7b536bfa030a3b9244ef581ccefea2f0a70cd9a5 100644
--- a/python/paddle/v2/dataset/mnist.py
+++ b/python/paddle/v2/dataset/mnist.py
@@ -106,3 +106,11 @@ def test():
                                           TEST_IMAGE_MD5),
         paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
                                           TEST_LABEL_MD5), 100)
+
+
+def download():
+    """Download the MNIST training and test image and label files."""
+    paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
+    paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
+    paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
+    paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5)
diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py
index 25fd8227da2f219d75c6b830e65627ecf35be453..ced1b4c273437930da7d583dc39673e3e2ab56c8 100644
--- a/python/paddle/v2/dataset/movielens.py
+++ b/python/paddle/v2/dataset/movielens.py
@@ -30,6 +30,9 @@ __all__ = [
 
 age_table = [1, 18, 25, 35, 45, 50, 56]
 
+URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
+MD5 = 'c4d9eecfca2ab87c1945afe126590906'
+
 
 class MovieInfo(object):
     def __init__(self, index, categories, title):
@@ -77,10 +80,10 @@ USER_INFO = None
 
 
 def __initialize_meta_info__():
-    fn = download(
-        url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
-        module_name='movielens',
-        md5sum='c4d9eecfca2ab87c1945afe126590906')
+    # A module-level download() is defined further down and rebinds the bare
+    # name, so reach the common helper through its fully qualified path.
+    import paddle.v2.dataset.common
+    fn = paddle.v2.dataset.common.download(URL, "movielens", MD5)
     global MOVIE_INFO
     if MOVIE_INFO is None:
         pattern = re.compile(r'^(.*)\((\d+)\)$')
@@ -205,5 +208,11 @@ def unittest():
     print train_count, test_count
 
 
+def download():
+    """Download the MovieLens ml-1m archive referenced by URL."""
+    import paddle.v2.dataset.common
+    paddle.v2.dataset.common.download(URL, "movielens", MD5)
+
+
 if __name__ == '__main__':
     unittest()
diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
index 71689fd61b6b14a7b5072caff4e2fd48a7f74072..b1d517fc6a112051769528d18e17ac528c3204eb 100644
--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -125,3 +125,8 @@ def test():
     """
     data_set = load_sentiment_data()
     return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
+
+
+def download():
+    """Download the NLTK 'movie_reviews' corpus into common.DATA_HOME."""
+    nltk.download('movie_reviews', download_dir=common.DATA_HOME)
diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py
index 27f454b137e3a40febd19cf085e2f4034cc16b24..52bfc9e3382f23462e65e9b04915c7c9b1e7900d 100644
--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -89,3 +89,8 @@ def test():
             yield d[:-1], d[-1:]
 
     return reader
+
+
+def download():
+    """Download the file at URL for the 'uci_housing' dataset module."""
+    paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
index c686870a497668517d1c78c11c616ad8a71a2980..a5b4ea98626fc4f17a2179ee1ad6884ce0736139 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -103,3 +103,8 @@ def test(dict_size):
     return reader_creator(
         paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
         'test/test', dict_size)
+
+
+def download():
+    """Download the archive at URL_TRAIN for the 'wmt14' dataset module."""
+    paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)