cache_dataset.py 1.7 KB
Newer Older
Y
Yancey1989 已提交
1 2
#!/bin/env python
import paddle.v2.dataset as dataset
G
gongweibao 已提交
3 4 5 6 7 8 9
import nltk

# Cache cifar: CIFAR-100 is fetched first, then CIFAR-10, both under 'cifar'.
for _cifar_url, _cifar_md5 in (
        (dataset.cifar.CIFAR100_URL, dataset.cifar.CIFAR100_MD5),
        (dataset.cifar.CIFAR10_URL, dataset.cifar.CIFAR10_MD5)):
    dataset.common.download(_cifar_url, 'cifar', _cifar_md5)
Y
Yancey1989 已提交
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32

# Cache conll05: word, verb and target dictionaries, in that order.
_conll05 = dataset.conll05
for _dict_url, _dict_md5 in (
        (_conll05.WORDDICT_URL, _conll05.WORDDICT_MD5),
        (_conll05.VERBDICT_URL, _conll05.VERBDICT_MD5),
        (_conll05.TRGDICT_URL, _conll05.TRGDICT_MD5)):
    dataset.common.download(_dict_url, 'conll05st', _dict_md5)

# Cache imdb, using the URL and checksum published by dataset.imdb.
_imdb = dataset.imdb
dataset.common.download(_imdb.URL, "imdb", _imdb.MD5)

# Cache imikolov, using the URL and checksum published by dataset.imikolov.
_imikolov = dataset.imikolov
dataset.common.download(_imikolov.URL, "imikolov", _imikolov.MD5)

# Cache movielens (ml-1m); the URL and MD5 are spelled out here rather than
# read from the dataset module.
_MOVIELENS_URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
_MOVIELENS_MD5 = 'c4d9eecfca2ab87c1945afe126590906'
dataset.common.download(_MOVIELENS_URL, 'movielens', _MOVIELENS_MD5)

# Cache nltk: place the 'movie_reviews' corpus in the shared DATA_HOME
# directory used by the other downloads.
_nltk_target_dir = dataset.common.DATA_HOME
nltk.download('movie_reviews', download_dir=_nltk_target_dir)

# Cache uci housing
Y
update  
Yancey1989 已提交
33 34
# Fetch the UCI housing data using the URL/MD5 published by the module.
_uci_housing = dataset.uci_housing
dataset.common.download(_uci_housing.URL, "uci_housing", _uci_housing.MD5)
Y
Yancey1989 已提交
35 36

# Cache wmt14
G
gongweibao 已提交
37 38
# Fetch only the wmt14 training data (URL_TRAIN / MD5_TRAIN).
_wmt14 = dataset.wmt14
dataset.common.download(_wmt14.URL_TRAIN, "wmt14", _wmt14.MD5_TRAIN)
G
fix bug  
gongweibao 已提交
39 40 41 42 43 44

# Cache mnist: train/test images and labels.
# The MD5 checksums are attributes of dataset.mnist, just like the URLs, so
# they must be qualified with the module; they are not names in this script.
dataset.common.download(dataset.mnist.TRAIN_IMAGE_URL, 'mnist',
                        dataset.mnist.TRAIN_IMAGE_MD5)
dataset.common.download(dataset.mnist.TRAIN_LABEL_URL, 'mnist',
                        dataset.mnist.TRAIN_LABEL_MD5)
dataset.common.download(dataset.mnist.TEST_IMAGE_URL, 'mnist',
                        dataset.mnist.TEST_IMAGE_MD5)
dataset.common.download(dataset.mnist.TEST_LABEL_URL, 'mnist',
                        dataset.mnist.TEST_LABEL_MD5)