diff --git a/.tools/build_docker.sh b/.tools/build_docker.sh index 7a06c180d2ed0a99c2826ebba916aa9cbd842767..269687c3e3be4afad1e492cdba9214b6bac1a435 100755 --- a/.tools/build_docker.sh +++ b/.tools/build_docker.sh @@ -25,8 +25,10 @@ MAINTAINER PaddlePaddle Authors COPY . /book -RUN pip install -U nltk \ - && python /book/.tools/cache_dataset.py +#RUN pip install -U nltk \ +# && python /book/.tools/cache_dataset.py + +RUN python -c "import paddle.v2.dataset as dataset; dataset.fetch_all()" RUN ${update_mirror_cmd} apt-get update && \ diff --git a/.tools/cache_dataset.py b/.tools/cache_dataset.py deleted file mode 100755 index ae0125c6c6d14f4eb7d970e8911e6fc744451922..0000000000000000000000000000000000000000 --- a/.tools/cache_dataset.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/env python -import paddle.v2.dataset as dataset -import nltk - -#cifar -dataset.common.download(dataset.cifar.CIFAR100_URL, 'cifar', - dataset.cifar.CIFAR100_MD5) -dataset.common.download(dataset.cifar.CIFAR10_URL, 'cifar', - dataset.cifar.CIFAR10_MD5) - -# Cache conll05 -dataset.common.download(dataset.conll05.WORDDICT_URL, 'conll05st', \ - dataset.conll05.WORDDICT_MD5) -dataset.common.download(dataset.conll05.VERBDICT_URL, 'conll05st', \ - dataset.conll05.VERBDICT_MD5) -dataset.common.download(dataset.conll05.TRGDICT_URL, 'conll05st', \ - dataset.conll05.TRGDICT_MD5) -dataset.common.download(dataset.conll05.EMB_URL, 'conll05st', - dataset.conll05.EMB_MD5) -dataset.common.download(dataset.conll05.DATA_URL, 'conll05st', - dataset.conll05.DATA_MD5) - -# Cache imdb -dataset.common.download(dataset.imdb.URL, "imdb", dataset.imdb.MD5) - -# Cache imikolov -dataset.common.download(dataset.imikolov.URL, "imikolov", dataset.imikolov.MD5) - -# Cache movielens -dataset.common.download('http://files.grouplens.org/datasets/movielens/ml-1m.zip',\ - 'movielens','c4d9eecfca2ab87c1945afe126590906') - -# Cache nltk -nltk.download('movie_reviews', download_dir=dataset.common.DATA_HOME) - -# Cache uci housing -dataset.common.download(dataset.uci_housing.URL, "uci_housing", \ - dataset.uci_housing.MD5) - -# Cache vmt14 -dataset.common.download(dataset.wmt14.URL_TRAIN, "wmt14",\ - dataset.wmt14.MD5_TRAIN) - -#mnist -dataset.common.download(dataset.mnist.TRAIN_IMAGE_URL, 'mnist', - dataset.mnist.TRAIN_IMAGE_MD5) -dataset.common.download(dataset.mnist.TRAIN_LABEL_URL, 'mnist', - dataset.mnist.TRAIN_LABEL_MD5) -dataset.common.download(dataset.mnist.TEST_IMAGE_URL, 'mnist', - dataset.mnist.TEST_IMAGE_MD5) -dataset.common.download(dataset.mnist.TEST_LABEL_URL, 'mnist', - dataset.mnist.TEST_LABEL_MD5)