提交 4eac3c33 编写于 作者: G gongweibao 提交者: GitHub

Merge pull request #3 from Yancey1989/builddockerimage3_yx

Cache paddle dataset 
......@@ -4,3 +4,5 @@ pandoc.template
.DS_Store
.idea
py_env*
*.ipynb
build
......@@ -12,8 +12,8 @@ fi
#convert md to ipynb
.tools/convert-markdown-into-ipynb-and-test.sh
paddle_version=0.10.0rc2
latest_label=latest
paddle_tag=0.10.0rc2
latest_tag=latest
#generate docker file
if [ ${USE_UBUNTU_REPO_MIRROR} ]; then
......@@ -23,38 +23,25 @@ else
fi
mkdir -p build
cat > build/Dockerfile <<EOF1
FROM paddlepaddle/paddle:${paddle_version}
cat > ./build/Dockerfile << EOF
FROM paddlepaddle/paddle:${paddle_tag}
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
RUN ${update_mirror_cmd}
apt-get install locales
RUN localedef -f UTF-8 -i en_US en_US.UTF-8
RUN apt-get -y install gcc && \
apt-get -y clean
RUN pip install -U matplotlib jupyter numpy requests scipy
apt-get update && \
apt-get install -y locales && \
apt-get -y install gcc && \
apt-get -y clean && \
localedef -f UTF-8 -i en_US en_US.UTF-8 && \
pip install -U matplotlib jupyter numpy requests scipy
COPY . /book
RUN rm -rf /book/build
EOF1
if [ ${COPY_CACHE_DATA} ]; then
cat >> build/Dockerfile << EOF2
RUN mkdir -p /root/${cache_data_path}
RUN mv /book/${cache_data_path}/* /root/${cache_data_path}/ && rm -rf /book/${cache_data_path}
EOF2
fi
cat >> build/Dockerfile << EOF3
RUN python /book/.tools/cache_dataset.py
EXPOSE 8888
CMD ["sh", "-c", "jupyter notebook --ip=0.0.0.0 --no-browser --NotebookApp.token='' --NotebookApp.disable_check_xsrf=True /book/"]
EOF3
EOF
#build docker image
echo "paddle_version:"$paddle_version
docker build --no-cache -t paddlepaddle/book:${paddle_version} -t paddlepaddle/book:${latest_label} -f ./build/Dockerfile .
echo "paddle_tag:"$paddle_tag
echo $dockerfile | docker build --no-cache -t paddlepaddle/book:${paddle_tag} -t paddlepaddle/book:${latest_tag} -f ./build/Dockerfile .
#!/usr/bin/env python
# Pre-fetch the datasets used by the book chapters so that they are already
# present in the local dataset cache (dataset.common.DATA_HOME) instead of
# being downloaded on first use.
#
# Every dataset.common.download() call below is given a source URL, the name
# of the cache sub-directory, and the expected MD5 checksum of the file
# (argument meaning inferred from the call sites -- confirm against
# paddle.v2.dataset.common).
import paddle.v2.dataset as dataset
import nltk

# Cache conll05 (word, verb and target dictionaries)
dataset.common.download(dataset.conll05.WORDDICT_URL, 'conll05st',
                        dataset.conll05.WORDDICT_MD5)
dataset.common.download(dataset.conll05.VERBDICT_URL, 'conll05st',
                        dataset.conll05.VERBDICT_MD5)
dataset.common.download(dataset.conll05.TRGDICT_URL, 'conll05st',
                        dataset.conll05.TRGDICT_MD5)

# Cache imdb
dataset.common.download(dataset.imdb.URL, "imdb", dataset.imdb.MD5)

# Cache imikolov
dataset.common.download(dataset.imikolov.URL, "imikolov", dataset.imikolov.MD5)

# Cache movielens (URL and checksum are hard-coded here rather than read from
# the dataset module)
dataset.common.download('http://files.grouplens.org/datasets/movielens/ml-1m.zip',
                        'movielens', 'c4d9eecfca2ab87c1945afe126590906')

# Cache nltk corpus; stored next to the paddle datasets
nltk.download('movie_reviews', download_dir=dataset.common.DATA_HOME)

# Cache uci housing
dataset.common.download(dataset.uci_housing.URL, "uci_housing",
                        dataset.uci_housing.MD5)

# Cache wmt14 (training archive)
dataset.common.download(dataset.wmt14.URL_TRAIN, "wmt14",
                        dataset.wmt14.MD5_TRAIN)
......@@ -5,14 +5,14 @@ if [ $? -ne 0 ]; then
exit 1
fi
GOPATH=~/.cache/go go get -u github.com/wangkuiyi/ipynb/markdown-to-ipynb
export GOPATH=~/go; go get -u github.com/wangkuiyi/ipynb/markdown-to-ipynb
cur_path="$(cd "$(dirname "$0")" && pwd -P)"
cd $cur_path/../
#convert md to ipynb
for file in */{README,README\.en}.md ; do
~/.cache/go/bin/markdown-to-ipynb < $file > ${file%.*}".ipynb"
~/go/bin/markdown-to-ipynb < $file > ${file%.*}".ipynb"
if [ $? -ne 0 ]; then
echo >&2 "markdown-to-ipynb $file error"
exit 1
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册