Commit d8eab128 authored by Yancey1989

update docker build

Parent 3c9ec464
......@@ -4,3 +4,5 @@ pandoc.template
.DS_Store
.idea
py_env*
*.ipynb
build
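
With `*.ipynb` and `build` now ignored, the generated notebooks and the Docker build context no longer show up as untracked files. A quick sanity check after this change (the chapter path is only an example; run from the repository root):

```bash
# Verify that generated artifacts match the new ignore rules.
git check-ignore -v build fit_a_line/README.ipynb
```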
#!/bin/bash
set -e
# Convert markdown to ipynb
/book/.tools/convert-markdown-into-ipynb-and-test.sh
# Cache dataset
/book/.tools/cache_dataset.py
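
This helper bundles the two generation steps that previously ran during `docker build`. A minimal sketch of re-running it by hand inside the built image, where the helper's file name is a placeholder (the diff does not show it):

```bash
# Re-run the convert + cache steps inside the built book image;
# /book is where the Dockerfile copies the repository checkout.
docker run --rm paddlepaddle/book:latest bash /book/.tools/prepare_book.sh  # script name is hypothetical
```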
......@@ -12,8 +12,8 @@ fi
#convert md to ipynb
.tools/convert-markdown-into-ipynb-and-test.sh
paddle_version=0.10.0rc2
latest_label=latest
paddle_tag=0.10.0rc2
latest_tag=latest
#generate docker file
if [ ${USE_UBUNTU_REPO_MIRROR} ]; then
......@@ -23,38 +23,25 @@ else
fi
mkdir -p build
cat > build/Dockerfile <<EOF1
FROM paddlepaddle/paddle:${paddle_version}
cat > ./build/Dockerfile << EOF
FROM paddlepaddle/paddle:${paddle_tag}
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
RUN ${update_mirror_cmd}
apt-get install locales
RUN localedef -f UTF-8 -i en_US en_US.UTF-8
RUN apt-get -y install gcc && \
apt-get -y clean
RUN pip install -U matplotlib jupyter numpy requests scipy
apt-get update && \
apt-get install -y locales && \
apt-get -y install gcc && \
apt-get -y clean && \
localedef -f UTF-8 -i en_US en_US.UTF-8 && \
pip install -U matplotlib jupyter numpy requests scipy
COPY . /book
RUN rm -rf /book/build
EOF1
if [ ${COPY_CACHE_DATA} ]; then
cat >> build/Dockerfile << EOF2
RUN mkdir -p /root/${cache_data_path}
RUN mv /book/${cache_data_path}/* /root/${cache_data_path}/ && rm -rf /book/${cache_data_path}
EOF2
fi
cat >> build/Dockerfile << EOF3
RUN /book/.tools/cache_dataset.py
EXPOSE 8888
CMD ["sh", "-c", "jupyter notebook --ip=0.0.0.0 --no-browser --NotebookApp.token='' --NotebookApp.disable_check_xsrf=True /book/"]
EOF3
EOF
#build docker image
echo "paddle_version:"$paddle_version
docker build --no-cache -t paddlepaddle/book:${paddle_version} -t paddlepaddle/book:${latest_label} -f ./build/Dockerfile .
echo "paddle_tag:"$paddle_tag
docker build --no-cache -t paddlepaddle/book:${paddle_tag} -t paddlepaddle/book:${latest_tag} -f ./build/Dockerfile .
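
Usage is unchanged by the tag rename; a hedged sketch, assuming the script above is invoked as `./build.sh` from the repository root:

```bash
# Any non-empty value enables the Ubuntu apt mirror in the generated Dockerfile.
USE_UBUNTU_REPO_MIRROR=1 ./build.sh

# The Dockerfile's CMD starts Jupyter on port 8888 with token auth disabled,
# so the notebooks are reachable at http://localhost:8888 after this.
docker run -d -p 8888:8888 paddlepaddle/book:latest
```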
#!/usr/bin/env python
import paddle.v2.dataset as dataset
import nltk
# Cache conll05
dataset.common.download(dataset.conll05.WORDDICT_URL, 'conll05st', \
dataset.conll05.WORDDICT_MD5)
dataset.common.download(dataset.conll05.VERBDICT_URL, 'conll05st', \
dataset.conll05.VERBDICT_MD5)
dataset.common.download(dataset.conll05.TRGDICT_URL, 'conll05st', \
dataset.conll05.TRGDICT_MD5)
# Cache imdb
dataset.common.download(dataset.imdb.URL, "imdb", dataset.imdb.MD5)
# Cache imikolov
dataset.common.download(dataset.imikolov.URL, "imikolov", dataset.imikolov.MD5)
# Cache movielens
dataset.common.download('http://files.grouplens.org/datasets/movielens/ml-1m.zip',\
'movielens','c4d9eecfca2ab87c1945afe126590906')
# Cache nltk
nltk.download('movie_reviews', download_dir=dataset.common.DATA_HOME)
# Cache uci housing
dataset.common.download(dataset.uci_housing.URL, "uci_housing", dataset.uci_housing.MD5)
# Cache wmt14
dataset.common.download(dataset.wmt14.URL_TRAIN, "wmt14", dataset.wmt14.MD5_TRAIN)
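
To confirm the datasets were actually baked into the image, one can list paddle's cache directory; this sketch assumes `dataset.common.DATA_HOME` defaults to `~/.cache/paddle/dataset` and that the container runs as root:

```bash
# List the datasets cached at image build time (the path is an assumption).
docker run --rm paddlepaddle/book:latest ls /root/.cache/paddle/dataset
```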
......@@ -5,14 +5,14 @@ if [ $? -ne 0 ]; then
exit 1
fi
GOPATH=~/.cache/go go get -u github.com/wangkuiyi/ipynb/markdown-to-ipynb
export GOPATH=~/go; go get -u github.com/wangkuiyi/ipynb/markdown-to-ipynb
cur_path="$(cd "$(dirname "$0")" && pwd -P)"
cd $cur_path/../
#convert md to ipynb
for file in */{README,README\.en}.md ; do
~/.cache/go/bin/markdown-to-ipynb < $file > ${file%.*}".ipynb"
~/go/bin/markdown-to-ipynb < $file > ${file%.*}".ipynb"
if [ $? -ne 0 ]; then
echo >&2 "markdown-to-ipynb $file error"
exit 1
......
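
With `GOPATH` moved from `~/.cache/go` to `~/go`, a single chapter can still be converted by hand; a sketch, where the chapter directory is only an example:

```bash
# Install the converter into the new GOPATH and convert one chapter's README.
export GOPATH=~/go
go get -u github.com/wangkuiyi/ipynb/markdown-to-ipynb
chapter=fit_a_line   # example chapter directory; use any chapter of the book
~/go/bin/markdown-to-ipynb < $chapter/README.md > $chapter/README.ipynb
```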
......@@ -141,7 +141,7 @@
"\n",
"## 数据准备\n",
"\n",
"通用图像分类公开的标准数据集常用的有[CIFAR](\u003chttps://www.cs.toronto.edu/~kriz/cifar.html)、[ImageNet](http://image-net.org/)、[COCO](http://mscoco.org/)等,常用的细粒度图像分类数据集包括[CUB-200-2011](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html)、[Stanford Dog](http://vision.stanford.edu/aditya86/ImageNetDogs/)、[Oxford-flowers](http://www.robots.ox.ac.uk/~vgg/data/flowers/)等。其中ImageNet数据集规模相对较大,如[模型概览](#模型概览)一章所讲,大量研究成果基于ImageNet。ImageNet数据从2010年来稍有变化,常用的是ImageNet-2012数据集,该数据集包含1000个类别:训练集包含1,281,167张图片,每个类别数据732至1300张不等,验证集包含50,000张图片,平均每个类别50张图片。\n",
"通用图像分类公开的标准数据集常用的有[CIFAR](https://www.cs.toronto.edu/~kriz/cifar.html)、[ImageNet](http://image-net.org/)、[COCO](http://mscoco.org/)等,常用的细粒度图像分类数据集包括[CUB-200-2011](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html)、[Stanford Dog](http://vision.stanford.edu/aditya86/ImageNetDogs/)、[Oxford-flowers](http://www.robots.ox.ac.uk/~vgg/data/flowers/)等。其中ImageNet数据集规模相对较大,如[模型概览](#模型概览)一章所讲,大量研究成果基于ImageNet。ImageNet数据从2010年来稍有变化,常用的是ImageNet-2012数据集,该数据集包含1000个类别:训练集包含1,281,167张图片,每个类别数据732至1300张不等,验证集包含50,000张图片,平均每个类别50张图片。\n",
"\n",
"由于ImageNet数据集较大,下载和训练较慢,为了方便大家学习,我们使用[CIFAR10](\u003chttps://www.cs.toronto.edu/~kriz/cifar.html\u003e)数据集。CIFAR10数据集包含60,000张32x32的彩色图片,10个类别,每个类包含6,000张。其中50,000张图片作为训练集,10000张作为测试集。图11从每个类别中随机抽取了10张图片,展示了所有的类别。\n",
"\n",
......
......@@ -88,33 +88,6 @@
"We use the [MovieLens ml-1m](http://files.grouplens.org/datasets/movielens/ml-1m.zip) to train our model. This dataset includes 10,000 ratings of 4,000 movies from 6,000 users to 4,000 movies. Each rate is in the range of 1~5. Thanks to GroupLens Research for collecting, processing and publishing the dataset.\n",
"\n",
"`paddle.v2.datasets` package encapsulates multiple public datasets, including `cifar`, `imdb`, `mnist`, `moivelens` and `wmt14`, etc. There's no need for us to manually download and preprocess `MovieLens` dataset.\n",
"\n"
]
},
{
"cell_type": "code",
"metadata": {
"editable": true
},
"source": [
"# Run this block to show dataset's documentation\n",
"help(paddle.v2.dataset.movielens)\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"execution_count": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"The raw `MoiveLens` contains movie ratings, relevant features from both movies and users.\n",
"For instance, one movie's feature could be:\n",
......@@ -127,6 +100,7 @@
"editable": true
},
"source": [
"import paddle.v2 as paddle\n",
"movie_info = paddle.dataset.movielens.movie_info()\n",
"print movie_info.values()[0]\n"
],
......@@ -283,7 +257,6 @@
"import cPickle\n",
"\n",
"import paddle.v2 as paddle\n",
"\n",
"paddle.init(use_gpu=False)\n"
],
"outputs": [
......@@ -551,9 +524,9 @@
"editable": true
},
"source": [
"reader=paddle.reader.batch(\n",
"reader=paddle.batch(\n",
" paddle.reader.shuffle(\n",
" paddle.dataset.movielens.trai(), buf_size=8192),\n",
" paddle.dataset.movielens.train(), buf_size=8192),\n",
" batch_size=256)\n"
],
"outputs": [
......
......@@ -725,7 +725,7 @@
"infer_dict = copy.copy(feeding)\n",
"del infer_dict['score']\n",
"\n",
"prediction = paddle.infer(output=inference, parameters=parameters, input=[feature], feeding=infer_dict)\n",
"prediction = paddle.infer(inference, parameters=parameters, input=[feature], feeding=infer_dict)\n",
"score = (prediction[0][0] + 5.0) / 2\n",
"print \"[Predict] User %d Rating Movie %d With Score %.2f\"%(user_id, movie_id, score)\n"
],
......