Commit f41daaa2, authored by Shanqing Cai

Fixes and improvements to docker build script and dockerfiles

1) Clean up the large Bazel build cache (a condensed sketch of items 1 and 2 follows this list). Total filesystem size reduction as seen by du -sh /:
  devel image: 1.5 GB (before: 2.9 GB; after: 1.4 GB)
  devel-gpu image: 2.3 GB (before: 4.7 GB; after: 2.4 GB)
2) Use nvidia-docker for the GPU docker build.
3) Upgrade Bazel version from 0.3.1 to 0.3.2.
4) Add missing libcurl3-dev build dependency to devel images.
5) Add scipy and sklearn to Dockerfile.devel-gpu to enhance consistency with other image types (e.g., Dockerfile.devel).
6) Remove the obsolete and unnecessary --recurse-submodules flag for git clone.
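
A condensed sketch of items 1 and 2, pieced together from the diff below (the DOCKER_BINARY, IMG, DOCKERFILE, TF_DOCKER_BUILD_TYPE and TMP_DIR variables come from the build script itself, not new API):

# Item 1 (Dockerfile.devel and Dockerfile.devel-gpu): build and install the pip
# wheel, then delete the wheel and the Bazel cache in the same RUN layer so the
# cache never ends up in the final image.
RUN tensorflow/tools/ci_build/builds/configured CPU \
    bazel build -c opt tensorflow/tools/pip_package:build_pip_package && \
    bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \
    pip install --upgrade /tmp/pip/tensorflow-*.whl && \
    rm -rf /tmp/pip && \
    rm -rf /root/.cache

# Item 2 (build script): pick the docker binary per build type, then use it for
# every docker invocation (build, run, ps, stop, tag).
if [[ ${TF_DOCKER_BUILD_TYPE} == "cpu" ]]; then
  DOCKER_BINARY="docker"
elif [[ ${TF_DOCKER_BUILD_TYPE} == "gpu" ]]; then
  DOCKER_BINARY="nvidia-docker"
fi
"${DOCKER_BINARY}" build --no-cache -t "${IMG}" -f "${DOCKERFILE}" "${TMP_DIR}"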

Related to GH issues: https://github.com/tensorflow/tensorflow/issues/4116 and https://github.com/tensorflow/tensorflow/issues/4117

However, this change does not use the "git clone --depth 1" approach suggested in issue #4117, because the "--depth 1" flag reduces the size of the git repo by only about 50 MB. That saving is small compared to the saving from removing the Bazel cache, and the complete history of the git repo can be useful for certain development purposes. (A sketch of how one might reproduce this size comparison follows.)
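
For reference, one way to reproduce the size comparison behind this decision; the directory names full_clone and shallow_clone are arbitrary, and the ~50 MB figure is the one quoted above:

# Compare the on-disk size of a full clone against a shallow clone.
git clone https://github.com/tensorflow/tensorflow.git full_clone
git clone --depth 1 https://github.com/tensorflow/tensorflow.git shallow_clone
du -sh full_clone/.git shallow_clone/.git  # difference was roughly 50 MB at the time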
Change: 136302103
Parent: 671f4c65
Dockerfile.devel:

@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 build-essential \
 curl \
 git \
+libcurl3-dev \
 libfreetype6-dev \
 libpng12-dev \
 libzmq3-dev \
@@ -65,7 +66,7 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
 >>/root/.bazelrc
 ENV BAZELRC /root/.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.3.1
+ENV BAZEL_VERSION 0.3.2
 WORKDIR /
 RUN mkdir /bazel && \
 cd /bazel && \
@@ -78,7 +79,7 @@ RUN mkdir /bazel && \
 # Download and build TensorFlow.
-RUN git clone --recursive https://github.com/tensorflow/tensorflow.git && \
+RUN git clone https://github.com/tensorflow/tensorflow.git && \
 cd tensorflow && \
 git checkout r0.11
 WORKDIR /tensorflow
@@ -87,10 +88,13 @@ WORKDIR /tensorflow
 # more difficult to experiment with local changes. Instead, just add
 # the built directory to the path.
-RUN ./configure && \
+RUN tensorflow/tools/ci_build/builds/configured CPU \
 bazel build -c opt tensorflow/tools/pip_package:build_pip_package && \
 bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \
-pip install --upgrade /tmp/pip/tensorflow-*.whl
+pip install --upgrade /tmp/pip/tensorflow-*.whl && \
+rm -rf /tmp/pip && \
+rm -rf /root/.cache
+# Clean up pip wheel and Bazel cache when done.
 # TensorBoard
 EXPOSE 6006
Dockerfile.devel-gpu:

@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 build-essential \
 curl \
 git \
+libcurl3-dev \
 libfreetype6-dev \
 libpng12-dev \
 libzmq3-dev \
@@ -31,6 +32,8 @@ RUN pip --no-cache-dir install \
 jupyter \
 matplotlib \
 numpy \
+scipy \
+sklearn \
 && \
 python -m ipykernel.kernelspec
@@ -66,7 +69,7 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
 >>/root/.bazelrc
 ENV BAZELRC /root/.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.3.1
+ENV BAZEL_VERSION 0.3.2
 WORKDIR /
 RUN mkdir /bazel && \
 cd /bazel && \
@@ -79,7 +82,7 @@ RUN mkdir /bazel && \
 # Download and build TensorFlow.
-RUN git clone -b r0.11 --recursive --recurse-submodules https://github.com/tensorflow/tensorflow.git && \
+RUN git clone https://github.com/tensorflow/tensorflow.git && \
 cd tensorflow && \
 git checkout r0.11
 WORKDIR /tensorflow
@@ -89,10 +92,13 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2
-RUN ./configure && \
+RUN tensorflow/tools/ci_build/builds/configured GPU \
 bazel build -c opt --config=cuda tensorflow/tools/pip_package:build_pip_package && \
 bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \
-pip install --upgrade /tmp/pip/tensorflow-*.whl
+pip install --upgrade /tmp/pip/tensorflow-*.whl && \
+rm -rf /tmp/pip && \
+rm -rf /root/.cache
+# Clean up pip wheel and Bazel cache when done.
 WORKDIR /root
Docker build script:

@@ -120,8 +120,10 @@ else
 fi
 if [[ ${TF_DOCKER_BUILD_TYPE} == "cpu" ]]; then
-:
+DOCKER_BINARY="docker"
 elif [[ ${TF_DOCKER_BUILD_TYPE} == "gpu" ]]; then
+DOCKER_BINARY="nvidia-docker"
 FINAL_TAG="${FINAL_TAG}-gpu"
 if [[ ${ORIG_DOCKERFILE} == *"."* ]]; then
 # There is already a dot in the tag, use "-"
@@ -235,17 +237,18 @@ fi
 IMG="${USER}/tensorflow:${FINAL_TAG}"
 echo "Building docker image with image name and tag: ${IMG}"
-docker build --no-cache -t "${IMG}" -f "${DOCKERFILE}" "${TMP_DIR}"
+"${DOCKER_BINARY}" build --no-cache -t "${IMG}" -f "${DOCKERFILE}" "${TMP_DIR}"
 if [[ $? == "0" ]]; then
-echo "docker build of ${IMG} succeeded"
+echo "${DOCKER_BINARY} build of ${IMG} succeeded"
 else
-die "FAIL: docker build of ${IMG} with Dockerfile ${DOCKERFILE} failed"
+die "FAIL: ${DOCKER_BINARY} build of ${IMG} with Dockerfile ${DOCKERFILE} "\
+"failed"
 fi
 # Make sure that there is no other containers of the same image running
 # TODO(cais): Move to an earlier place.
-if [[ ! -z $(docker ps | grep "${IMG}") ]]; then
+if [[ ! -z $("${DOCKER_BINARY}" ps | grep "${IMG}") ]]; then
 die "ERROR: It appears that there are docker containers of the image "\
 "${IMG} running. Please stop them before proceeding"
 fi
@@ -258,7 +261,7 @@ echo " (Log file is at: ${DOCKER_RUN_LOG}"
 echo ""
 if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
-docker run --rm -p ${CONTAINER_PORT}:${CONTAINER_PORT} \
+"${DOCKER_BINARY}" run --rm -p ${CONTAINER_PORT}:${CONTAINER_PORT} \
 -v ${TMP_DIR}/notebooks:/root/notebooks "${IMG}" \
 2>&1 > "${DOCKER_RUN_LOG}" &
@@ -267,7 +270,7 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
 while [[ -z ${CONTAINER_ID} ]]; do
 sleep 1
 echo "Polling for container ID..."
-CONTAINER_ID=$(docker ps | grep "${IMG}" | awk '{print $1}')
+CONTAINER_ID=$("${DOCKER_BINARY}" ps | grep "${IMG}" | awk '{print $1}')
 done
 echo "ID of the running docker container: ${CONTAINER_ID}"
@@ -293,10 +296,10 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
 # Stop the running docker container
 sleep 1
-docker stop --time=0 ${CONTAINER_ID}
+"${DOCKER_BINARY}" stop --time=0 ${CONTAINER_ID}
 else
-docker run --rm -p ${CONTAINER_PORT}:${CONTAINER_PORT} \
+"${DOCKER_BINARY}" run --rm -p ${CONTAINER_PORT}:${CONTAINER_PORT} \
 -v ${TMP_DIR}/notebooks:/root/notebooks "${IMG}" \
 bash -c \
 "cd /tensorflow; tensorflow/tools/ci_build/builds/test_tutorials.sh"
@@ -324,9 +327,9 @@ fi
 # Apply the final image name and tag
 FINAL_IMG="${FINAL_IMAGE_NAME}:${FINAL_TAG}"
-DOCKER_VER=$(docker version | grep Version | head -1 | awk '{print $NF}')
+DOCKER_VER=$("${DOCKER_BINARY}" version | grep Version | head -1 | awk '{print $NF}')
 if [[ -z "${DOCKER_VER}" ]]; then
-die "ERROR: Failed to determine docker version"
+die "ERROR: Failed to determine ${DOCKER_BINARY} version"
 fi
 DOCKER_MAJOR_VER=$(echo "${DOCKER_VER}" | cut -d. -f 1)
 DOCKER_MINOR_VER=$(echo "${DOCKER_VER}" | cut -d. -f 2)
@@ -337,7 +340,7 @@ if [[ "${DOCKER_MAJOR_VER}" -le 1 ]] && \
 FORCE_TAG="--force"
 fi
-docker tag ${FORCE_TAG} "${IMG}" "${FINAL_IMG}" || \
+"${DOCKER_BINARY}" tag ${FORCE_TAG} "${IMG}" "${FINAL_IMG}" || \
 die "Failed to tag intermediate docker image ${IMG} as ${FINAL_IMG}"
 echo ""