Commit d49fbd7e authored by Renaud Gaubert

Merge branch 'runtimespec' into 'master'

Add initial runtime container

See merge request nvidia/container-toolkit/nvidia-container-runtime!16
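# Makefile: builds the toolkit image (DOCKERFILE), the builder image
# (DOCKERDEVEL), and runs the end-to-end test in test/docker_test.sh.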
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
.PHONY: all build builder test
.DEFAULT_GOAL := all
##### Global variables #####
DOCKERFILE ?= $(CURDIR)/docker/Dockerfile
DOCKERDEVEL ?= $(CURDIR)/docker/Dockerfile.builder
IMAGE ?= nvidia/container-toolkit:docker19.03
BUILDER ?= nvidia/container-toolkit:builder
##### Public rules #####
all: build

build:
	docker build -f $(DOCKERFILE) -t $(IMAGE) .

builder:
	docker build -f $(DOCKERDEVEL) -t $(BUILDER) .

test: build
	$(CURDIR)/test/docker_test.sh $(CURDIR)/shared $(IMAGE)
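# docker/Dockerfile (DOCKERFILE in the Makefile): the toolkit installer image,
# based on Ubuntu 18.04, with the docker CLI, the nvidia-container-runtime
# packages, and the installer scripts copied from src/.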
FROM ubuntu:18.04

RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        curl \
        gnupg2 \
        jq \
        moreutils \
        software-properties-common

RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - && \
    curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add - && \
    curl -s -L https://nvidia.github.io/nvidia-docker/ubuntu18.04/nvidia-docker.list | \
        tee /etc/apt/sources.list.d/nvidia-docker.list && \
    add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
        $(lsb_release -cs) stable"

RUN apt-get update && apt-get install -y --no-install-recommends \
        docker-ce-cli \
        nvidia-container-runtime
WORKDIR /work
COPY src/. .
RUN cp /etc/nvidia-container-runtime/config.toml ./
RUN chmod +x /work/run.sh
CMD ["bash"]
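# docker/Dockerfile.builder (DOCKERDEVEL in the Makefile): a small
# docker-in-docker based development image with make and bash.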
FROM docker:stable-dind
RUN apk add --update make bash
CMD ["bash"]
#! /bin/bash
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
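# src/common.sh: shared logging helpers (log) sourced by run.sh.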
# shellcheck disable=SC2015
[ -t 2 ] && readonly LOG_TTY=1 || readonly LOG_NO_TTY=1
if [ "${LOG_TTY-0}" -eq 1 ] && [ "$(tput colors)" -ge 15 ]; then
readonly FMT_CLEAR=$(tput sgr0)
readonly FMT_BOLD=$(tput bold)
readonly FMT_RED=$(tput setaf 1)
readonly FMT_YELLOW=$(tput setaf 3)
readonly FMT_BLUE=$(tput setaf 12)
fi
log() {
local -r level="$1"; shift
local -r message="$*"
local fmt_on="${FMT_CLEAR-}"
local -r fmt_off="${FMT_CLEAR-}"
case "${level}" in
INFO) fmt_on="${FMT_BLUE-}" ;;
WARN) fmt_on="${FMT_YELLOW-}" ;;
ERROR) fmt_on="${FMT_RED-}" ;;
esac
printf "%s[%s]%s %b\n" "${fmt_on}" "${level}" "${fmt_off}" "${message}" >&2
}
#! /bin/bash
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
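# src/docker.sh: helpers for querying the docker daemon and updating its
# configuration, sourced by run.sh.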
docker::info() {
    local -r docker_socket="${1:-unix:///var/run/docker.sock}"

    # Docker in Docker has a startup race
    for i in $(seq 1 5); do
        # Calling in a subshell so that we can recover from a failure
        if [[ ! $(docker -H "${docker_socket}" info -f '{{json .Runtimes}}') ]]; then
            sleep 2
            continue
        fi
        docker -H "${docker_socket}" info -f '{{json .Runtimes}}' | jq -r '.nvidia.path'
        return
    done
    exit 1
}
# Echo an empty config if the config file doesn't exist
docker::daemon_config() {
    local -r daemon_file="${1:-"/etc/docker/daemon.json"}"
    ([[ -f "${daemon_file}" ]] && cat "${daemon_file}") || echo {}
}

docker::refresh_configuration() {
    log INFO "Refreshing the docker daemon configuration"
    pkill -SIGHUP dockerd
}
docker::update_config_file() {
    local -r destination="${1:-/run/nvidia}"
    local -r nvcr="${destination}/nvidia-container-runtime"

    local config_json
    IFS='' read -r config_json

    echo "${config_json}" | \
        jq -r ".runtimes += {\"nvidia\": {\"path\": \"${nvcr}\"}}" | \
        jq -r '. += {"default-runtime": "nvidia"}'
}
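# For example, feeding an empty config ({}) through this filter with the default
# destination (/run/nvidia) yields the following /etc/docker/daemon.json:
#
#   {
#     "runtimes": {
#       "nvidia": {
#         "path": "/run/nvidia/nvidia-container-runtime"
#       }
#     },
#     "default-runtime": "nvidia"
#   }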
docker::ensure_prerequisites() {
    # Ensure that the docker config path exists
    if [[ ! -d "/etc/docker" ]]; then
        log ERROR "Docker directory doesn't exist in container"
        log ERROR "Ensure that you have correctly mounted the docker directory"
        exit 1
    fi

    # Ensure that the docker config path is actually mounted from the host
    if ! mount | grep /etc/docker > /dev/null; then
        log ERROR "Docker directory isn't mounted in container"
        log ERROR "Ensure that you have correctly mounted the docker directory"
        exit 1
    fi
}
docker::setup() {
    local -r destination="${1:-/run/nvidia}"
    log INFO "Setting up the configuration for the docker daemon"

    docker::ensure_prerequisites

    log INFO "current config: $(docker::daemon_config)"

    # Append the nvidia runtime to the docker daemon's configuration.
    # The config is captured in a variable first because the input file
    # is also the output file.
    config=$(docker::daemon_config | docker::update_config_file "${destination}")
    echo "${config}" > /etc/docker/daemon.json

    log INFO "after: $(docker::daemon_config | jq .)"
    docker::refresh_configuration
}
#! /bin/bash
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
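# src/run.sh: installs the NVIDIA container runtime toolkit into the shared
# destination directory and registers it with the docker daemon.
# Invoked from the test as: /work/run.sh /run/nvidia unix:///run/nvidia/docker.sock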
set -euxo pipefail
shopt -s lastpipe
source "common.sh"
source "docker.sh"
install_nvidia_container_runtime_toolkit() {
    log INFO "Installing the NVIDIA Container Runtime Toolkit"

    local -r destination="${1:-/run/nvidia}"
    local -a packages=("/usr/bin/nvidia-container-runtime" \
        "/usr/bin/nvidia-container-toolkit" \
        "/usr/bin/nvidia-container-cli" \
        "/etc/nvidia-container-runtime/config.toml" \
        "/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1")

    # TODO workaround until we fix the runtime requiring this
    # directory and file to exist at that location
    cp ./config.toml /etc/nvidia-container-runtime

    # Bash arrays are indexed from 0 (ZSH arrays are indexed from 1)
    for ((i=0; i < ${#packages[@]}; i++)); do
        packages[$i]=$(readlink -f "${packages[$i]}")
    done

    if [[ ! -d "${destination}" ]]; then
        log ERROR "Destination directory doesn't exist in container"
        log ERROR "Ensure that you have correctly mounted the destination directory"
        exit 1
    fi

    cp "${packages[@]}" "${destination}"
    # Set up links to the real binaries to ensure that variables and configs
    # are pointing to the right path
    mv "${destination}/nvidia-container-toolkit" \
        "${destination}/nvidia-container-toolkit.real"
    mv "${destination}/nvidia-container-runtime" \
        "${destination}/nvidia-container-runtime.real"

    # Set up wrapper scripts so as to ensure that the path is correctly set
    cat <<- EOF > "${destination}/nvidia-container-toolkit"
#! /bin/sh
LD_LIBRARY_PATH="${destination}" \
PATH="\$PATH:${destination}" \
${destination}/nvidia-container-toolkit.real \
-config "${destination}/config.toml" \
"\$@"
EOF

    cat <<- EOF > "${destination}/nvidia-container-runtime"
#! /bin/sh
LD_LIBRARY_PATH="${destination}" \
PATH="\$PATH:${destination}" \
${destination}/nvidia-container-runtime.real \
"\$@"
EOF

    # Make sure that the wrapper scripts are executable
    chmod +x "${destination}/nvidia-container-toolkit"
    chmod +x "${destination}/nvidia-container-runtime"
}
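# For reference, with the default destination (/run/nvidia) the generated
# nvidia-container-toolkit wrapper looks roughly like:
#
#   #! /bin/sh
#   LD_LIBRARY_PATH="/run/nvidia" PATH="$PATH:/run/nvidia" /run/nvidia/nvidia-container-toolkit.real -config "/run/nvidia/config.toml" "$@"
#
# (the trailing backslashes inside the unquoted here-document act as line
# continuations, so the assignments and the .real invocation end up on one line).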
main() {
    local -r destination="${1:-/run/nvidia}"
    local -r docker_socket="${2:-unix:///var/run/docker.sock}"
    local -r nvidia_runtime="$(docker::info "${docker_socket}")"

    if [[ "${nvidia_runtime}" = "${destination}/nvidia-container-runtime" ]]; then
        exit 0
    fi

    install_nvidia_container_runtime_toolkit "${destination}"
    docker::setup "${destination}"

    echo "docker info: $(docker::info "${docker_socket}")"
}
main "$@"
#! /bin/bash
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
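# test/docker_test.sh: end-to-end test invoked by "make test" with the shared
# directory and the toolkit image (nvidia/container-toolkit:docker19.03) as
# arguments. It starts a docker-in-docker daemon and runs the toolkit image
# against it.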
set -eEux
readonly dind_name="nvidia-container-runtime-installer"
# TODO move rm -rf shared to cleanup
testing::cleanup() {
    docker kill "${dind_name}" &> /dev/null || true
    docker rm "${dind_name}" &> /dev/null || true

    return
}
testing::setup() {
    local shared_dir=${1:-"./shared"}

    rm -rf "${shared_dir}" || true
    mkdir -p "${shared_dir}"
    mkdir -p "${shared_dir}"/etc/docker
    mkdir -p "${shared_dir}"/run/nvidia
    mkdir -p "${shared_dir}"/etc/nvidia-container-runtime
}
testing::main() {
    local shared_dir="${1:-"./shared"}"
    local image="${2:-"nvidia/container-toolkit:docker19.03"}"

    testing::setup "${shared_dir}"

    # Docker creates /etc/docker when starting
    # by default there isn't any config in this directory (even after the daemon starts)
    docker run --privileged \
        -v "${shared_dir}/etc/docker:/etc/docker" \
        -v "${shared_dir}/run/nvidia:/run/nvidia" \
        -v "${shared_dir}/etc/nvidia-container-runtime:/etc/nvidia-container-runtime" \
        --name "${dind_name}" -d docker:stable-dind -H unix:///run/nvidia/docker.sock
    # Share the volumes so that we can edit the config file and point to the new runtime
    # Share the pid so that we can ask docker to reload its config
    docker run -it --privileged \
        --volumes-from "${dind_name}" \
        --pid "container:${dind_name}" \
        "${image}" \
        bash -x -c "/work/run.sh /run/nvidia unix:///run/nvidia/docker.sock"

    docker run -it --privileged \
        --volumes-from "${dind_name}" \
        alpine:latest chmod 766 /etc/docker /run/nvidia /etc/nvidia-container-runtime

    testing::cleanup
    rm -rf "${shared_dir}" || true
}
trap testing::cleanup ERR
testing::cleanup
testing::main "$@"