Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
lukaCoding
nvidia-container-runtime
提交
f09ab93a
N
nvidia-container-runtime
项目概览
lukaCoding
/
nvidia-container-runtime
与 Fork 源项目一致
从无法访问的项目Fork
通知
4
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
N
nvidia-container-runtime
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f09ab93a
编写于
9月 19, 2019
作者:
R
Renaud Gaubert
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refactor runtimeconfig
Signed-off-by:
N
Renaud Gaubert
<
rgaubert@nvidia.com
>
上级
ec41181e
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
262 addition
and
138 deletion
+262
-138
runtimeconfig/docker/Dockerfile
runtimeconfig/docker/Dockerfile
+2
-8
runtimeconfig/src/common.sh
runtimeconfig/src/common.sh
+27
-0
runtimeconfig/src/docker.sh
runtimeconfig/src/docker.sh
+68
-45
runtimeconfig/src/run.sh
runtimeconfig/src/run.sh
+23
-70
runtimeconfig/src/toolkit.sh
runtimeconfig/src/toolkit.sh
+132
-0
runtimeconfig/test/docker_test.sh
runtimeconfig/test/docker_test.sh
+10
-15
未找到文件。
runtimeconfig/docker/Dockerfile
浏览文件 @
f09ab93a
...
...
@@ -6,24 +6,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
curl
\
gnupg2
\
jq
\
moreutils
\
software-properties-common
RUN
curl
-fsSL
https://download.docker.com/linux/ubuntu/gpg | apt-key add -
&&
\
curl
-s
-L
https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add -
&&
\
RUN
curl
-s
-L
https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add -
&&
\
curl
-s
-L
https://nvidia.github.io/nvidia-docker/ubuntu18.04/nvidia-docker.list |
\
tee
/etc/apt/sources.list.d/nvidia-docker.list
&&
\
add-apt-repository
"deb [arch=amd64] https://download.docker.com/linux/ubuntu
\
$(
lsb_release
-cs
)
stable"
tee
/etc/apt/sources.list.d/nvidia-docker.list
RUN
apt-get update
&&
apt-get
install
-y
--no-install-recommends
\
docker-ce-cli
\
nvidia-container-runtime
WORKDIR
/work
COPY
src/. .
RUN
cp
/etc/nvidia-container-runtime/config.toml ./
RUN
chmod
+x /work/run.sh
...
...
runtimeconfig/src/common.sh
浏览文件 @
f09ab93a
...
...
@@ -27,3 +27,30 @@ log() {
printf
"%s[%s]%s %b
\n
"
"
${
fmt_on
}
"
"
${
level
}
"
"
${
fmt_off
}
"
"
${
message
}
"
>
&2
}
#######################################
# Run a command repeatedly until it succeeds or the attempt budget is spent.
# Arguments:
#   $1 - maximum number of attempts; a value <= 0 means "retry forever"
#   $2 - delay passed to sleep between two attempts (e.g. "2s")
#   $@ - the command to run, with its arguments
# Returns:
#   0 as soon as the command succeeds, 1 once every attempt has failed
#######################################
with_retry() {
  local max_attempts="$1"
  local delay="$2"
  local count=0
  local rc
  local errexit_was_on=0
  shift 2

  # Remember the caller's errexit setting: the loop must switch it off to
  # survive a failing attempt, but it must not switch it on for a caller that
  # never enabled it.
  if [[ "$-" == *e* ]]; then
    errexit_was_on=1
  fi

  while true; do
    set +e
    "$@"; rc="$?"
    if [[ "${errexit_was_on}" -eq 1 ]]; then
      set -e
    fi

    count="$((count+1))"

    if [[ "${rc}" -eq 0 ]]; then
      return 0
    fi

    if [[ "${max_attempts}" -le 0 ]] || [[ "${count}" -lt "${max_attempts}" ]]; then
      sleep "${delay}"
    else
      break
    fi
  done

  return 1
}
runtimeconfig/src/docker.sh
浏览文件 @
f09ab93a
#! /bin/bash
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
# Location of the Docker daemon configuration file that the docker::config
# helpers read, back up and restore.
DOCKER_CONFIG="/etc/docker/daemon.json"
readonly DOCKER_CONFIG
docker::info
()
{
local
-r
docker_socket
=
"
${
1
:-
unix
:///var/run/docker.socket
}
"
# Docker in Docker has a startup race
for
i
in
$(
seq
1 5
)
;
do
# Calling in a subshell so that we can recover from a failure
if
[[
!
$(
docker
-H
"
${
docker_socket
}
"
info
-f
'{{json .Runtimes}}'
)
]]
;
then
sleep
2
continue
local
-r
docker_socket
=
"
${
1
:-
/var/run/docker.sock
}
"
curl
--unix-socket
"
${
docker_socket
}
"
'http://v1.40/info'
| jq
'.Runtimes.nvidia.path'
}
#######################################
# Abort unless /etc/docker shows up in the mount table.
# Outputs:
#   Error messages through log when the check fails
# Returns:
#   0 when a mount entry mentions /etc/docker; exits the shell with 1 otherwise
#######################################
docker::ensure::mounted() {
  # Test the pipeline status directly. The previous `[[ ! $? ]]` checked
  # whether the string "$?" was empty, which is never the case, so the error
  # branch could not run. Having the pipeline as the `if` condition also keeps
  # errexit/pipefail from killing the script before the messages are logged.
  # grep output goes to /dev/null (rather than -q) so grep reads all of its
  # input and `mount` cannot be hit by SIGPIPE under pipefail.
  if ! mount | grep /etc/docker > /dev/null; then
    log ERROR "Docker directory isn't mounted in container"
    log ERROR "Ensure that you have correctly mounted the docker directoy"
    exit 1
  fi
}
docker
-H
"
${
docker_socket
}
"
info
-f
'{{json .Runtimes}}'
| jq
'.nvidia.path'
return
done
#######################################
# Abort unless the docker configuration directory exists.
# Outputs:
#   Error messages through log when /etc/docker is missing
# Returns:
#   0 when /etc/docker is a directory; exits the shell with 1 otherwise
#######################################
docker::ensure::config_dir() {
  # Happy path first: nothing to report when the directory is there.
  if [[ -d "/etc/docker" ]]; then
    return 0
  fi

  log ERROR "Docker directory doesn't exist in container"
  log ERROR "Ensure that you have correctly mounted the docker directoy"
  exit 1
}
# Echo an empty config if the config file doesn't exist
docker::daemon_config
()
{
local
-r
daemon_file
=
"
${
1
:-
"/etc/docker/daemon.json"
}
"
([[
-f
"
${
daemon_file
}
"
]]
&&
cat
"
${
daemon_file
}
"
)
||
echo
{}
#######################################
# Move the current daemon configuration aside as "<config>.bak".
# Globals:
#   DOCKER_CONFIG (read)
# Returns:
#   0 when there is no configuration file; otherwise the status of mv
#######################################
docker::config::backup() {
  local -r backup_path="${DOCKER_CONFIG}.bak"

  # Nothing to back up when the daemon has no configuration file yet.
  [[ -f "${DOCKER_CONFIG}" ]] || return 0

  mv "${DOCKER_CONFIG}" "${backup_path}"
}
docker::refresh_configuration
()
{
log INFO
"Refreshing the docker daemon configuration"
pkill
-SIGHUP
dockerd
#######################################
# Put the backed-up daemon configuration ("<config>.bak") back in place.
# Globals:
#   DOCKER_CONFIG (read)
# Returns:
#   0 when there is no backup to restore; otherwise the status of mv
#######################################
docker::config::restore() {
  # Guard on the backup, not on the live file: the backup is what gets moved.
  # Checking the live file made the mv fail when no backup existed and
  # skipped the restore when only the backup was present.
  if [[ -f "${DOCKER_CONFIG}.bak" ]]; then
    mv "${DOCKER_CONFIG}.bak" "${DOCKER_CONFIG}"
  fi
}
docker::
update_config_fil
e
()
{
docker::
config::add_runtim
e
()
{
local
-r
destination
=
"
${
1
:-
/run/nvidia
}
"
local
-r
nvcr
=
"
${
destination
}
/nvidia-container-runtime"
local
config_json
IFS
=
''
read
-r
config_json
echo
"
${
config_json
}
"
|
\
cat
- |
\
jq
-r
".runtimes = {}"
|
\
jq
-r
".runtimes += {
\"
nvidia
\"
: {
\"
path
\"
:
\"
${
nvcr
}
\"
}}"
|
\
jq
-r
'. += {"default-runtime": "nvidia"}'
}
docker::ensure_prerequisites
()
{
# Ensure that the docker config path exists
if
[[
!
-d
"/etc/docker"
]]
;
then
log ERROR
"Docker directory doesn't exist in container"
log ERROR
"Ensure that you have correctly mounted the docker directoy"
exit
1
fi
#######################################
# Print the daemon configuration, or an empty JSON object when it cannot be
# read.
# Globals:
#   DOCKER_CONFIG (read)
# Outputs:
#   The file content, or "{}" when the file is missing or cat fails
#######################################
docker::config() {
  if [[ -f "${DOCKER_CONFIG}" ]] && cat "${DOCKER_CONFIG}"; then
    return 0
  fi

  # Fall back to an empty object so callers always receive valid JSON.
  echo {}
}
mount |
grep
/etc/docker
if
[[
!
$?
]]
;
then
log ERROR
"Docker directory isn't mounted in container"
log ERROR
"Ensure that you have correctly mounted the docker directoy"
exit
1
fi
#######################################
# Send SIGHUP to every process named dockerd after logging the intent.
# Returns:
#   The status of pkill
#######################################
docker::config::refresh() {
  local -r message="Refreshing the docker daemon configuration"

  log INFO "${message}"
  pkill -SIGHUP dockerd
}
#######################################
# Read a daemon configuration (JSON) on stdin and print the first key of its
# "runtimes" object.
# Outputs:
#   The raw (unquoted) key name as emitted by jq -r
#######################################
docker::config::get_nvidia_runtime() {
  # jq reads stdin on its own; no intermediate cat is needed.
  jq -r '.runtimes | keys[0]'
}
docker::setup
()
{
local
-r
destination
=
"
${
1
:-
/run/nvidia
}
"
docker::ensure::mounted
docker::ensure::config_dir
log INFO
"Setting up the configuration for the docker daemon"
docker::ensure_prerequisites
local
-r
destination
=
"
${
1
:-
/run/nvidia
}
"
local
-r
docker_socket
=
"
${
2
:-
"/var/run/docker.socket"
}
"
local
config
=
$(
docker::config
)
log INFO
"current config:
${
config
}
"
log INFO
"current config:
$(
docker::daemon_config
)
"
local
-r
nvidia_runtime
=
"
$(
with_retry 5 2s docker::info
"
${
docker_socket
}
"
)
"
if
[[
"
${
nvidia_runtime
}
"
=
"
${
destination
}
/nvidia-container-runtime"
]]
;
then
return
fi
# Append the nvidia runtime to the docker daemon's configuration
# We use sponge here because the input file is the output file
config
=
$(
docker::daemon_config | docker::update_config_file
"
${
destination
}
"
)
echo
"
${
config
}
"
>
/etc/docker/daemon.json
local
updated_config
=
$(
echo
"
${
config
}
"
| docker::config::add_runtime
"
${
destination
}
"
)
local
-r
config_runtime
=
$(
echo
"
${
updated_config
}
"
| docker::config::get_nvidia_runtime
)
# If there was an error while parsing the file catch it here
if
[[
"
${
config_runtime
}
"
!=
"nvidia"
]]
;
then
config
=
$(
echo
"{}"
| docker::config::add_runtime
"
${
destination
}
"
)
fi
docker::config::backup
echo
"
${
updated_config
}
"
>
/etc/docker/daemon.json
log INFO
"after:
$(
docker::
daemon_
config | jq .
)
"
docker::
refresh_configuration
log INFO
"after:
$(
docker::config | jq .
)
"
docker::
config::refresh
}
runtimeconfig/src/run.sh
浏览文件 @
f09ab93a
...
...
@@ -4,80 +4,33 @@
set
-euxo
pipefail
shopt
-s
lastpipe
source
"common.sh
"
source
"docker.sh
"
readonly
RUN_DIR
=
"/run/nvidia
"
readonly
TOOLKIT_DIR
=
"
${
RUN_DIR
}
/toolkit
"
install_nvidia_container_runtime_toolkit
()
{
log INFO
"Installing the NVIDIA Container Runtime Toolkit"
readonly
basedir
=
"
$(
dirname
"
$(
realpath
"
$0
"
)
"
)
"
local
-r
destination
=
"
${
1
:-
/run/nvidia
}
"
local
-a
packages
=(
"/usr/bin/nvidia-container-runtime"
\
"/usr/bin/nvidia-container-toolkit"
\
"/usr/bin/nvidia-container-cli"
\
"/etc/nvidia-container-runtime/config.toml"
\
"/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1"
)
# TODO workaround until we fix the runtime requiring this
# directory and file to exist at that location
cp
./config.toml /etc/nvidia-container-runtime
# Bash variables starts at 0
# ZSH variables starts at 1
for
((
i
=
0
;
i <
${#
packages
[@]
}
;
i++
))
;
do
packages[
$i
]=
$(
readlink
-f
${
packages
[
$i
]
}
)
done
if
[[
!
-d
"
${
destination
}
"
]]
;
then
log ERROR
"Destination directory doesn't exist in container"
log ERROR
"Ensure that you have correctly mounted the destination directoy"
exit
1
fi
cp
"
${
packages
[@]
}
"
"
${
destination
}
"
# Setup links to the real binaries to ensure that variables and configs
# are pointing to the right path
mv
"
${
destination
}
/nvidia-container-toolkit"
\
"
${
destination
}
/nvidia-container-toolkit.real"
mv
"
${
destination
}
/nvidia-container-runtime"
\
"
${
destination
}
/nvidia-container-runtime.real"
# Setup aliases so as to ensure that the path is correctly set
cat
<<-
EOF
>
${
destination
}
/nvidia-container-toolkit
#! /bin/sh
LD_LIBRARY_PATH="
${
destination
}
"
\
PATH="
\$
PATH:
${
destination
}
"
\
${
destination
}
/nvidia-container-toolkit.real
\
-config "
${
destination
}
/config.toml"
\
\$
@
EOF
cat
<<-
EOF
>
${
destination
}
/nvidia-container-runtime
#! /bin/sh
LD_LIBRARY_PATH="
${
destination
}
"
\
PATH="
\$
PATH:
${
destination
}
"
\
${
destination
}
/nvidia-container-runtime.real
\
\$
@
EOF
# Make sure that the alias files are executable
chmod
+x
"
${
destination
}
/nvidia-container-toolkit"
chmod
+x
"
${
destination
}
/nvidia-container-runtime"
}
source
"
${
basedir
}
/common.sh"
source
"
${
basedir
}
/toolkit.sh"
source
"
${
basedir
}
/docker.sh"
main
()
{
local
-r
destination
=
"
${
1
:-
/run/nvidia
}
"
local
-r
docker_socket
=
"
${
2
:-
/var/run/docker.socket
}
"
local
-r
nvidia_runtime
=
"
$(
docker::info
${
docker_socket
}
)
"
if
[[
"
${
nvidia_runtime
}
"
=
"
${
destination
}
/nvidia-container-runtime"
]]
;
then
local
-r
destination
=
"
${
1
:-
"
${
RUN_DIR
}
"
}
/toolkit"
local
-r
docker_socket
=
"
${
2
:-
"/var/run/docker.socket"
}
"
toolkit::setup
"
${
destination
}
"
docker::setup
"
${
destination
}
"
"
${
docker_socket
}
"
echo
"docker info:
$(
docker::info
"
${
docker_socket
}
"
)
"
echo
"Done, now waiting for signal"
sleep
infinity &
# shellcheck disable=SC2064
# We want the expand to happen now rather than at trap time
# Setup a new signal handler and reset the EXIT signal handler
trap
"echo 'Caught signal'; toolkit::uninstall && { kill
$!
; exit 0; }"
HUP INT QUIT PIPE TERM
trap
- EXIT
while
true
;
do
wait
$!
||
continue
;
done
exit
0
fi
install_nvidia_container_runtime_toolkit
"
${
destination
}
"
docker::setup
"
${
destination
}
"
echo
"docker info:
$(
docker::info
${
docker_socket
}
)
"
}
main
"
$@
"
runtimeconfig/src/toolkit.sh
0 → 100644
浏览文件 @
f09ab93a
#! /bin/bash
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
# Files copied into the toolkit directory by toolkit::install.
# NOTE: toolkit::install rewrites these entries in place with their
# readlink -f resolved paths.
packages=(
  "/usr/bin/nvidia-container-runtime"
  "/usr/bin/nvidia-container-toolkit"
  "/usr/bin/nvidia-container-cli"
  "/etc/nvidia-container-runtime/config.toml"
  "/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1"
)
#######################################
# Bind-mount /nvidia onto the toolkit directory and copy the toolkit files
# into it.
# Globals:
#   TOOLKIT_DIR (read, default destination)
#   packages    (read and rewritten with readlink -f resolved paths)
# Arguments:
#   $1 - destination directory (defaults to TOOLKIT_DIR)
# Returns:
#   The status of the final mv; earlier failures are only fatal when the
#   caller runs with errexit
#######################################
toolkit::install() {
  local -r destination="${1:-"${TOOLKIT_DIR}"}"
  # Keep the loop index out of the caller's (global) scope.
  local i

  log INFO "${FUNCNAME[0]} $*"

  mkdir -p "/nvidia" "${destination}"
  mount --rbind "/nvidia" "${destination}"

  # Created after the bind mount so the directories land inside /nvidia.
  mkdir -p "${destination}"
  mkdir -p "${destination}/.config/nvidia-container-runtime"

  # Note: Bash arrays start at 0 (zsh arrays start at 1)
  for ((i=0; i < ${#packages[@]}; i++)); do
    # Resolve symlinks so that cp copies the real files.
    packages[$i]=$(readlink -f "${packages[$i]}")
  done

  cp "${packages[@]}" "${destination}"
  mv "${destination}/config.toml" "${destination}/.config/nvidia-container-runtime/"
}
#######################################
# Lazily and recursively unmount the toolkit directory if it is a mount
# target.
# Globals:
#   TOOLKIT_DIR (read, default destination)
# Arguments:
#   $1 - destination directory (defaults to TOOLKIT_DIR)
# Returns:
#   0 when the destination is not mounted; otherwise the status of umount
#######################################
toolkit::uninstall() {
  local -r destination="${1:-"${TOOLKIT_DIR}"}"

  log INFO "${FUNCNAME[0]} $*"

  # Match the destination as a fixed string against whole lines (-F -x).
  # An unanchored regex also matched sibling paths such as "<dest>2" and
  # mounts below the destination, and umount would then be pointed at a path
  # that is not a mount point.
  # Output goes to /dev/null (rather than -q) so grep consumes all of its
  # input and findmnt cannot be hit by SIGPIPE under pipefail.
  if findmnt -r -o TARGET | grep -Fx -- "${destination}" > /dev/null; then
    umount -l -R "${destination}"
  fi
}
#######################################
# Patch the copied config.toml in place: uncomment the "root" entry and point
# the driver root and ldconfig path at RUN_DIR.
# Globals:
#   TOOLKIT_DIR (read, default destination)
#   RUN_DIR     (read)
# Arguments:
#   $1 - destination directory (defaults to TOOLKIT_DIR)
# Returns:
#   The status of sed
#######################################
toolkit::setup::config() {
  local -r destination="${1:-"${TOOLKIT_DIR}"}"
  local -r config_path="${destination}/.config/nvidia-container-runtime/config.toml"

  log INFO "${FUNCNAME[0]} $*"

  # One sed pass; the expressions run in order on every line.
  sed -i \
    -e 's/^#root/root/;' \
    -e "s@/run/nvidia/driver@${RUN_DIR}/driver@;" \
    -e "s;@/sbin/ldconfig.real;@${RUN_DIR}/driver/sbin/ldconfig.real;" \
    "${config_path}"
}
#######################################
# Replace nvidia-container-cli with a /bin/sh wrapper that sets
# LD_LIBRARY_PATH and PATH before running the real binary
# (renamed to nvidia-container-cli.real).
# Globals:
#   TOOLKIT_DIR (read, default destination)
# Arguments:
#   $1 - destination directory (defaults to TOOLKIT_DIR)
# Returns:
#   The status of chmod; earlier failures are only fatal under errexit
#######################################
toolkit::setup::cli_binary() {
  local -r destination="${1:-"${TOOLKIT_DIR}"}"

  log INFO "${FUNCNAME[0]} $*"

  # Keep the real binary next to the wrapper so that variables and configs
  # are pointing to the right path.
  mv "${destination}/nvidia-container-cli" \
    "${destination}/nvidia-container-cli.real"

  # The single-quoted format keeps $PATH and "$@" literal in the generated
  # script. "$@" is quoted there so arguments containing whitespace reach the
  # real binary unchanged (the previous wrapper re-split them).
  {
    printf '#! /bin/sh\n'
    printf 'LD_LIBRARY_PATH="%s" PATH="$PATH:%s" "%s/nvidia-container-cli.real" "$@"\n' \
      "${destination}" "${destination}" "${destination}"
  } > "${destination}/nvidia-container-cli"

  # Make sure that the wrapper is executable
  chmod +x "${destination}/nvidia-container-cli"
}
#######################################
# Replace nvidia-container-toolkit with a /bin/sh wrapper that extends PATH
# and passes -config <destination>/.config/nvidia-container-runtime/config.toml
# to the real binary (renamed to nvidia-container-toolkit.real).
# Globals:
#   TOOLKIT_DIR (read, default destination)
# Arguments:
#   $1 - destination directory (defaults to TOOLKIT_DIR)
# Returns:
#   The status of chmod; earlier failures are only fatal under errexit
#######################################
toolkit::setup::toolkit_binary() {
  local -r destination="${1:-"${TOOLKIT_DIR}"}"

  log INFO "${FUNCNAME[0]} $*"

  mv "${destination}/nvidia-container-toolkit" \
    "${destination}/nvidia-container-toolkit.real"

  # The single-quoted format keeps $PATH and "$@" literal in the generated
  # script. "$@" is quoted there so arguments containing whitespace reach the
  # real binary unchanged (the previous wrapper re-split them).
  {
    printf '#! /bin/sh\n'
    printf 'PATH="$PATH:%s" "%s/nvidia-container-toolkit.real" -config "%s/.config/nvidia-container-runtime/config.toml" "$@"\n' \
      "${destination}" "${destination}" "${destination}"
  } > "${destination}/nvidia-container-toolkit"

  chmod +x "${destination}/nvidia-container-toolkit"
}
#######################################
# Replace nvidia-container-runtime with a /bin/sh wrapper that extends PATH
# and sets XDG_CONFIG_HOME to <destination>/.config before running the real
# binary (renamed to nvidia-container-runtime.real).
# Globals:
#   TOOLKIT_DIR (read, default destination)
# Arguments:
#   $1 - destination directory (defaults to TOOLKIT_DIR)
# Returns:
#   The status of chmod; earlier failures are only fatal under errexit
#######################################
toolkit::setup::runtime_binary() {
  local -r destination="${1:-"${TOOLKIT_DIR}"}"

  log INFO "${FUNCNAME[0]} $*"

  mv "${destination}/nvidia-container-runtime" \
    "${destination}/nvidia-container-runtime.real"

  # The single-quoted format keeps $PATH and "$@" literal in the generated
  # script. "$@" is quoted there so arguments containing whitespace reach the
  # real binary unchanged (the previous wrapper re-split them).
  {
    printf '#! /bin/sh\n'
    printf 'PATH="$PATH:%s" XDG_CONFIG_HOME="%s/.config" "%s/nvidia-container-runtime.real" "$@"\n' \
      "${destination}" "${destination}" "${destination}"
  } > "${destination}/nvidia-container-runtime"

  chmod +x "${destination}/nvidia-container-runtime"
}
#######################################
# Install the toolkit into the destination, wrap its binaries and create the
# compatibility symlinks. Registers an EXIT trap that calls
# toolkit::uninstall on the destination.
# Globals:
#   TOOLKIT_DIR (read, default destination)
# Arguments:
#   $1 - destination directory (defaults to TOOLKIT_DIR)
#######################################
toolkit::setup() {
  local -r destination="${1:-"${TOOLKIT_DIR}"}"
  local exit_handler

  log INFO "Installing the NVIDIA Container Toolkit"

  # The destination is expanded now rather than at trap time; %q shell-quotes
  # it so a path containing whitespace is still passed as one argument when
  # the trap string is evaluated.
  printf -v exit_handler "echo 'Caught signal'; toolkit::uninstall %q" "${destination}"
  trap "${exit_handler}" EXIT

  toolkit::install "${destination}"

  toolkit::setup::config "${destination}"
  toolkit::setup::cli_binary "${destination}"
  toolkit::setup::toolkit_binary "${destination}"
  toolkit::setup::runtime_binary "${destination}"

  # The runtime shim is still looking for the old binary
  # Move to ${destination} to get expanded
  # Make symlinks local so that they still refer to the
  # local target when mounted on the host
  cd "${destination}"
  ln -s "./nvidia-container-toolkit" \
    "${destination}/nvidia-container-runtime-hook"
  ln -s "./libnvidia-container.so.1."* \
    "${destination}/libnvidia-container.so.1"
  # `cd -` prints the directory it returns to; keep that off stdout.
  cd - > /dev/null

  log INFO "Done setting up the NVIDIA Container Toolkit"
}
runtimeconfig/test/docker_test.sh
浏览文件 @
f09ab93a
...
...
@@ -7,6 +7,9 @@ readonly dind_name="nvidia-container-runtime-installer"
# TODO move rm -rf shared to cleanup
testing::cleanup
()
{
docker run
-it
--privileged
-v
"
${
shared_dir
}
:/shared"
alpine:latest
chmod
-R
777 /shared
rm
-rf
"
${
shared_dir
}
"
||
true
docker
kill
"
${
dind_name
}
"
||
true
&> /dev/null
docker
rm
"
${
dind_name
}
"
||
true
&> /dev/null
...
...
@@ -14,10 +17,6 @@ testing::cleanup() {
}
testing::setup
()
{
local
shared_dir
=
${
1
:-
"./shared"
}
rm
-rf
"
${
shared_dir
}
"
||
true
mkdir
-p
"
${
shared_dir
}
"
mkdir
-p
"
${
shared_dir
}
"
/etc/docker
mkdir
-p
"
${
shared_dir
}
"
/run/nvidia
...
...
@@ -25,17 +24,15 @@ testing::setup() {
}
testing::main
()
{
local
shared_dir
=
"
${
1
:-
"./shared"
}
"
local
image
=
"
${
2
:-
"nvidia/container-toolkit:docker19.03"
}
"
local
image
=
"
${
1
:-
"nvidia/container-toolkit:docker19.03"
}
"
testing::setup
"
${
shared_dir
}
"
testing::setup
# Docker creates /etc/docker when starting
# by default there isn't any config in this directory (even after the daemon starts)
docker run
--privileged
\
-v
"
${
shared_dir
}
/etc/docker:/etc/docker"
\
-v
"
${
shared_dir
}
/run/nvidia:/run/nvidia"
\
-v
"
${
shared_dir
}
/etc/nvidia-container-runtime:/etc/nvidia-container-runtime"
\
-v
"
${
shared_dir
}
/run/nvidia:/run/nvidia:shared"
\
--name
"
${
dind_name
}
"
-d
docker:stable-dind
-H
unix://run/nvidia/docker.sock
# Share the volumes so that we can edit the config file and point to the new runtime
...
...
@@ -44,16 +41,14 @@ testing::main() {
--volumes-from
"
${
dind_name
}
"
\
--pid
"container:
${
dind_name
}
"
\
"
${
image
}
"
\
bash
-x
-c
"/work/run.sh /run/nvidia unix:///run/nvidia/docker.sock"
docker run
-it
--privileged
\
--volumes-from
"
${
dind_name
}
"
\
alpine:latest
chmod
766 /etc/docker /run/nvidia /etc/nvidia-container-runtime
bash
-x
-c
"/work/run.sh /run/nvidia /run/nvidia/docker.sock"
testing::cleanup
rm
-rf
"
${
shared_dir
}
"
||
true
}
readonly
shared_dir
=
"
${
1
:-
"./shared"
}
"
shift
trap
testing::cleanup ERR
testing::cleanup
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录