oci.go 15.6 KB
Newer Older
M
Medya Gh 已提交
1 2
/*
Copyright 2019 The Kubernetes Authors All rights reserved.
M
Medya Gh 已提交
3

M
Medya Gh 已提交
4 5 6
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
M
Medya Gh 已提交
7

M
Medya Gh 已提交
8
    http://www.apache.org/licenses/LICENSE-2.0
M
Medya Gh 已提交
9

M
Medya Gh 已提交
10 11 12 13 14 15 16 17 18 19
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package oci

import (
M
Medya Gh 已提交
20
	"context"
M
Medya Gh 已提交
21
	"os"
M
Medya Ghazizadeh 已提交
22
	"path/filepath"
M
Medya Gh 已提交
23
	"time"
M
Medya Gh 已提交
24 25 26 27

	"bufio"
	"bytes"

M
Medya Gh 已提交
28
	"github.com/golang/glog"
M
Medya Gh 已提交
29
	"github.com/pkg/errors"
M
Medya Gh 已提交
30
	"k8s.io/minikube/pkg/minikube/constants"
M
Medya Ghazizadeh 已提交
31
	"k8s.io/minikube/pkg/minikube/localpath"
32
	"k8s.io/minikube/pkg/minikube/out"
33
	"k8s.io/minikube/pkg/util/retry"
M
Medya Gh 已提交
34 35 36 37 38 39

	"fmt"
	"os/exec"
	"strings"
)

M
Medya Gh 已提交
40
// DeleteContainersByLabel deletes all containers that have a specific label
41
// if there no containers found with the given 	label, it will return nil
M
Medya Gh 已提交
42
func DeleteContainersByLabel(ociBin string, label string) []error {
43
	var deleteErrs []error
44

45 46
	cs, err := listContainersByLabel(ociBin, label)
	if err != nil {
M
Medya Gh 已提交
47
		return []error{fmt.Errorf("listing containers by label %q", label)}
48
	}
49

M
Medya Gh 已提交
50 51 52
	if len(cs) == 0 {
		return nil
	}
53

M
lint  
Medya Gh 已提交
54
	for _, c := range cs {
M
Medya Gh 已提交
55
		_, err := ContainerStatus(ociBin, c)
M
Medya Gh 已提交
56
		// only try to delete if docker/podman inspect returns
M
Medya Gh 已提交
57
		// if it doesn't it means docker daemon is stuck and needs restart
M
Medya Gh 已提交
58
		if err != nil {
M
Medya Gh 已提交
59
			deleteErrs = append(deleteErrs, errors.Wrapf(err, "delete container %s: %s daemon is stuck. please try again!", c, ociBin))
M
Medya Gh 已提交
60
			glog.Errorf("%s daemon seems to be stuck. Please try restarting your %s. :%v", ociBin, ociBin, err)
M
Medya Gh 已提交
61 62 63 64 65
			continue
		}
		cmd := exec.Command(ociBin, "rm", "-f", "-v", c)
		if out, err := cmd.CombinedOutput(); err != nil {
			deleteErrs = append(deleteErrs, errors.Wrapf(err, "delete container %s: output %s", c, out))
66
		}
M
Medya Gh 已提交
67

68 69 70 71
	}
	return deleteErrs
}

M
Medya Gh 已提交
72 73
// DeleteContainer deletes a container by ID or Name
func DeleteContainer(ociBin string, name string) error {
74

M
Medya Gh 已提交
75 76
	_, err := ContainerStatus(ociBin, name)
	if err != nil {
M
Medya Gh 已提交
77
		glog.Errorf("%s daemon seems to be stuck. Please try restarting your %s. Will try to delete anyways: %v", ociBin, ociBin, err)
M
Medya Gh 已提交
78 79 80 81 82 83 84 85 86
	}
	// try to delete anyways
	cmd := exec.Command(ociBin, "rm", "-f", "-v", name)
	if out, err := cmd.CombinedOutput(); err != nil {
		return errors.Wrapf(err, "delete container %s: output %s", name, out)
	}
	return nil
}

P
Priya Wadhwa 已提交
87 88
// PrepareContainerNode sets up the container node before CreateContainerNode is called.
// For the docker runtime, it creates a docker volume which will be mounted into kic
P
Priya Wadhwa 已提交
89
func PrepareContainerNode(p CreateParams) error {
90 91 92 93 94 95 96 97 98 99
	if p.OCIBinary != Docker {
		return nil
	}
	if err := createDockerVolume(p.Name, p.Name); err != nil {
		return errors.Wrapf(err, "creating volume for %s container", p.Name)
	}
	glog.Infof("Successfully created a docker volume %s", p.Name)
	return nil
}

M
Medya Gh 已提交
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
// CreateContainerNode creates a new container node
func CreateContainerNode(p CreateParams) error {
	runArgs := []string{
		"-d", // run the container detached
		"-t", // allocate a tty for entrypoint logs
		// running containers in a container requires privileged
		// NOTE: we could try to replicate this with --cap-add, and use less
		// privileges, but this flag also changes some mounts that are necessary
		// including some ones docker would otherwise do by default.
		// for now this is what we want. in the future we may revisit this.
		"--privileged",
		"--security-opt", "seccomp=unconfined", // also ignore seccomp
		"--tmpfs", "/tmp", // various things depend on working /tmp
		"--tmpfs", "/run", // systemd wants a writable /run
		// logs,pods be stroed on  filesystem vs inside container,
		// some k8s things want /lib/modules
		"-v", "/lib/modules:/lib/modules:ro",
		"--hostname", p.Name, // make hostname match container name
		"--name", p.Name, // ... and set the container name
119
		"--label", fmt.Sprintf("%s=%s", CreatedByLabelKey, "true"),
M
Medya Gh 已提交
120 121 122
		// label the node with the cluster ID
		"--label", p.ClusterLabel,
		// label the node with the role ID
123
		"--label", fmt.Sprintf("%s=%s", nodeRoleLabelKey, p.Role),
S
Sharif Elgamal 已提交
124 125
		// label th enode wuth the node ID
		"--label", p.NodeLabel,
M
Medya Ghazizadeh 已提交
126 127 128
	}

	if p.OCIBinary == Podman { // enable execing in /var
129 130 131 132 133
		// volume path in minikube home folder to mount to /var
		hostVarVolPath := filepath.Join(localpath.MiniPath(), "machines", p.Name, "var")
		if err := os.MkdirAll(hostVarVolPath, 0711); err != nil {
			return errors.Wrapf(err, "create var dir %s", hostVarVolPath)
		}
M
Medya Ghazizadeh 已提交
134 135 136 137
		// podman mounts var/lib with no-exec by default  https://github.com/containers/libpod/issues/5103
		runArgs = append(runArgs, "--volume", fmt.Sprintf("%s:/var:exec", hostVarVolPath))
	}
	if p.OCIBinary == Docker {
138
		runArgs = append(runArgs, "--volume", fmt.Sprintf("%s:/var", p.Name))
M
Medya Ghazizadeh 已提交
139 140 141 142 143
		// setting resource limit in privileged mode is only supported by docker
		// podman error: "Error: invalid configuration, cannot set resources with rootless containers not using cgroups v2 unified mode"
		runArgs = append(runArgs, fmt.Sprintf("--cpus=%s", p.CPUs), fmt.Sprintf("--memory=%s", p.Memory))
	}

M
Medya Gh 已提交
144 145 146 147 148 149 150
	for key, val := range p.Envs {
		runArgs = append(runArgs, "-e", fmt.Sprintf("%s=%s", key, val))
	}

	// adds node specific args
	runArgs = append(runArgs, p.ExtraArgs...)

T
Thomas Stromberg 已提交
151
	if enabled := isUsernsRemapEnabled(p.OCIBinary); enabled {
M
Medya Gh 已提交
152 153 154 155 156
		// We need this argument in order to make this command work
		// in systems that have userns-remap enabled on the docker daemon
		runArgs = append(runArgs, "--userns=host")
	}

T
Thomas Stromberg 已提交
157 158
	if err := createContainer(p.OCIBinary, p.Image, withRunArgs(runArgs...), withMounts(p.Mounts), withPortMappings(p.PortMappings)); err != nil {
		return errors.Wrap(err, "create container")
M
Medya Gh 已提交
159
	}
160 161 162 163 164 165 166 167 168

	checkRunning := func() error {
		s, err := ContainerStatus(p.OCIBinary, p.Name)
		if err != nil {
			return fmt.Errorf("temporary error checking status for %q : %v", p.Name, err)
		}
		if s != "running" {
			return fmt.Errorf("temporary error created container %q is not running yet", p.Name)
		}
M
Medya Ghazizadeh 已提交
169
		glog.Infof("the created container %q has a running status.", p.Name)
170 171 172
		return nil
	}

M
Medya Gh 已提交
173 174
	// retry up to up 13 seconds to make sure the created container status is running.
	if err := retry.Expo(checkRunning, 13*time.Millisecond, time.Second*13); err != nil {
M
Medya Gh 已提交
175
		return errors.Wrapf(err, "check container %q running", p.Name)
176 177
	}

M
Medya Gh 已提交
178 179 180 181
	return nil
}

// CreateContainer creates a container with "docker/podman run"
T
Thomas Stromberg 已提交
182
func createContainer(ociBinary string, image string, opts ...createOpt) error {
M
Medya Gh 已提交
183 184 185 186 187 188 189 190 191 192 193 194 195 196
	o := &createOpts{}
	for _, opt := range opts {
		o = opt(o)
	}
	// convert mounts to container run args
	runArgs := o.RunArgs
	for _, mount := range o.Mounts {
		runArgs = append(runArgs, generateMountBindings(mount)...)
	}
	for _, portMapping := range o.PortMappings {
		runArgs = append(runArgs, generatePortMappings(portMapping)...)
	}
	// construct the actual docker run argv
	args := []string{"run"}
197

M
Medya Ghazizadeh 已提交
198 199 200 201
	// to run nested container from privileged container in podman https://bugzilla.redhat.com/show_bug.cgi?id=1687713
	if ociBinary == Podman {
		args = append(args, "--cgroup-manager", "cgroupfs")
	}
202

M
Medya Gh 已提交
203 204 205 206
	args = append(args, runArgs...)
	args = append(args, image)
	args = append(args, o.ContainerArgs...)

T
Thomas Stromberg 已提交
207
	out, err := exec.Command(ociBinary, args...).CombinedOutput()
M
Medya Gh 已提交
208
	if err != nil {
T
Thomas Stromberg 已提交
209
		return errors.Wrapf(err, "failed args: %v output: %s", args, out)
M
Medya Gh 已提交
210
	}
211

T
Thomas Stromberg 已提交
212
	return nil
M
Medya Gh 已提交
213 214 215 216 217 218 219
}

// Copy copies a local asset into the container
func Copy(ociBinary string, ociID string, targetDir string, fName string) error {
	if _, err := os.Stat(fName); os.IsNotExist(err) {
		return errors.Wrapf(err, "error source %s does not exist", fName)
	}
220

M
Medya Gh 已提交
221 222
	destination := fmt.Sprintf("%s:%s", ociID, targetDir)
	cmd := exec.Command(ociBinary, "cp", fName, destination)
223
	if err := cmd.Run(); err != nil {
M
Medya Gh 已提交
224 225
		return errors.Wrapf(err, "error copying %s into node", fName)
	}
226

M
Medya Gh 已提交
227 228 229
	return nil
}

M
Medya Gh 已提交
230 231 232
// ContainerID returns the ID of the container identified by nameOrID, as
// reported by "<ociBinary> inspect -f {{.Id}} <nameOrID>".
//
// The ID is returned without surrounding whitespace (the CLI terminates its
// output with a newline). When the inspect output indicates that the
// container does not exist, an empty string and a nil error are returned.
// Any other failure yields an empty string and the command error.
func ContainerID(ociBinary string, nameOrID string) (string, error) {
	cmd := exec.Command(ociBinary, "inspect", "-f", "{{.Id}}", nameOrID)
	out, err := cmd.CombinedOutput()
	if err != nil {
		// don't return an error if not found, only return an empty string
		msg := string(out)
		if strings.Contains(msg, "Error: No such object:") || strings.Contains(msg, "unable to find") {
			return "", nil
		}
		return "", err
	}

	// strip the trailing newline so callers get the bare ID
	return strings.TrimSpace(string(out)), nil
}

245
// WarnIfSlow runs an oci command, warning about performance issues
M
Medya Gh 已提交
246
func WarnIfSlow(args ...string) ([]byte, error) {
M
Medya Gh 已提交
247
	killTime := 19 * time.Second
T
Thomas Stromberg 已提交
248
	warnTime := 2 * time.Second
249

M
Medya Gh 已提交
250 251 252 253 254
	if args[1] == "volume" || args[1] == "ps" { // volume and ps requires more time than inspect
		killTime = 30 * time.Second
		warnTime = 3 * time.Second
	}

255
	ctx, cancel := context.WithTimeout(context.Background(), killTime)
M
Medya Gh 已提交
256
	defer cancel()
257

258
	start := time.Now()
M
Medya Gh 已提交
259 260
	glog.Infof("executing with %s timeout: %v", args, killTime)
	cmd := exec.CommandContext(ctx, args[0], args[1:]...)
261 262 263 264
	stdout, err := cmd.Output()
	d := time.Since(start)
	if d > warnTime {
		out.WarningT(`Executing "{{.command}}" took an unusually long time: {{.duration}}`, out.V{"command": strings.Join(cmd.Args, " "), "duration": d})
M
Medya Gh 已提交
265
		out.ErrT(out.Tip, `Restarting the {{.name}} service may improve performance.`, out.V{"name": args[0]})
266
	}
267

M
Medya Gh 已提交
268
	if ctx.Err() == context.DeadlineExceeded {
269 270 271 272 273 274 275
		return stdout, fmt.Errorf("%q timed out after %s", strings.Join(cmd.Args, " "), killTime)
	}
	if err != nil {
		if exitErr, ok := err.(*exec.ExitError); ok {
			return stdout, fmt.Errorf("%q failed: %v: %s", strings.Join(cmd.Args, " "), exitErr, exitErr.Stderr)
		}
		return stdout, fmt.Errorf("%q failed: %v", strings.Join(cmd.Args, " "), err)
M
Medya Gh 已提交
276
	}
277 278
	return stdout, nil
}
279

280 281 282
// ContainerExists checks if container name exists (either running or exited)
func ContainerExists(ociBin string, name string) (bool, error) {
	out, err := WarnIfSlow(ociBin, "ps", "-a", "--format", "{{.Names}}")
M
Medya Gh 已提交
283 284 285
	if err != nil {
		return false, errors.Wrapf(err, string(out))
	}
286

M
Medya Gh 已提交
287 288 289 290 291 292
	containers := strings.Split(string(out), "\n")
	for _, c := range containers {
		if strings.TrimSpace(c) == name {
			return true, nil
		}
	}
293

M
Medya Gh 已提交
294 295 296
	return false, nil
}

M
Medya Gh 已提交
297 298 299 300 301
// IsCreatedByMinikube returns true if the container was created by minikube
// with default assumption that it is not created by minikube when we don't know for sure
func IsCreatedByMinikube(ociBinary string, nameOrID string) bool {
	cmd := exec.Command(ociBinary, "inspect", nameOrID, "--format", "{{.Config.Labels}}")
	out, err := cmd.CombinedOutput()
302

M
Medya Gh 已提交
303
	if err != nil {
M
Medya Gh 已提交
304 305
		return false
	}
306

M
Medya Gh 已提交
307 308
	if strings.Contains(string(out), fmt.Sprintf("%s:true", CreatedByLabelKey)) {
		return true
M
Medya Gh 已提交
309
	}
310

M
Medya Gh 已提交
311
	return false
M
Medya Gh 已提交
312 313
}

M
Medya Gh 已提交
314 315
// ListOwnedContainers lists all the containres that kic driver created on user's machine using a label
func ListOwnedContainers(ociBinary string) ([]string, error) {
316
	return listContainersByLabel(ociBinary, ProfileLabelKey)
M
Medya Gh 已提交
317 318 319 320
}

// inspect returns low-level information on a container by running
// "<ociBinary> inspect -f <format> <containerNameOrID>".
//
// The combined stdout and stderr of the command is returned split into lines.
// The lines are returned even when the command fails, alongside the command
// error. If the command succeeds but its output cannot be read completely,
// the scanner error is returned instead of silently truncating the result.
func inspect(ociBinary string, containerNameOrID, format string) ([]string, error) {
	cmd := exec.Command(ociBinary, "inspect",
		"-f", format,
		containerNameOrID) // ... against the "node" container
	var buff bytes.Buffer
	cmd.Stdout = &buff
	cmd.Stderr = &buff
	runErr := cmd.Run()

	var lines []string
	scanner := bufio.NewScanner(&buff)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if runErr != nil {
		// the command failure is the more relevant error for the caller
		return lines, runErr
	}
	return lines, scanner.Err()
}

/*
This is adapated from:
https://github.com/kubernetes/kubernetes/blob/07a5488b2a8f67add543da72e8819407d8314204/pkg/kubelet/dockershim/helpers.go#L115-L155
*/
// generateMountBindings converts the mount list to a list of strings that
// can be understood by docker
// '<HostPath>:<ContainerPath>[:options]', where 'options'
// is a comma-separated list of the following strings:
// 'ro', if the path is read only
// 'Z', if the volume requires SELinux relabeling
M
Medya Gh 已提交
347
func generateMountBindings(mounts ...Mount) []string {
M
Medya Gh 已提交
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
	result := make([]string, 0, len(mounts))
	for _, m := range mounts {
		bind := fmt.Sprintf("%s:%s", m.HostPath, m.ContainerPath)
		var attrs []string
		if m.Readonly {
			attrs = append(attrs, "ro")
		}
		// Only request relabeling if the pod provides an SELinux context. If the pod
		// does not provide an SELinux context relabeling will label the volume with
		// the container's randomly allocated MCS label. This would restrict access
		// to the volume to the container which mounts it first.
		if m.SelinuxRelabel {
			attrs = append(attrs, "Z")
		}
		switch m.Propagation {
M
Medya Gh 已提交
363
		case MountPropagationNone:
M
Medya Gh 已提交
364
			// noop, private is default
M
Medya Gh 已提交
365
		case MountPropagationBidirectional:
M
Medya Gh 已提交
366
			attrs = append(attrs, "rshared")
M
Medya Gh 已提交
367
		case MountPropagationHostToContainer:
M
Medya Gh 已提交
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
			attrs = append(attrs, "rslave")
		default:
			// Falls back to "private"
		}

		if len(attrs) > 0 {
			bind = fmt.Sprintf("%s:%s", bind, strings.Join(attrs, ","))
		}
		// our specific modification is the following line: make this a docker flag
		bind = fmt.Sprintf("--volume=%s", bind)
		result = append(result, bind)
	}
	return result
}

M
Medya Gh 已提交
383
// isUsernsRemapEnabled checks if userns-remap is enabled in docker.
//
// It runs "<ociBinary> info" asking for the security options and reports true
// when the first line of the combined stdout/stderr contains "name=userns".
// A failing command or empty output yields false.
func isUsernsRemapEnabled(ociBinary string) bool {
	var output bytes.Buffer
	cmd := exec.Command(ociBinary, "info", "--format", "'{{json .SecurityOptions}}'")
	cmd.Stdout = &output
	cmd.Stderr = &output
	if err := cmd.Run(); err != nil {
		return false
	}

	// only the first line of the output is examined
	firstLine := bufio.NewScanner(&output)
	if !firstLine.Scan() {
		return false
	}
	return strings.Contains(firstLine.Text(), "name=userns")
}

M
Medya Gh 已提交
410
func generatePortMappings(portMappings ...PortMapping) []string {
M
Medya Gh 已提交
411 412
	result := make([]string, 0, len(portMappings))
	for _, pm := range portMappings {
413 414 415
		// let docker pick a host port by leaving it as ::
		// example --publish=127.0.0.17::8443 will get a random host port for 8443
		publish := fmt.Sprintf("--publish=%s::%d", pm.ListenAddress, pm.ContainerPort)
M
Medya Gh 已提交
416 417 418 419 420
		result = append(result, publish)
	}
	return result
}

M
Medya Gh 已提交
421
// withRunArgs sets the args for docker run
M
Medya Gh 已提交
422
// as in the args portion of `docker run args... image containerArgs...`
M
Medya Gh 已提交
423
func withRunArgs(args ...string) createOpt {
M
Medya Gh 已提交
424 425 426 427 428 429
	return func(r *createOpts) *createOpts {
		r.RunArgs = args
		return r
	}
}

M
Medya Gh 已提交
430 431
// withMounts sets the container mounts
func withMounts(mounts []Mount) createOpt {
M
Medya Gh 已提交
432 433 434 435 436 437
	return func(r *createOpts) *createOpts {
		r.Mounts = mounts
		return r
	}
}

M
Medya Gh 已提交
438 439
// withPortMappings sets the container port mappings to the host
func withPortMappings(portMappings []PortMapping) createOpt {
M
Medya Gh 已提交
440 441 442 443 444 445
	return func(r *createOpts) *createOpts {
		r.PortMappings = portMappings
		return r
	}
}

M
Medya Gh 已提交
446
// listContainersByLabel returns all the container names with a specified label
447
func listContainersByLabel(ociBinary string, label string) ([]string, error) {
448 449 450 451
	stdout, err := WarnIfSlow(ociBinary, "ps", "-a", "--filter", fmt.Sprintf("label=%s", label), "--format", "{{.Names}}")
	if err != nil {
		return nil, err
	}
452
	s := bufio.NewScanner(bytes.NewReader(stdout))
453
	var names []string
454 455 456 457
	for s.Scan() {
		n := strings.TrimSpace(s.Text())
		if n != "" {
			names = append(names, n)
M
Medya Gh 已提交
458
		}
M
Medya Gh 已提交
459
	}
460
	return names, err
M
Medya Gh 已提交
461
}
M
Medya Gh 已提交
462 463 464 465 466 467

// PointToHostDockerDaemon will unset env variables that point to docker inside minikube
// to make sure it points to the docker daemon installed by user.
func PointToHostDockerDaemon() error {
	p := os.Getenv(constants.MinikubeActiveDockerdEnv)
	if p != "" {
M
Medya Gh 已提交
468
		glog.Infof("shell is pointing to dockerd inside minikube. will unset to use host")
M
Medya Gh 已提交
469 470 471 472 473 474 475 476 477 478 479 480
	}

	for i := range constants.DockerDaemonEnvs {
		e := constants.DockerDaemonEnvs[i]
		err := os.Setenv(e, "")
		if err != nil {
			return errors.Wrapf(err, "resetting %s env", e)
		}

	}
	return nil
}
M
Medya Gh 已提交
481

M
Medya Gh 已提交
482 483
// ContainerStatus returns status of a container running,exited,...
func ContainerStatus(ociBin string, name string) (string, error) {
484 485
	out, err := WarnIfSlow(ociBin, "inspect", name, "--format={{.State.Status}}")
	return strings.TrimSpace(string(out)), err
M
Medya Gh 已提交
486
}