diff --git a/hack/jenkins/common.sh b/hack/jenkins/common.sh index f18dc1f81a3c6279777eb7bb124c76c363b86402..61d9dfe90caaea95305aad2ecbf447761b2f4da4 100755 --- a/hack/jenkins/common.sh +++ b/hack/jenkins/common.sh @@ -23,8 +23,6 @@ # EXTRA_START_ARGS: additional flags to pass into minikube start # EXTRA_ARGS: additional flags to pass into minikube # JOB_NAME: the name of the logfile and check name to update on github -# PARALLEL_COUNT: number of tests to run in parallel - readonly TEST_ROOT="${HOME}/minikube-integration" readonly TEST_HOME="${TEST_ROOT}/${OS_ARCH}-${VM_DRIVER}-${MINIKUBE_LOCATION}-$$-${COMMIT}" @@ -243,7 +241,7 @@ if [ "$(uname)" != "Darwin" ]; then docker build -t gcr.io/k8s-minikube/gvisor-addon:2 -f testdata/gvisor-addon-Dockerfile ./testdata fi -readonly LOAD=$(uptime | egrep -o "load average.*: [0-9]" | cut -d" " -f3) +readonly LOAD=$(uptime | egrep -o "load average.*: [0-9]+" | cut -d" " -f3) if [[ "${LOAD}" -gt 2 ]]; then echo "" echo "********************** LOAD WARNING ********************************" @@ -255,21 +253,18 @@ if [[ "${LOAD}" -gt 2 ]]; then top -b -n1 | head -n 15 fi echo "********************** LOAD WARNING ********************************" - echo "" echo "Sleeping 30s to see if load goes down ...." sleep 30 uptime fi - echo "" echo ">> Starting ${E2E_BIN} at $(date)" set -x ${SUDO_PREFIX}${E2E_BIN} \ -minikube-start-args="--vm-driver=${VM_DRIVER} ${EXTRA_START_ARGS}" \ -expected-default-driver="${EXPECTED_DEFAULT_DRIVER}" \ - -test.timeout=60m \ - -test.parallel=${PARALLEL_COUNT} \ + -test.timeout=70m \ ${EXTRA_TEST_ARGS} \ -binary="${MINIKUBE_BIN}" && result=$? || result=$? set +x diff --git a/hack/jenkins/linux_integration_tests_kvm.sh b/hack/jenkins/linux_integration_tests_kvm.sh index d8a3dae4521035f0deb36129a1bf93d562dd95f3..19181b70e2b733a3c299bd152ce8a1120e672a8a 100755 --- a/hack/jenkins/linux_integration_tests_kvm.sh +++ b/hack/jenkins/linux_integration_tests_kvm.sh @@ -28,7 +28,6 @@ set -e OS_ARCH="linux-amd64" VM_DRIVER="kvm2" JOB_NAME="KVM_Linux" -PARALLEL_COUNT=4 EXPECTED_DEFAULT_DRIVER="kvm2" # We pick kvm as our gvisor testbed because it is fast & reliable diff --git a/hack/jenkins/linux_integration_tests_none.sh b/hack/jenkins/linux_integration_tests_none.sh index 3e78045e2eceb27883c5139a66ec11eed8939c57..cb1cc4d3a2f46fdba5f88f3fe07d174753595f41 100755 --- a/hack/jenkins/linux_integration_tests_none.sh +++ b/hack/jenkins/linux_integration_tests_none.sh @@ -30,7 +30,6 @@ OS_ARCH="linux-amd64" VM_DRIVER="none" JOB_NAME="none_Linux" EXTRA_ARGS="--bootstrapper=kubeadm" -PARALLEL_COUNT=1 EXPECTED_DEFAULT_DRIVER="kvm2" SUDO_PREFIX="sudo -E " diff --git a/hack/jenkins/linux_integration_tests_virtualbox.sh b/hack/jenkins/linux_integration_tests_virtualbox.sh index 6f624eeead0afb2499a1ee9503676765e5482559..d159afef5cb732c5c8a7b7fc8ea0aeae8209815c 100755 --- a/hack/jenkins/linux_integration_tests_virtualbox.sh +++ b/hack/jenkins/linux_integration_tests_virtualbox.sh @@ -28,7 +28,6 @@ set -e OS_ARCH="linux-amd64" VM_DRIVER="virtualbox" JOB_NAME="VirtualBox_Linux" -PARALLEL_COUNT=4 EXPECTED_DEFAULT_DRIVER="kvm2" # Download files and set permissions diff --git a/hack/jenkins/osx_integration_tests_hyperkit.sh b/hack/jenkins/osx_integration_tests_hyperkit.sh index aa9d5d69d104ec4924cb1a994e3dfab295964621..8dc49ab367afc21dd432f1038e6059db1789f8bd 100755 --- a/hack/jenkins/osx_integration_tests_hyperkit.sh +++ b/hack/jenkins/osx_integration_tests_hyperkit.sh @@ -31,7 +31,6 @@ VM_DRIVER="hyperkit" JOB_NAME="HyperKit_macOS" EXTRA_ARGS="--bootstrapper=kubeadm" EXTRA_START_ARGS="" -PARALLEL_COUNT=3 EXPECTED_DEFAULT_DRIVER="hyperkit" diff --git a/hack/jenkins/osx_integration_tests_virtualbox.sh b/hack/jenkins/osx_integration_tests_virtualbox.sh index 8f7c6e3dd0b86f1e4ecd7df4b7a532dae3692e70..8ebdb846c1cf33f8f1d5e5d6689b5652b7549a05 100755 --- a/hack/jenkins/osx_integration_tests_virtualbox.sh +++ b/hack/jenkins/osx_integration_tests_virtualbox.sh @@ -29,7 +29,6 @@ OS_ARCH="darwin-amd64" VM_DRIVER="virtualbox" JOB_NAME="VirtualBox_macOS" EXTRA_ARGS="--bootstrapper=kubeadm" -PARALLEL_COUNT=3 # hyperkit behaves better, so it has higher precedence. # Assumes that hyperkit is also installed on the VirtualBox CI host. EXPECTED_DEFAULT_DRIVER="hyperkit" diff --git a/test/integration/addons_test.go b/test/integration/addons_test.go index e184b8e2202abfd9f65b1306cb80573f757f84f9..a575e96eb57b2ddbbab1156870ef7a6cb34c98b0 100644 --- a/test/integration/addons_test.go +++ b/test/integration/addons_test.go @@ -36,8 +36,8 @@ import ( // TestAddons tests addons that require no special environment -- in parallel func TestAddons(t *testing.T) { - MaybeSlowParallel(t) - + MaybeParallel(t) + WaitForStartSlot(t) profile := UniqueProfileName("addons") ctx, cancel := context.WithTimeout(context.Background(), 40*time.Minute) defer CleanupWithLogs(t, profile, cancel) diff --git a/test/integration/docker_test.go b/test/integration/docker_test.go index dd48ea54370cb0f05a83656b9eb81003395fc33d..7acb0f9dcc344c7fcc77da56ece8fd5d3d02fecd 100644 --- a/test/integration/docker_test.go +++ b/test/integration/docker_test.go @@ -30,7 +30,8 @@ func TestDockerFlags(t *testing.T) { if NoneDriver() { t.Skip("skipping: none driver does not support ssh or bundle docker") } - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) profile := UniqueProfileName("docker-flags") ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute) diff --git a/test/integration/guest_env_test.go b/test/integration/guest_env_test.go index e9f4925a279025183adc46626425d98ce1436d7d..0e7dfb66347dc99dd4b4335ab54bc8eb983d90b0 100644 --- a/test/integration/guest_env_test.go +++ b/test/integration/guest_env_test.go @@ -27,7 +27,9 @@ import ( ) func TestGuestEnvironment(t *testing.T) { - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) + profile := UniqueProfileName("guest") ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) defer CleanupWithLogs(t, profile, cancel) diff --git a/test/integration/gvisor_addon_test.go b/test/integration/gvisor_addon_test.go index 368273e975466433e6aa2625fb0cab4d350c4fca..f8ffb4d2c0cee7bb392aaa831bc39ae97ab7346b 100644 --- a/test/integration/gvisor_addon_test.go +++ b/test/integration/gvisor_addon_test.go @@ -34,8 +34,8 @@ func TestGvisorAddon(t *testing.T) { t.Skip("skipping test because --gvisor=false") } - MaybeSlowParallel(t) - + MaybeParallel(t) + WaitForStartSlot(t) profile := UniqueProfileName("gvisor") ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute) defer func() { diff --git a/test/integration/helpers.go b/test/integration/helpers.go index 18f37848193d1904b3bf33ab9ad340dccbe426cc..654748250f6cf6236ba0001de33b8471dafc9efc 100644 --- a/test/integration/helpers.go +++ b/test/integration/helpers.go @@ -42,8 +42,10 @@ import ( ) var ( - antiRaceCounter = 0 - antiRaceMutex = &sync.Mutex{} + // startTimes is a list of startup times, to guarantee --start-offset + startTimes = []time.Time{} + // startTimesMutex is a lock to update startTimes without a race condition + startTimesMutex = &sync.Mutex{} ) // RunResult stores the result of an cmd.Run call @@ -330,25 +332,32 @@ func MaybeParallel(t *testing.T) { t.Parallel() } -// MaybeSlowParallel is a terrible workaround for tests which start clusters in a race-filled world -// TODO: Try removing this hack once certificates are deployed per-profile -func MaybeSlowParallel(t *testing.T) { - // NoneDriver shouldn't parallelize "minikube start" +// WaitForStartSlot enforces --start-offset to avoid startup race conditions +func WaitForStartSlot(t *testing.T) { + // Not parallel if NoneDriver() { return } - antiRaceMutex.Lock() - antiRaceCounter++ - antiRaceMutex.Unlock() + wakeup := time.Now() + startTimesMutex.Lock() + if len(startTimes) > 0 { + nextStart := startTimes[len(startTimes)-1].Add(*startOffset) + // Ignore nextStart if it is in the past - to guarantee offset for next caller + if time.Now().Before(nextStart) { + wakeup = nextStart + } + } + startTimes = append(startTimes, wakeup) + startTimesMutex.Unlock() - if antiRaceCounter > 0 { - // Slow enough to offset start, but not slow to be a major source of delay - penalty := time.Duration(5*antiRaceCounter) * time.Second - t.Logf("MaybeSlowParallel: Sleeping %s to avoid start race ...", penalty) - time.Sleep(penalty) + if time.Now().Before(wakeup) { + d := time.Until(wakeup) + t.Logf("Waiting for start slot at %s (sleeping %s) ...", wakeup, d) + time.Sleep(d) + } else { + t.Logf("No need to wait for start slot, it is already %s", time.Now()) } - t.Parallel() } // killProcessFamily kills a pid and all of its children diff --git a/test/integration/main.go b/test/integration/main.go index bffb12f07966579dc959a501d98fc9f00d16ef63..fe351e070d469c9ea5570686fe946787e43d814e 100644 --- a/test/integration/main.go +++ b/test/integration/main.go @@ -33,6 +33,7 @@ var defaultDriver = flag.String("expected-default-driver", "", "Expected default var forceProfile = flag.String("profile", "", "force tests to run against a particular profile") var cleanup = flag.Bool("cleanup", true, "cleanup failed test run") var enableGvisor = flag.Bool("gvisor", false, "run gvisor integration test (slow)") +var startOffset = flag.Duration("start-offset", 30*time.Second, "how much time to offset between cluster starts") var postMortemLogs = flag.Bool("postmortem-logs", true, "show logs after a failed test run") // Paths to files - normally set for CI diff --git a/test/integration/start_stop_delete_test.go b/test/integration/start_stop_delete_test.go index ada918f480afd0ad7700287031a74abafd5c3322..52ac7a42022e4317e689cc68c7f4f339f27ef0d3 100644 --- a/test/integration/start_stop_delete_test.go +++ b/test/integration/start_stop_delete_test.go @@ -75,7 +75,8 @@ func TestStartStop(t *testing.T) { for _, tc := range tests { tc := tc t.Run(tc.name, func(t *testing.T) { - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) if !strings.Contains(tc.name, "docker") && NoneDriver() { t.Skipf("skipping %s - incompatible with none driver", t.Name()) @@ -136,6 +137,7 @@ func TestStartStop(t *testing.T) { t.Errorf("status = %q; want = %q", got, state.Stopped) } + WaitForStartSlot(t) rr, err = Run(t, exec.CommandContext(ctx, Target(), startArgs...)) if err != nil { // Explicit fatal so that failures don't move directly to deletion diff --git a/test/integration/version_upgrade_test.go b/test/integration/version_upgrade_test.go index 6ee49cb8117376507d4144337f3f321b8684448e..78bc9ef7122102ed5227c6e76d573e084788de45 100644 --- a/test/integration/version_upgrade_test.go +++ b/test/integration/version_upgrade_test.go @@ -39,9 +39,10 @@ import ( // the odlest supported k8s version and then runs the current head minikube // and it tries to upgrade from the older supported k8s to news supported k8s func TestVersionUpgrade(t *testing.T) { + MaybeParallel(t) + WaitForStartSlot(t) profile := UniqueProfileName("vupgrade") ctx, cancel := context.WithTimeout(context.Background(), 55*time.Minute) - MaybeSlowParallel(t) defer CleanupWithLogs(t, profile, cancel) @@ -89,6 +90,7 @@ func TestVersionUpgrade(t *testing.T) { t.Errorf("status = %q; want = %q", got, state.Stopped.String()) } + WaitForStartSlot(t) args = append([]string{"start", "-p", profile, fmt.Sprintf("--kubernetes-version=%s", constants.NewestKubernetesVersion), "--alsologtostderr", "-v=1"}, StartArgs()...) rr, err = Run(t, exec.CommandContext(ctx, Target(), args...)) if err != nil {