Unverified · Commit 14adf489 · authored by Thomas Strömberg, committed by GitHub

Merge pull request #5782 from tstromberg/test-timeout

Improve parallel start scheduling and autoset parallelism
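In short, this PR drops the hard-coded PARALLEL_COUNT from each CI job script, raises the suite timeout from 60m to 70m, and replaces the MaybeSlowParallel back-off hack with a WaitForStartSlot helper: each test reserves the next start slot, spaced apart by a new -start-offset flag (default 30s), before running minikube start. A minimal standalone sketch of the slot-reservation idea (illustrative names only; the real helper appears in the diff below):

	package main

	import (
		"fmt"
		"sync"
		"time"
	)

	var (
		offset     = 30 * time.Second // mirrors the new -start-offset default
		startTimes []time.Time        // reserved start slots, newest last
		mu         sync.Mutex
	)

	// reserveSlot returns the earliest time the caller may run "minikube start":
	// now, or offset after the most recently reserved slot, whichever is later.
	func reserveSlot() time.Time {
		mu.Lock()
		defer mu.Unlock()
		wakeup := time.Now()
		if n := len(startTimes); n > 0 {
			if next := startTimes[n-1].Add(offset); wakeup.Before(next) {
				wakeup = next
			}
		}
		startTimes = append(startTimes, wakeup)
		return wakeup
	}

	func main() {
		for i := 0; i < 3; i++ {
			fmt.Printf("test %d may start at %s\n", i, reserveSlot().Format(time.RFC3339))
		}
	}

Because slots are reserved under a mutex, concurrent tests serialize their cluster starts without capping overall test parallelism, which is why the fixed PARALLEL_COUNT knob becomes unnecessary.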
@@ -23,8 +23,6 @@
 # EXTRA_START_ARGS: additional flags to pass into minikube start
 # EXTRA_ARGS: additional flags to pass into minikube
 # JOB_NAME: the name of the logfile and check name to update on github
-# PARALLEL_COUNT: number of tests to run in parallel

 readonly TEST_ROOT="${HOME}/minikube-integration"
 readonly TEST_HOME="${TEST_ROOT}/${OS_ARCH}-${VM_DRIVER}-${MINIKUBE_LOCATION}-$$-${COMMIT}"
@@ -243,7 +241,7 @@ if [ "$(uname)" != "Darwin" ]; then
   docker build -t gcr.io/k8s-minikube/gvisor-addon:2 -f testdata/gvisor-addon-Dockerfile ./testdata
 fi

-readonly LOAD=$(uptime | egrep -o "load average.*: [0-9]" | cut -d" " -f3)
+readonly LOAD=$(uptime | egrep -o "load average.*: [0-9]+" | cut -d" " -f3)
 if [[ "${LOAD}" -gt 2 ]]; then
   echo ""
   echo "********************** LOAD WARNING ********************************"

@@ -255,21 +253,18 @@ if [[ "${LOAD}" -gt 2 ]]; then
     top -b -n1 | head -n 15
   fi
   echo "********************** LOAD WARNING ********************************"
-  echo ""
   echo "Sleeping 30s to see if load goes down ...."
   sleep 30
   uptime
 fi

 echo ""
 echo ">> Starting ${E2E_BIN} at $(date)"
 set -x
 ${SUDO_PREFIX}${E2E_BIN} \
   -minikube-start-args="--vm-driver=${VM_DRIVER} ${EXTRA_START_ARGS}" \
   -expected-default-driver="${EXPECTED_DEFAULT_DRIVER}" \
-  -test.timeout=60m \
-  -test.parallel=${PARALLEL_COUNT} \
+  -test.timeout=70m \
   ${EXTRA_TEST_ARGS} \
   -binary="${MINIKUBE_BIN}" && result=$? || result=$?
 set +x
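One subtle fix above: the old egrep pattern ended in [0-9], which matches only the first digit of the load average, so a load of 12 was extracted as "1" and slipped past the -gt 2 warning check. A small Go demonstration of the difference (the shell pipeline's cut -d" " -f3 then pulls out the number):

	package main

	import (
		"fmt"
		"regexp"
	)

	func main() {
		// Sample `uptime` tail on a busy host; the integer part has two digits.
		line := "load average: 12.04, 9.50, 7.20"
		old := regexp.MustCompile(`load average.*: [0-9]`)    // stops at the first digit
		fixed := regexp.MustCompile(`load average.*: [0-9]+`) // takes the whole integer part
		fmt.Println(old.FindString(line))   // "load average: 1"  -> cut yields "1"
		fmt.Println(fixed.FindString(line)) // "load average: 12" -> cut yields "12"
	}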
@@ -28,7 +28,6 @@ set -e
 OS_ARCH="linux-amd64"
 VM_DRIVER="kvm2"
 JOB_NAME="KVM_Linux"
-PARALLEL_COUNT=4
 EXPECTED_DEFAULT_DRIVER="kvm2"

 # We pick kvm as our gvisor testbed because it is fast & reliable
@@ -30,7 +30,6 @@ OS_ARCH="linux-amd64"
 VM_DRIVER="none"
 JOB_NAME="none_Linux"
 EXTRA_ARGS="--bootstrapper=kubeadm"
-PARALLEL_COUNT=1
 EXPECTED_DEFAULT_DRIVER="kvm2"
 SUDO_PREFIX="sudo -E "
@@ -28,7 +28,6 @@ set -e
 OS_ARCH="linux-amd64"
 VM_DRIVER="virtualbox"
 JOB_NAME="VirtualBox_Linux"
-PARALLEL_COUNT=4
 EXPECTED_DEFAULT_DRIVER="kvm2"

 # Download files and set permissions
@@ -31,7 +31,6 @@ VM_DRIVER="hyperkit"
 JOB_NAME="HyperKit_macOS"
 EXTRA_ARGS="--bootstrapper=kubeadm"
 EXTRA_START_ARGS=""
-PARALLEL_COUNT=3
 EXPECTED_DEFAULT_DRIVER="hyperkit"
@@ -29,7 +29,6 @@ OS_ARCH="darwin-amd64"
 VM_DRIVER="virtualbox"
 JOB_NAME="VirtualBox_macOS"
 EXTRA_ARGS="--bootstrapper=kubeadm"
-PARALLEL_COUNT=3

 # hyperkit behaves better, so it has higher precedence.
 # Assumes that hyperkit is also installed on the VirtualBox CI host.
 EXPECTED_DEFAULT_DRIVER="hyperkit"
@@ -36,8 +36,8 @@ import (
 // TestAddons tests addons that require no special environment -- in parallel
 func TestAddons(t *testing.T) {
-	MaybeSlowParallel(t)
+	MaybeParallel(t)
+	WaitForStartSlot(t)
 	profile := UniqueProfileName("addons")
 	ctx, cancel := context.WithTimeout(context.Background(), 40*time.Minute)
 	defer CleanupWithLogs(t, profile, cancel)
@@ -30,7 +30,8 @@ func TestDockerFlags(t *testing.T) {
 	if NoneDriver() {
 		t.Skip("skipping: none driver does not support ssh or bundle docker")
 	}
-	MaybeSlowParallel(t)
+	MaybeParallel(t)
+	WaitForStartSlot(t)
 	profile := UniqueProfileName("docker-flags")
 	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute)
@@ -27,7 +27,9 @@ import (
 )

 func TestGuestEnvironment(t *testing.T) {
-	MaybeSlowParallel(t)
+	MaybeParallel(t)
+	WaitForStartSlot(t)
 	profile := UniqueProfileName("guest")
 	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
 	defer CleanupWithLogs(t, profile, cancel)
@@ -34,8 +34,8 @@ func TestGvisorAddon(t *testing.T) {
 		t.Skip("skipping test because --gvisor=false")
 	}
-	MaybeSlowParallel(t)
+	MaybeParallel(t)
+	WaitForStartSlot(t)
 	profile := UniqueProfileName("gvisor")
 	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute)
 	defer func() {
@@ -42,8 +42,10 @@ import (
 )

 var (
-	antiRaceCounter = 0
-	antiRaceMutex   = &sync.Mutex{}
+	// startTimes is a list of startup times, to guarantee --start-offset
+	startTimes = []time.Time{}
+	// startTimesMutex is a lock to update startTimes without a race condition
+	startTimesMutex = &sync.Mutex{}
 )

 // RunResult stores the result of a cmd.Run call
@@ -330,25 +332,32 @@ func MaybeParallel(t *testing.T) {
 	t.Parallel()
 }

-// MaybeSlowParallel is a terrible workaround for tests which start clusters in a race-filled world
-// TODO: Try removing this hack once certificates are deployed per-profile
-func MaybeSlowParallel(t *testing.T) {
-	// NoneDriver shouldn't parallelize "minikube start"
+// WaitForStartSlot enforces --start-offset to avoid startup race conditions
+func WaitForStartSlot(t *testing.T) {
+	// Not parallel
 	if NoneDriver() {
 		return
 	}

-	antiRaceMutex.Lock()
-	antiRaceCounter++
-	antiRaceMutex.Unlock()
+	wakeup := time.Now()
+	startTimesMutex.Lock()
+	if len(startTimes) > 0 {
+		nextStart := startTimes[len(startTimes)-1].Add(*startOffset)
+		// Ignore nextStart if it is in the past - to guarantee offset for next caller
+		if time.Now().Before(nextStart) {
+			wakeup = nextStart
+		}
+	}
+	startTimes = append(startTimes, wakeup)
+	startTimesMutex.Unlock()

-	if antiRaceCounter > 0 {
-		// Slow enough to offset start, but not slow to be a major source of delay
-		penalty := time.Duration(5*antiRaceCounter) * time.Second
-		t.Logf("MaybeSlowParallel: Sleeping %s to avoid start race ...", penalty)
-		time.Sleep(penalty)
+	if time.Now().Before(wakeup) {
+		d := time.Until(wakeup)
+		t.Logf("Waiting for start slot at %s (sleeping %s) ...", wakeup, d)
+		time.Sleep(d)
+	} else {
+		t.Logf("No need to wait for start slot, it is already %s", time.Now())
 	}
-	t.Parallel()
 }

 // killProcessFamily kills a pid and all of its children
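For reference, the adoption pattern used by every converted test is two calls at the top of the function, plus another WaitForStartSlot before any later minikube start (as TestStartStop and TestVersionUpgrade do below). A hedged sketch, assuming it lives in the same package as the helpers; TestExample, the profile name, and the timeout are illustrative, not part of this PR:

	package integration

	import (
		"context"
		"testing"
		"time"
	)

	// TestExample shows the adoption pattern: opt into parallelism,
	// then block until a start slot has been reserved.
	func TestExample(t *testing.T) {
		MaybeParallel(t)    // t.Parallel(), except for drivers that must run serially
		WaitForStartSlot(t) // sleep until this test's --start-offset slot arrives

		profile := UniqueProfileName("example")
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
		defer CleanupWithLogs(t, profile, cancel)
		_ = ctx // a real test passes ctx to Run(t, exec.CommandContext(ctx, ...))
	}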
@@ -33,6 +33,7 @@ var defaultDriver = flag.String("expected-default-driver", "", "Expected default
 var forceProfile = flag.String("profile", "", "force tests to run against a particular profile")
 var cleanup = flag.Bool("cleanup", true, "cleanup failed test run")
 var enableGvisor = flag.Bool("gvisor", false, "run gvisor integration test (slow)")
+var startOffset = flag.Duration("start-offset", 30*time.Second, "how much time to offset between cluster starts")
 var postMortemLogs = flag.Bool("postmortem-logs", true, "show logs after a failed test run")

 // Paths to files - normally set for CI
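Because the offset is an ordinary flag.Duration, a CI job can widen the spacing on slow hardware without code changes. A hypothetical invocation (the binary names are placeholders; the real ones come from E2E_BIN and MINIKUBE_BIN in common.sh above):

	./e2e-linux-amd64 -start-offset=1m -test.timeout=70m -binary=./minikube-linux-amd64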
@@ -75,7 +75,8 @@ func TestStartStop(t *testing.T) {
 	for _, tc := range tests {
 		tc := tc
 		t.Run(tc.name, func(t *testing.T) {
-			MaybeSlowParallel(t)
+			MaybeParallel(t)
+			WaitForStartSlot(t)

 			if !strings.Contains(tc.name, "docker") && NoneDriver() {
 				t.Skipf("skipping %s - incompatible with none driver", t.Name())

@@ -136,6 +137,7 @@ func TestStartStop(t *testing.T) {
 				t.Errorf("status = %q; want = %q", got, state.Stopped)
 			}

+			WaitForStartSlot(t)
 			rr, err = Run(t, exec.CommandContext(ctx, Target(), startArgs...))
 			if err != nil {
 				// Explicit fatal so that failures don't move directly to deletion
@@ -39,9 +39,10 @@ import (
 // the oldest supported k8s version and then runs the current head minikube
 // and it tries to upgrade from the older supported k8s to the newest supported k8s
 func TestVersionUpgrade(t *testing.T) {
+	MaybeParallel(t)
+	WaitForStartSlot(t)
 	profile := UniqueProfileName("vupgrade")
 	ctx, cancel := context.WithTimeout(context.Background(), 55*time.Minute)
-	MaybeSlowParallel(t)

 	defer CleanupWithLogs(t, profile, cancel)

@@ -89,6 +90,7 @@ func TestVersionUpgrade(t *testing.T) {
 		t.Errorf("status = %q; want = %q", got, state.Stopped.String())
 	}

+	WaitForStartSlot(t)
 	args = append([]string{"start", "-p", profile, fmt.Sprintf("--kubernetes-version=%s", constants.NewestKubernetesVersion), "--alsologtostderr", "-v=1"}, StartArgs()...)
 	rr, err = Run(t, exec.CommandContext(ctx, Target(), args...))
 	if err != nil {