From 92147c3597308bc05e6448ccc41409fcc7c05fd7 Mon Sep 17 00:00:00 2001 From: Stephen Connolly Date: Mon, 23 Mar 2015 21:00:19 +0000 Subject: [PATCH] [FIXED JENKINS-27565] Refactor the Queue and Nodes to use a consistent locking strategy The test system I set up to verify resolution of customer(s)' issues driving this change, required additional changes in order to fully resolve the issues at hand. As a result I am bundling these changes: - Moves nodes to being store in separate config files outside of the main config file (improves performance) [FIXED JENKINS-27562] - Makes the Jenkins is loading screen not block on the extensions loading lock [FIXED JENKINS-27563] - Removes race condition rendering the list of executors [FIXED JENKINS-27564] [FIXED JENKINS-15355] - Tidy up the locks that were causing deadlocks with the once retention strategy in durable tasks [FIXED JENKINS-27476] - Remove any requirement from Jenkins Core to lock on the Queue when rendering the Jenkins UI [FIXED-JENKINS-27566] --- core/src/main/java/hudson/Functions.java | 8 +- .../java/hudson/model/AbstractCIBase.java | 80 +- core/src/main/java/hudson/model/Computer.java | 120 ++- core/src/main/java/hudson/model/Executor.java | 368 ++++++++-- core/src/main/java/hudson/model/Hudson.java | 2 +- core/src/main/java/hudson/model/Node.java | 4 + core/src/main/java/hudson/model/Queue.java | 694 +++++++++++------- .../java/hudson/model/ResourceController.java | 88 ++- .../hudson/slaves/AbstractCloudSlave.java | 5 + .../hudson/slaves/ComputerRetentionWork.java | 34 +- .../java/hudson/slaves/NodeProvisioner.java | 185 +++-- .../java/hudson/slaves/RetentionStrategy.java | 34 +- .../java/hudson/slaves/SlaveComputer.java | 14 +- core/src/main/java/jenkins/model/Jenkins.java | 82 +-- core/src/main/java/jenkins/model/Nodes.java | 244 ++++++ .../jenkins/util/AtmostOneTaskExecutor.java | 9 +- .../hudson/model/Messages.properties | 1 + .../main/resources/lib/hudson/executors.jelly | 201 ++--- .../main/resources/lib/layout/layout.jelly | 27 +- 19 files changed, 1549 insertions(+), 651 deletions(-) create mode 100644 core/src/main/java/jenkins/model/Nodes.java diff --git a/core/src/main/java/hudson/Functions.java b/core/src/main/java/hudson/Functions.java index f331d7f1cb..1b696a431e 100644 --- a/core/src/main/java/hudson/Functions.java +++ b/core/src/main/java/hudson/Functions.java @@ -28,6 +28,7 @@ package hudson; import hudson.cli.CLICommand; import hudson.console.ConsoleAnnotationDescriptor; import hudson.console.ConsoleAnnotatorFactory; +import hudson.init.InitMilestone; import hudson.model.AbstractProject; import hudson.model.Action; import hudson.model.Describable; @@ -201,7 +202,12 @@ public class Functions { public static String rfc822Date(Calendar cal) { return Util.RFC822_DATETIME_FORMATTER.format(cal.getTime()); } - + + public static boolean isExtensionsAvailable() { + final Jenkins jenkins = Jenkins.getInstance(); + return jenkins == null ? false : jenkins.getInitLevel().compareTo(InitMilestone.EXTENSIONS_AUGMENTED) >= 0; + } + public static void initPageVariables(JellyContext context) { StaplerRequest currentRequest = Stapler.getCurrentRequest(); String rootURL = currentRequest.getContextPath(); diff --git a/core/src/main/java/hudson/model/AbstractCIBase.java b/core/src/main/java/hudson/model/AbstractCIBase.java index cf9cec9bf2..4d872606c2 100644 --- a/core/src/main/java/hudson/model/AbstractCIBase.java +++ b/core/src/main/java/hudson/model/AbstractCIBase.java @@ -48,8 +48,6 @@ public abstract class AbstractCIBase extends Node implements ItemGroup computers = getComputerMap(); - for (Map.Entry e : computers.entrySet()) { - if (e.getValue() == computer) { - computers.remove(e.getKey()); - computer.onRemoved(); - return; + /*package*/ void removeComputer(final Computer computer) { + Queue.withLock(new Runnable() { + @Override + public void run() { + Map computers = getComputerMap(); + for (Map.Entry e : computers.entrySet()) { + if (e.getValue() == computer) { + computers.remove(e.getKey()); + computer.onRemoved(); + return; + } + } } - } + }); } /*package*/ @CheckForNull Computer getComputer(Node n) { @@ -160,37 +163,40 @@ public abstract class AbstractCIBase extends Node implements ItemGroup computers = getComputerMap(); - synchronized(updateComputerLock) {// just so that we don't have two code updating computer list at the same time - Map byName = new HashMap(); - for (Computer c : computers.values()) { - Node node = c.getNode(); - if (node == null) - continue; // this computer is gone - byName.put(node.getNodeName(),c); - } + protected void updateComputerList(final boolean automaticSlaveLaunch) { + final Map computers = getComputerMap(); + Queue.withLock(new Runnable() { + @Override + public void run() { + Map byName = new HashMap(); + for (Computer c : computers.values()) { + Node node = c.getNode(); + if (node == null) + continue; // this computer is gone + byName.put(node.getNodeName(),c); + } - final Set old = new HashSet(computers.values()); - Set used = new HashSet(); + final Set old = new HashSet(computers.values()); + Set used = new HashSet(); - updateComputer(this, byName, used, automaticSlaveLaunch); - for (Node s : getNodes()) { - long start = System.currentTimeMillis(); - updateComputer(s, byName, used, automaticSlaveLaunch); - if(LOG_STARTUP_PERFORMANCE) - LOGGER.info(String.format("Took %dms to update node %s", - System.currentTimeMillis()-start, s.getNodeName())); - } + updateComputer(AbstractCIBase.this, byName, used, automaticSlaveLaunch); + for (Node s : getNodes()) { + long start = System.currentTimeMillis(); + updateComputer(s, byName, used, automaticSlaveLaunch); + if(LOG_STARTUP_PERFORMANCE) + LOGGER.info(String.format("Took %dms to update node %s", + System.currentTimeMillis()-start, s.getNodeName())); + } - // find out what computers are removed, and kill off all executors. - // when all executors exit, it will be removed from the computers map. - // so don't remove too quickly - old.removeAll(used); - for (Computer c : old) { - killComputer(c); + // find out what computers are removed, and kill off all executors. + // when all executors exit, it will be removed from the computers map. + // so don't remove too quickly + old.removeAll(used); + for (Computer c : old) { + killComputer(c); + } } - } + }); getQueue().scheduleMaintenance(); for (ComputerListener cl : ComputerListener.all()) cl.onConfigurationChange(); diff --git a/core/src/main/java/hudson/model/Computer.java b/core/src/main/java/hudson/model/Computer.java index c98ddd59ae..d0578f3640 100644 --- a/core/src/main/java/hudson/model/Computer.java +++ b/core/src/main/java/hudson/model/Computer.java @@ -176,6 +176,21 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces protected final Object statusChangeLock = new Object(); + private transient final List terminatedBy = Collections.synchronizedList(new ArrayList + ()); + + public void recordTermination() { + try { + throw new RuntimeException(String.format("Termination requested by %s", Thread.currentThread())); + } catch (RuntimeException e) { + terminatedBy.add(e); + } + } + + public List getTerminatedBy() { + return new ArrayList(terminatedBy); + } + public Computer(Node node) { setNode(node); } @@ -404,6 +419,7 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces * @since 1.320 */ public Future disconnect(OfflineCause cause) { + recordTermination(); offlineCause = cause; if (Util.isOverridden(Computer.class,getClass(),"disconnect")) return disconnect(); // legacy subtypes that extend disconnect(). @@ -419,6 +435,7 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces * Use {@link #disconnect(OfflineCause)} and specify the cause. */ public Future disconnect() { + recordTermination(); if (Util.isOverridden(Computer.class,getClass(),"disconnect",OfflineCause.class)) // if the subtype already derives disconnect(OfflineCause), delegate to it return disconnect(null); @@ -808,6 +825,21 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces return new ArrayList(oneOffExecutors); } + public List getDisplayExecutors() { + List result = new ArrayList(executors.size()+oneOffExecutors.size()); + int index = 0; + for (Executor e: executors) { + result.add(new DisplayExecutor(Integer.toString(index+1), String.format("executors/%d", index), e)); + index++; + } + index = 0; + for (OneOffExecutor e: oneOffExecutors) { + result.add(new DisplayExecutor("", String.format("oneOffExecutors/%d", index), e)); + index++; + } + return result; + } + /** * Returns true if all the executors of this computer are idle. */ @@ -867,14 +899,21 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces /** * Called by {@link Executor} to kill excessive executors from this computer. */ - /*package*/ synchronized void removeExecutor(Executor e) { - executors.remove(e); - addNewExecutorIfNecessary(); - if(!isAlive()) - { - AbstractCIBase ciBase = Jenkins.getInstance(); - ciBase.removeComputer(this); - } + /*package*/ void removeExecutor(final Executor e) { + Queue.withLock(new Runnable() { + @Override + public void run() { + synchronized (Computer.this) { + executors.remove(e); + addNewExecutorIfNecessary(); + if(!isAlive()) + { + AbstractCIBase ciBase = Jenkins.getInstance(); + ciBase.removeComputer(Computer.this); + } + } + } + }); } /** @@ -1413,6 +1452,71 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces } } + public static class DisplayExecutor implements ModelObject { + + @Nonnull + private final String displayName; + @Nonnull + private final String url; + @Nonnull + private final Executor executor; + + public DisplayExecutor(@Nonnull String displayName, @Nonnull String url, @Nonnull Executor executor) { + this.displayName = displayName; + this.url = url; + this.executor = executor; + } + + @Override + @Nonnull + public String getDisplayName() { + return displayName; + } + + @Nonnull + public String getUrl() { + return url; + } + + @Nonnull + public Executor getExecutor() { + return executor; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("DisplayExecutor{"); + sb.append("displayName='").append(displayName).append('\''); + sb.append(", url='").append(url).append('\''); + sb.append(", executor=").append(executor); + sb.append('}'); + return sb.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + DisplayExecutor that = (DisplayExecutor) o; + + if (!executor.equals(that.executor)) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + return executor.hashCode(); + } + } + public static final PermissionGroup PERMISSIONS = new PermissionGroup(Computer.class,Messages._Computer_Permissions_Title()); public static final Permission CONFIGURE = new Permission(PERMISSIONS,"Configure", Messages._Computer_ConfigurePermission_Description(), Permission.CONFIGURE, PermissionScope.COMPUTER); /** diff --git a/core/src/main/java/hudson/model/Executor.java b/core/src/main/java/hudson/model/Executor.java index 574cfb7486..65e638f38b 100644 --- a/core/src/main/java/hudson/model/Executor.java +++ b/core/src/main/java/hudson/model/Executor.java @@ -37,6 +37,7 @@ import jenkins.model.CauseOfInterruption; import jenkins.model.CauseOfInterruption.UserInterruption; import jenkins.model.InterruptedBuildAction; import jenkins.model.Jenkins; +import net.jcip.annotations.GuardedBy; import org.acegisecurity.Authentication; import org.kohsuke.stapler.HttpResponse; import org.kohsuke.stapler.HttpResponses; @@ -48,11 +49,15 @@ import org.kohsuke.stapler.interceptor.RequirePOST; import javax.servlet.ServletException; import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Vector; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.logging.Level; import java.util.logging.Logger; @@ -73,7 +78,9 @@ import javax.annotation.Nonnull; public class Executor extends Thread implements ModelObject { protected final @Nonnull Computer owner; private final Queue queue; + private final ReadWriteLock lock = new ReentrantReadWriteLock(); + @GuardedBy("#lock") private long startTime; /** * Used to track when a job was last executed. @@ -87,29 +94,34 @@ public class Executor extends Thread implements ModelObject { /** * {@link hudson.model.Queue.Executable} being executed right now, or null if the executor is idle. */ - private volatile Queue.Executable executable; + @GuardedBy("#lock") + private Queue.Executable executable; /** * When {@link Queue} allocates a work for this executor, this field is set * and the executor is {@linkplain Thread#start() started}. */ - private volatile WorkUnit workUnit; + @GuardedBy("#lock") + private WorkUnit workUnit; private Throwable causeOfDeath; private boolean induceDeath; - private volatile boolean started; + @GuardedBy("#lock") + private boolean started; /** * When the executor is interrupted, we allow the code that interrupted the thread to override the * result code it prefers. */ + @GuardedBy("#lock") private Result interruptStatus; /** * Cause of interruption. Access needs to be synchronized. */ + @GuardedBy("#lock") private final List causes = new Vector(); public Executor(@Nonnull Computer owner, int n) { @@ -148,28 +160,37 @@ public class Executor extends Thread implements ModelObject { if (LOGGER.isLoggable(FINE)) LOGGER.log(FINE, String.format("%s is interrupted(%s): %s", getDisplayName(), result, Util.join(Arrays.asList(causes),",")), new InterruptedException()); - synchronized (this) { + lock.writeLock().lock(); + try { if (!started) { // not yet started, so simply dispose this owner.removeExecutor(this); return; } - } - interruptStatus = result; - synchronized (this.causes) { + interruptStatus = result; + for (CauseOfInterruption c : causes) { if (!this.causes.contains(c)) this.causes.add(c); } + } finally { + lock.writeLock().unlock(); } super.interrupt(); } public Result abortResult() { - Result r = interruptStatus; - if (r==null) r = Result.ABORTED; // this is when we programmatically throw InterruptedException instead of calling the interrupt method. - return r; + lock.readLock().lock(); + try { + Result r = interruptStatus; + if (r == null) r = + Result.ABORTED; // this is when we programmatically throw InterruptedException instead of calling the interrupt method. + + return r; + } finally { + lock.readLock().unlock(); + } } /** @@ -180,11 +201,14 @@ public class Executor extends Thread implements ModelObject { public void recordCauseOfInterruption(Run build, TaskListener listener) { List r; - // atomically get&clear causes, and minimize the lock. - synchronized (causes) { + // atomically get&clear causes. + lock.writeLock().lock(); + try { if (causes.isEmpty()) return; r = new ArrayList(causes); causes.clear(); + } finally { + lock.writeLock().unlock(); } build.addAction(new InterruptedBuildAction(r)); @@ -192,9 +216,64 @@ public class Executor extends Thread implements ModelObject { c.print(listener); } + /** + * There are some cases where an executor is started but the node is removed or goes off-line before we are ready + * to start executing the assigned work unit. This method is called to clear the assigned work unit so that + * the {@link Queue#maintain()} method can return the item to the buildable state. + * + * Note: once we create the {@link Executable} we cannot unwind the state and the build will have to end up being + * marked as a failure. + */ + private void resetWorkUnit(String reason) { + StringWriter writer = new StringWriter(); + PrintWriter pw = new PrintWriter(writer); + try { + pw.printf("%s grabbed %s from queue but %s %s. ", getName(), workUnit, owner.getDisplayName(), reason); + if (owner.getTerminatedBy().isEmpty()) { + pw.print("No termination trace available."); + } else { + pw.println("Termination trace follows:"); + for (RuntimeException terminator: owner.getTerminatedBy()) { + terminator.printStackTrace(pw); + } + } + } finally { + pw.close(); + } + LOGGER.log(WARNING, writer.toString()); + lock.writeLock().lock(); + try { + if (executable != null) { + throw new IllegalStateException("Cannot reset the work unit after the executable has been created"); + } + workUnit = null; + } finally { + lock.writeLock().unlock(); + } + } + @Override public void run() { - startTime = System.currentTimeMillis(); + if (!owner.isOnline()) { + resetWorkUnit("went off-line before the task's worker thread started"); + owner.removeExecutor(this); + queue.scheduleMaintenance(); + return; + } + if (owner.getNode() == null) { + resetWorkUnit("was removed before the task's worker thread started"); + owner.removeExecutor(this); + queue.scheduleMaintenance(); + return; + } + final WorkUnit workUnit; + lock.writeLock().lock(); + try { + startTime = System.currentTimeMillis(); + workUnit = this.workUnit; + } finally { + lock.writeLock().unlock(); + } ACL.impersonate(ACL.SYSTEM); @@ -204,14 +283,45 @@ public class Executor extends Thread implements ModelObject { SubTask task; // transition from idle to building. // perform this state change as an atomic operation wrt other queue operations - synchronized (queue) { - workUnit.setExecutor(this); - queue.onStartExecuting(this); - if (LOGGER.isLoggable(FINE)) - LOGGER.log(FINE, getName()+" grabbed "+workUnit+" from queue"); - task = workUnit.work; - executable = task.createExecutable(); - workUnit.setExecutable(executable); + task = Queue.withLock(new java.util.concurrent.Callable() { + @Override + public SubTask call() throws Exception { + if (!owner.isOnline()) { + resetWorkUnit("went off-line before the task's worker thread was ready to execute"); + return null; + } + if (owner.getNode() == null) { + resetWorkUnit("was removed before the task's worker thread was ready to execute"); + return null; + } + // after this point we cannot unwind the assignment of the work unit, if the owner + // is removed or goes off-line then the build will just have to fail. + workUnit.setExecutor(Executor.this); + queue.onStartExecuting(Executor.this); + if (LOGGER.isLoggable(FINE)) + LOGGER.log(FINE, getName()+" grabbed "+workUnit+" from queue"); + SubTask task = workUnit.work; + Executable executable = task.createExecutable(); + lock.writeLock().lock(); + try { + Executor.this.executable = executable; + } finally { + lock.writeLock().unlock(); + } + workUnit.setExecutable(executable); + return task; + } + }); + Executable executable; + lock.readLock().lock(); + try { + if (this.workUnit == null) { + // we called resetWorkUnit, so bail. Outer finally will remove this and schedule queue maintenance + return; + } + executable = this.executable; + } finally { + lock.readLock().unlock(); } if (LOGGER.isLoggable(FINE)) LOGGER.log(FINE, getName()+" is going to execute "+executable); @@ -267,6 +377,7 @@ public class Executor extends Thread implements ModelObject { causeOfDeath = e; LOGGER.log(SEVERE, "Unexpected executor death", e); } finally { + for (RuntimeException e1: owner.getTerminatedBy()) LOGGER.log(Level.WARNING, String.format("%s termination trace", getName()), e1); if (causeOfDeath==null) // let this thread die and be replaced by a fresh unstarted instance owner.removeExecutor(this); @@ -290,7 +401,12 @@ public class Executor extends Thread implements ModelObject { */ @Exported public @CheckForNull Queue.Executable getCurrentExecutable() { - return executable; + lock.readLock().lock(); + try { + return executable; + } finally { + lock.readLock().unlock(); + } } /** @@ -302,7 +418,12 @@ public class Executor extends Thread implements ModelObject { */ @Exported public WorkUnit getCurrentWorkUnit() { - return workUnit; + lock.readLock().lock(); + try { + return workUnit; + } finally { + lock.readLock().unlock(); + } } /** @@ -311,13 +432,19 @@ public class Executor extends Thread implements ModelObject { * to that point yet. */ public FilePath getCurrentWorkspace() { - Executable e = executable; - if(e==null) return null; - if (e instanceof AbstractBuild) { - AbstractBuild ab = (AbstractBuild) e; - return ab.getWorkspace(); + lock.readLock().lock(); + try { + if (executable == null) { + return null; + } + if (executable instanceof AbstractBuild) { + AbstractBuild ab = (AbstractBuild) executable; + return ab.getWorkspace(); + } + return null; + } finally { + lock.readLock().unlock(); } - return null; } /** @@ -344,14 +471,24 @@ public class Executor extends Thread implements ModelObject { */ @Exported public boolean isIdle() { - return executable==null; + lock.readLock().lock(); + try { + return workUnit == null && executable == null; + } finally { + lock.readLock().unlock(); + } } /** * The opposite of {@link #isIdle()} — the executor is doing some work. */ public boolean isBusy() { - return executable!=null; + lock.readLock().lock(); + try { + return workUnit != null || executable != null; + } finally { + lock.readLock().unlock(); + } } /** @@ -364,14 +501,24 @@ public class Executor extends Thread implements ModelObject { * @since 1.536 */ public boolean isActive() { - return !started || isAlive(); + lock.readLock().lock(); + try { + return !started || isAlive(); + } finally { + lock.readLock().unlock(); + } } /** * Returns true if this executor is waiting for a task to execute. */ public boolean isParking() { - return !started; + lock.readLock().lock(); + try { + return !started; + } finally { + lock.readLock().unlock(); + } } /** @@ -392,13 +539,24 @@ public class Executor extends Thread implements ModelObject { */ @Exported public int getProgress() { - Queue.Executable e = executable; - if(e==null) return -1; - long d = Executables.getEstimatedDurationFor(e); - if(d<0) return -1; + long d; + lock.readLock().lock(); + try { + if (executable == null) { + return -1; + } + d = Executables.getEstimatedDurationFor(executable); + } finally { + lock.readLock().unlock(); + } + if (d < 0) { + return -1; + } - int num = (int)(getElapsedTime()*100/d); - if(num>=100) num=99; + int num = (int) (getElapsedTime() * 100 / d); + if (num >= 100) { + num = 99; + } return num; } @@ -411,22 +569,35 @@ public class Executor extends Thread implements ModelObject { */ @Exported public boolean isLikelyStuck() { - Queue.Executable e = executable; - if(e==null) return false; + long d; + long elapsed; + lock.readLock().lock(); + try { + if (executable == null) { + return false; + } - long elapsed = getElapsedTime(); - long d = Executables.getEstimatedDurationFor(e); - if(d>=0) { + elapsed = getElapsedTime(); + d = Executables.getEstimatedDurationFor(executable); + } finally { + lock.readLock().unlock(); + } + if (d >= 0) { // if it's taking 10 times longer than ETA, consider it stuck - return d*10 < elapsed; + return d * 10 < elapsed; } else { // if no ETA is available, a build taking longer than a day is considered stuck - return TimeUnit2.MILLISECONDS.toHours(elapsed)>24; + return TimeUnit2.MILLISECONDS.toHours(elapsed) > 24; } } public long getElapsedTime() { - return System.currentTimeMillis() - startTime; + lock.readLock().lock(); + try { + return System.currentTimeMillis() - startTime; + } finally { + lock.readLock().unlock(); + } } /** @@ -435,7 +606,12 @@ public class Executor extends Thread implements ModelObject { * @since 1.440 */ public long getTimeSpentInQueue() { - return startTime - workUnit.context.item.buildableStartMilliseconds; + lock.readLock().lock(); + try { + return startTime - workUnit.context.item.buildableStartMilliseconds; + } finally { + lock.readLock().unlock(); + } } /** @@ -453,14 +629,25 @@ public class Executor extends Thread implements ModelObject { * until the build completes. */ public String getEstimatedRemainingTime() { - Queue.Executable e = executable; - if(e==null) return Messages.Executor_NotAvailable(); + long d; + lock.readLock().lock(); + try { + if (executable == null) { + return Messages.Executor_NotAvailable(); + } - long d = Executables.getEstimatedDurationFor(e); - if(d<0) return Messages.Executor_NotAvailable(); + d = Executables.getEstimatedDurationFor(executable); + } finally { + lock.readLock().unlock(); + } + if (d < 0) { + return Messages.Executor_NotAvailable(); + } - long eta = d-getElapsedTime(); - if(eta<=0) return Messages.Executor_NotAvailable(); + long eta = d - getElapsedTime(); + if (eta <= 0) { + return Messages.Executor_NotAvailable(); + } return Util.getTimeSpanString(eta); } @@ -470,14 +657,25 @@ public class Executor extends Thread implements ModelObject { * it as a number of milli-seconds. */ public long getEstimatedRemainingTimeMillis() { - Queue.Executable e = executable; - if(e==null) return -1; + long d; + lock.readLock().lock(); + try { + if (executable == null) { + return -1; + } - long d = Executables.getEstimatedDurationFor(e); - if(d<0) return -1; + d = Executables.getEstimatedDurationFor(executable); + } finally { + lock.readLock().unlock(); + } + if (d < 0) { + return -1; + } - long eta = d-getElapsedTime(); - if(eta<=0) return -1; + long eta = d - getElapsedTime(); + if (eta <= 0) { + return -1; + } return eta; } @@ -488,14 +686,19 @@ public class Executor extends Thread implements ModelObject { * @see #start(WorkUnit) */ @Override - public synchronized void start() { + public void start() { throw new UnsupportedOperationException(); } - /*protected*/ synchronized void start(WorkUnit task) { - this.workUnit = task; - super.start(); - started = true; + /*protected*/ void start(WorkUnit task) { + lock.writeLock().lock(); + try { + this.workUnit = task; + super.start(); + started = true; + } finally { + lock.writeLock().unlock(); + } } @@ -515,10 +718,14 @@ public class Executor extends Thread implements ModelObject { */ @RequirePOST public HttpResponse doStop() { - Queue.Executable e = executable; - if(e!=null) { - Tasks.getOwnerTaskOf(getParentOf(e)).checkAbortPermission(); - interrupt(); + lock.writeLock().lock(); // need write lock as interrupt will change the field + try { + if (executable != null) { + Tasks.getOwnerTaskOf(getParentOf(executable)).checkAbortPermission(); + interrupt(); + } + } finally { + lock.writeLock().unlock(); } return HttpResponses.forwardToPreviousPage(); } @@ -539,8 +746,12 @@ public class Executor extends Thread implements ModelObject { * Checks if the current user has a permission to stop this build. */ public boolean hasStopPermission() { - Queue.Executable e = executable; - return e!=null && Tasks.getOwnerTaskOf(getParentOf(e)).hasAbortPermission(); + lock.readLock().lock(); + try { + return executable != null && Tasks.getOwnerTaskOf(getParentOf(executable)).hasAbortPermission(); + } finally { + lock.readLock().unlock(); + } } public @Nonnull Computer getOwner() { @@ -551,11 +762,16 @@ public class Executor extends Thread implements ModelObject { * Returns when this executor started or should start being idle. */ public long getIdleStartMilliseconds() { - if (isIdle()) - return Math.max(creationTime, owner.getConnectTime()); - else { - return Math.max(startTime + Math.max(0, Executables.getEstimatedDurationFor(executable)), - System.currentTimeMillis() + 15000); + lock.readLock().lock(); + try { + if (isIdle()) + return Math.max(creationTime, owner.getConnectTime()); + else { + return Math.max(startTime + Math.max(0, Executables.getEstimatedDurationFor(executable)), + System.currentTimeMillis() + 15000); + } + } finally { + lock.readLock().unlock(); } } diff --git a/core/src/main/java/hudson/model/Hudson.java b/core/src/main/java/hudson/model/Hudson.java index c4b6cf4a61..64f00ead83 100644 --- a/core/src/main/java/hudson/model/Hudson.java +++ b/core/src/main/java/hudson/model/Hudson.java @@ -121,7 +121,7 @@ public class Hudson extends Jenkins { * Use {@link #getNodes()}. Since 1.252. */ public List getSlaves() { - return (List)slaves; + return (List)getNodes(); } /** diff --git a/core/src/main/java/hudson/model/Node.java b/core/src/main/java/hudson/model/Node.java index 419b7f9d3e..61d471e7d7 100644 --- a/core/src/main/java/hudson/model/Node.java +++ b/core/src/main/java/hudson/model/Node.java @@ -373,6 +373,10 @@ public abstract class Node extends AbstractModelObject implements Reconfigurable if (c!=null) return c; } + if (!isAcceptingTasks()) { + return CauseOfBlockage.fromMessage(Messages._Node_BecauseNodeIsNotAcceptingTasks(getNodeName())); + } + // Looks like we can take the task return null; } diff --git a/core/src/main/java/hudson/model/Queue.java b/core/src/main/java/hudson/model/Queue.java index aefe7b0575..d51c8735ba 100644 --- a/core/src/main/java/hudson/model/Queue.java +++ b/core/src/main/java/hudson/model/Queue.java @@ -88,6 +88,7 @@ import java.util.GregorianCalendar; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; @@ -98,6 +99,9 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import java.util.logging.Level; import java.util.logging.Logger; @@ -140,9 +144,20 @@ import org.kohsuke.stapler.interceptor.RequirePOST; * | | * | v * +--> buildables ---> pending ---> left + * ^ | + * | | + * +---(rarely)---+ * * *

+ * Note: In the normal case of events pending items only move to left. However they can move back + * if the node they are assigned to execute on disappears before their {@link Executor} thread + * starts, where the node is removed before the {@link Executable} has been instantiated it + * is safe to move the pending item back to buildable. Once the {@link Executable} has been + * instantiated the only option is to let the {@link Executable} bomb out as soon as it starts + * to try an execute on the node that no longer exists. + * + *

* In addition, at any stage, an item can be removed from the queue (for example, when the user * cancels a job in the queue.) See the corresponding field for their exact meanings. * @@ -191,6 +206,8 @@ public class Queue extends ResourceController implements Saveable { */ private final ItemList pendings = new ItemList(); + private transient volatile Snapshot snapshot = new Snapshot(waitingList, blockedProjects, buildables, pendings); + /** * Items that left queue would stay here for a while to enable tracking via {@link Item#getId()}. * @@ -319,6 +336,10 @@ public class Queue extends ResourceController implements Saveable { } }); + private transient final Lock lock = new ReentrantLock(); + + private transient final Condition condition = lock.newCondition(); + public Queue(@Nonnull LoadBalancer loadBalancer) { this.loadBalancer = loadBalancer.sanitize(); // if all the executors are busy doing something, then the queue won't be maintained in @@ -354,7 +375,8 @@ public class Queue extends ResourceController implements Saveable { /** * Loads the queue contents that was {@link #save() saved}. */ - public synchronized void load() { + public void load() { + lock.lock(); try { // first try the old format File queueFile = getQueueFile(); @@ -430,31 +452,39 @@ public class Queue extends ResourceController implements Saveable { } } catch (IOException e) { LOGGER.log(Level.WARNING, "Failed to load the queue file " + getXMLQueueFile(), e); + } finally { + updateSnapshot(); + lock.unlock(); } } /** * Persists the queue contents to the disk. */ - public synchronized void save() { + public void save() { if(BulkChange.contains(this)) return; - // write out the queue state we want to save - State state = new State(); - state.counter = WaitingItem.COUNTER.longValue(); - - // write out the tasks on the queue - for (Item item: getItems()) { - if(item.task instanceof TransientTask) continue; - state.items.add(item); - } - + lock.lock(); try { - XmlFile queueFile = new XmlFile(XSTREAM, getXMLQueueFile()); - queueFile.write(state); - SaveableListener.fireOnChange(this, queueFile); - } catch (IOException e) { - LOGGER.log(Level.WARNING, "Failed to write out the queue file " + getXMLQueueFile(), e); + // write out the queue state we want to save + State state = new State(); + state.counter = WaitingItem.COUNTER.longValue(); + + // write out the tasks on the queue + for (Item item: getItems()) { + if(item.task instanceof TransientTask) continue; + state.items.add(item); + } + + try { + XmlFile queueFile = new XmlFile(XSTREAM, getXMLQueueFile()); + queueFile.write(state); + SaveableListener.fireOnChange(this, queueFile); + } catch (IOException e) { + LOGGER.log(Level.WARNING, "Failed to write out the queue file " + getXMLQueueFile(), e); + } + } finally { + lock.unlock(); } } @@ -462,13 +492,20 @@ public class Queue extends ResourceController implements Saveable { * Wipes out all the items currently in the queue, as if all of them are cancelled at once. */ @CLIMethod(name="clear-queue") - public synchronized void clear() { + public void clear() { Jenkins.getInstance().checkPermission(Jenkins.ADMINISTER); - for (WaitingItem i : new ArrayList(waitingList)) // copy the list as we'll modify it in the loop - i.cancel(this); - blockedProjects.cancelAll(); - pendings.cancelAll(); - buildables.cancelAll(); + lock.lock(); + try { + for (WaitingItem i : new ArrayList( + waitingList)) // copy the list as we'll modify it in the loop + i.cancel(this); + blockedProjects.cancelAll(); + pendings.cancelAll(); + buildables.cancelAll(); + } finally { + updateSnapshot(); + lock.unlock(); + } scheduleMaintenance(); } @@ -514,7 +551,7 @@ public class Queue extends ResourceController implements Saveable { * @deprecated as of 1.521 * Use {@link #schedule2(Task, int, List)} */ - public synchronized WaitingItem schedule(Task p, int quietPeriod, List actions) { + public WaitingItem schedule(Task p, int quietPeriod, List actions) { return schedule2(p, quietPeriod, actions).getCreateItem(); } @@ -539,7 +576,7 @@ public class Queue extends ResourceController implements Saveable { * * That said, one can still look at {@link Queue.Item#future}, {@link Queue.Item#getId()}, etc. */ - public synchronized @Nonnull ScheduleResult schedule2(Task p, int quietPeriod, List actions) { + public @Nonnull ScheduleResult schedule2(Task p, int quietPeriod, List actions) { // remove nulls actions = new ArrayList(actions); for (Iterator itr = actions.iterator(); itr.hasNext();) { @@ -547,11 +584,17 @@ public class Queue extends ResourceController implements Saveable { if (a==null) itr.remove(); } - for(QueueDecisionHandler h : QueueDecisionHandler.all()) - if (!h.shouldSchedule(p, actions)) - return ScheduleResult.refused(); // veto + lock.lock(); + try { + for (QueueDecisionHandler h : QueueDecisionHandler.all()) + if (!h.shouldSchedule(p, actions)) + return ScheduleResult.refused(); // veto - return scheduleInternal(p, quietPeriod, actions); + return scheduleInternal(p, quietPeriod, actions); + } finally { + updateSnapshot(); + lock.unlock(); + } } /** @@ -567,71 +610,77 @@ public class Queue extends ResourceController implements Saveable { * * That said, one can still look at {@link WaitingItem#future}, {@link WaitingItem#getId()}, etc. */ - private synchronized @Nonnull ScheduleResult scheduleInternal(Task p, int quietPeriod, List actions) { - Calendar due = new GregorianCalendar(); - due.add(Calendar.SECOND, quietPeriod); - - // Do we already have this task in the queue? Because if so, we won't schedule a new one. - List duplicatesInQueue = new ArrayList(); - for(Item item : getItems(p)) { - boolean shouldScheduleItem = false; - for (QueueAction action: item.getActions(QueueAction.class)) { - shouldScheduleItem |= action.shouldSchedule(actions); - } - for (QueueAction action: Util.filter(actions,QueueAction.class)) { - shouldScheduleItem |= action.shouldSchedule((new ArrayList(item.getAllActions()))); - } - if(!shouldScheduleItem) { - duplicatesInQueue.add(item); - } - } - if (duplicatesInQueue.isEmpty()) { - LOGGER.log(Level.FINE, "{0} added to queue", p); - - // put the item in the queue - WaitingItem added = new WaitingItem(due,p,actions); - added.enter(this); - scheduleMaintenance(); // let an executor know that a new item is in the queue. - return ScheduleResult.created(added); - } + private @Nonnull ScheduleResult scheduleInternal(Task p, int quietPeriod, List actions) { + lock.lock(); + try { + Calendar due = new GregorianCalendar(); + due.add(Calendar.SECOND, quietPeriod); + + // Do we already have this task in the queue? Because if so, we won't schedule a new one. + List duplicatesInQueue = new ArrayList(); + for (Item item : liveGetItems(p)) { + boolean shouldScheduleItem = false; + for (QueueAction action : item.getActions(QueueAction.class)) { + shouldScheduleItem |= action.shouldSchedule(actions); + } + for (QueueAction action : Util.filter(actions, QueueAction.class)) { + shouldScheduleItem |= action.shouldSchedule((new ArrayList(item.getAllActions()))); + } + if (!shouldScheduleItem) { + duplicatesInQueue.add(item); + } + } + if (duplicatesInQueue.isEmpty()) { + LOGGER.log(Level.FINE, "{0} added to queue", p); + + // put the item in the queue + WaitingItem added = new WaitingItem(due, p, actions); + added.enter(this); + scheduleMaintenance(); // let an executor know that a new item is in the queue. + return ScheduleResult.created(added); + } - LOGGER.log(Level.FINE, "{0} is already in the queue", p); + LOGGER.log(Level.FINE, "{0} is already in the queue", p); - // but let the actions affect the existing stuff. - for(Item item : duplicatesInQueue) { - for(FoldableAction a : Util.filter(actions,FoldableAction.class)) { - a.foldIntoExisting(item, p, actions); + // but let the actions affect the existing stuff. + for (Item item : duplicatesInQueue) { + for (FoldableAction a : Util.filter(actions, FoldableAction.class)) { + a.foldIntoExisting(item, p, actions); + } } - } - boolean queueUpdated = false; - for(WaitingItem wi : Util.filter(duplicatesInQueue,WaitingItem.class)) { - if(quietPeriod<=0) { - // the user really wants to build now, and they mean NOW. - // so let's pull in the timestamp if we can. - if (wi.timestamp.before(due)) - continue; - } else { - // otherwise we do the normal quiet period implementation - if (wi.timestamp.after(due)) - continue; - // quiet period timer reset. start the period over again - } + boolean queueUpdated = false; + for (WaitingItem wi : Util.filter(duplicatesInQueue, WaitingItem.class)) { + if (quietPeriod <= 0) { + // the user really wants to build now, and they mean NOW. + // so let's pull in the timestamp if we can. + if (wi.timestamp.before(due)) + continue; + } else { + // otherwise we do the normal quiet period implementation + if (wi.timestamp.after(due)) + continue; + // quiet period timer reset. start the period over again + } - // waitingList is sorted, so when we change a timestamp we need to maintain order - wi.leave(this); - wi.timestamp = due; - wi.enter(this); - queueUpdated=true; - } + // waitingList is sorted, so when we change a timestamp we need to maintain order + wi.leave(this); + wi.timestamp = due; + wi.enter(this); + queueUpdated = true; + } - if (queueUpdated) scheduleMaintenance(); + if (queueUpdated) scheduleMaintenance(); - // REVISIT: when there are multiple existing items in the queue that matches the incoming one, - // whether the new one should affect all existing ones or not is debatable. I for myself - // thought this would only affect one, so the code was bit of surprise, but I'm keeping the current - // behaviour. - return ScheduleResult.existing(duplicatesInQueue.get(0)); + // REVISIT: when there are multiple existing items in the queue that matches the incoming one, + // whether the new one should affect all existing ones or not is debatable. I for myself + // thought this would only affect one, so the code was bit of surprise, but I'm keeping the current + // behaviour. + return ScheduleResult.existing(duplicatesInQueue.get(0)); + } finally { + updateSnapshot(); + lock.unlock(); + } } @@ -639,11 +688,11 @@ public class Queue extends ResourceController implements Saveable { * @deprecated as of 1.311 * Use {@link #schedule(Task, int)} */ - public synchronized boolean add(Task p, int quietPeriod) { + public boolean add(Task p, int quietPeriod) { return schedule(p, quietPeriod)!=null; } - public synchronized @CheckForNull WaitingItem schedule(Task p, int quietPeriod) { + public @CheckForNull WaitingItem schedule(Task p, int quietPeriod) { return schedule(p, quietPeriod, new Action[0]); } @@ -651,21 +700,21 @@ public class Queue extends ResourceController implements Saveable { * @deprecated as of 1.311 * Use {@link #schedule(Task, int, Action...)} */ - public synchronized boolean add(Task p, int quietPeriod, Action... actions) { + public boolean add(Task p, int quietPeriod, Action... actions) { return schedule(p, quietPeriod, actions)!=null; } /** * Convenience wrapper method around {@link #schedule(Task, int, List)} */ - public synchronized @CheckForNull WaitingItem schedule(Task p, int quietPeriod, Action... actions) { + public @CheckForNull WaitingItem schedule(Task p, int quietPeriod, Action... actions) { return schedule2(p, quietPeriod, actions).getCreateItem(); } /** * Convenience wrapper method around {@link #schedule2(Task, int, List)} */ - public synchronized @Nonnull ScheduleResult schedule2(Task p, int quietPeriod, Action... actions) { + public @Nonnull ScheduleResult schedule2(Task p, int quietPeriod, Action... actions) { return schedule2(p, quietPeriod, Arrays.asList(actions)); } @@ -675,18 +724,28 @@ public class Queue extends ResourceController implements Saveable { * @return true if the project was indeed in the queue and was removed. * false if this was no-op. */ - public synchronized boolean cancel(Task p) { - LOGGER.log(Level.FINE, "Cancelling {0}", p); - for (WaitingItem item : waitingList) { - if (item.task.equals(p)) { - return item.cancel(this); + public boolean cancel(Task p) { + lock.lock(); + try { + LOGGER.log(Level.FINE, "Cancelling {0}", p); + for (WaitingItem item : waitingList) { + if (item.task.equals(p)) { + return item.cancel(this); + } } + // use bitwise-OR to make sure that both branches get evaluated all the time + return blockedProjects.cancel(p) != null | buildables.cancel(p) != null; + } finally { + updateSnapshot(); + lock.unlock(); } - // use bitwise-OR to make sure that both branches get evaluated all the time - return blockedProjects.cancel(p)!=null | buildables.cancel(p)!=null; } - public synchronized boolean cancel(Item item) { + private void updateSnapshot() { + snapshot = new Snapshot(waitingList, blockedProjects, buildables, pendings); + } + + public boolean cancel(Item item) { LOGGER.log(Level.FINE, "Cancelling {0} item#{1}", new Object[] {item.task, item.id}); return item.cancel(this); } @@ -703,11 +762,13 @@ public class Queue extends ResourceController implements Saveable { return HttpResponses.forwardToPreviousPage(); } - public synchronized boolean isEmpty() { - return waitingList.isEmpty() && blockedProjects.isEmpty() && buildables.isEmpty() && pendings.isEmpty(); + public boolean isEmpty() { + Snapshot snapshot = this.snapshot; + return snapshot.waitingList.isEmpty() && snapshot.blockedProjects.isEmpty() && snapshot.buildables.isEmpty() + && snapshot.pendings.isEmpty(); } - private synchronized WaitingItem peek() { + private WaitingItem peek() { return waitingList.iterator().next(); } @@ -718,16 +779,21 @@ public class Queue extends ResourceController implements Saveable { * at the end. */ @Exported(inline=true) - public synchronized Item[] getItems() { - Item[] r = new Item[waitingList.size() + blockedProjects.size() + buildables.size() + pendings.size()]; - waitingList.toArray(r); - int idx = waitingList.size(); - for (BlockedItem p : blockedProjects.values()) + public Item[] getItems() { + Snapshot s = this.snapshot; + Item[] r = new Item[s.waitingList.size() + s.blockedProjects.size() + s.buildables.size() + + s.pendings.size()]; + s.waitingList.toArray(r); + int idx = s.waitingList.size(); + for (BlockedItem p : s.blockedProjects) { r[idx++] = p; - for (BuildableItem p : reverse(buildables.values())) + } + for (BuildableItem p : reverse(s.buildables)) { r[idx++] = p; - for (BuildableItem p : reverse(pendings.values())) + } + for (BuildableItem p : reverse(s.pendings)) { r[idx++] = p; + } return r; } @@ -752,30 +818,44 @@ public class Queue extends ResourceController implements Saveable { return itemsView.get(); } - public synchronized Item getItem(long id) { - for (Item item: waitingList) if (item.id == id) return item; - for (Item item: blockedProjects) if (item.id == id) return item; - for (Item item: buildables) if (item.id == id) return item; - for (Item item: pendings) if (item.id == id) return item; - + public Item getItem(long id) { + Snapshot snapshot = this.snapshot; + for (Item item : snapshot.blockedProjects) { + if (item.id == id) + return item; + } + for (Item item : snapshot.buildables) { + if (item.id == id) + return item; + } + for (Item item : snapshot.pendings) { + if (item.id == id) + return item; + } + for (Item item : snapshot.waitingList) { + if (item.id == id) { + return item; + } + } return leftItems.getIfPresent(id); } /** * Gets all the {@link BuildableItem}s that are waiting for an executor in the given {@link Computer}. */ - public synchronized List getBuildableItems(Computer c) { + public List getBuildableItems(Computer c) { + Snapshot snapshot = this.snapshot; List result = new ArrayList(); - _getBuildableItems(c, buildables, result); - _getBuildableItems(c, pendings, result); + _getBuildableItems(c, snapshot.buildables, result); + _getBuildableItems(c, snapshot.pendings, result); return result; } - private void _getBuildableItems(Computer c, ItemList col, List result) { + private void _getBuildableItems(Computer c, List col, List result) { Node node = c.getNode(); if (node == null) // Deleted computers cannot take build items... return; - for (BuildableItem p : col.values()) { + for (BuildableItem p : col) { if (node.canTake(p) == null) result.add(p); } @@ -784,17 +864,18 @@ public class Queue extends ResourceController implements Saveable { /** * Gets the snapshot of all {@link BuildableItem}s. */ - public synchronized List getBuildableItems() { - ArrayList r = new ArrayList(buildables.values()); - r.addAll(pendings.values()); + public List getBuildableItems() { + Snapshot snapshot = this.snapshot; + ArrayList r = new ArrayList(snapshot.buildables); + r.addAll(snapshot.pendings); return r; } /** * Gets the snapshot of all {@link BuildableItem}s. */ - public synchronized List getPendingItems() { - return new ArrayList(pendings.values()); + public List getPendingItems() { + return new ArrayList(snapshot.pendings); } /** @@ -820,11 +901,12 @@ public class Queue extends ResourceController implements Saveable { * * @since 1.402 */ - public synchronized List getUnblockedItems() { - List queuedNotBlocked = new ArrayList(); - queuedNotBlocked.addAll(waitingList); - queuedNotBlocked.addAll(buildables); - queuedNotBlocked.addAll(pendings); + public List getUnblockedItems() { + Snapshot snapshot = this.snapshot; + List queuedNotBlocked = new ArrayList(); + queuedNotBlocked.addAll(snapshot.waitingList); + queuedNotBlocked.addAll(snapshot.buildables); + queuedNotBlocked.addAll(snapshot.pendings); // but not 'blockedProjects' return queuedNotBlocked; } @@ -834,7 +916,7 @@ public class Queue extends ResourceController implements Saveable { * * @since 1.402 */ - public synchronized Set getUnblockedTasks() { + public Set getUnblockedTasks() { List items = getUnblockedItems(); Set unblockedTasks = new HashSet(items.size()); for (Queue.Item t : items) @@ -845,8 +927,9 @@ public class Queue extends ResourceController implements Saveable { /** * Is the given task currently pending execution? */ - public synchronized boolean isPending(Task t) { - for (BuildableItem i : pendings) + public boolean isPending(Task t) { + Snapshot snapshot = this.snapshot; + for (BuildableItem i : snapshot.pendings) if (i.task.equals(t)) return true; return false; @@ -855,15 +938,16 @@ public class Queue extends ResourceController implements Saveable { /** * How many {@link BuildableItem}s are assigned for the given label? */ - public synchronized int countBuildableItemsFor(Label l) { + public int countBuildableItemsFor(Label l) { + Snapshot snapshot = this.snapshot; int r = 0; - for (BuildableItem bi : buildables.values()) + for (BuildableItem bi : snapshot.buildables) for (SubTask st : bi.task.getSubTasks()) - if (null==l || bi.getAssignedLabelFor(st)==l) + if (null == l || bi.getAssignedLabelFor(st) == l) r++; - for (BuildableItem bi : pendings.values()) + for (BuildableItem bi : snapshot.pendings) for (SubTask st : bi.task.getSubTasks()) - if (null==l || bi.getAssignedLabelFor(st)==l) + if (null == l || bi.getAssignedLabelFor(st) == l) r++; return r; } @@ -871,7 +955,7 @@ public class Queue extends ResourceController implements Saveable { /** * Counts all the {@link BuildableItem}s currently in the queue. */ - public synchronized int countBuildableItems() { + public int countBuildableItems() { return countBuildableItemsFor(null); } @@ -880,18 +964,21 @@ public class Queue extends ResourceController implements Saveable { * * @return null if the project is not in the queue. */ - public synchronized Item getItem(Task t) { - BlockedItem bp = blockedProjects.get(t); - if (bp!=null) - return bp; - BuildableItem bi = buildables.get(t); - if(bi!=null) - return bi; - bi = pendings.get(t); - if(bi!=null) - return bi; - - for (Item item : waitingList) { + public Item getItem(Task t) { + Snapshot snapshot = this.snapshot; + for (Item item : snapshot.blockedProjects) { + if (item.task.equals(t)) + return item; + } + for (Item item : snapshot.buildables) { + if (item.task.equals(t)) + return item; + } + for (Item item : snapshot.pendings) { + if (item.task.equals(t)) + return item; + } + for (Item item : snapshot.waitingList) { if (item.task.equals(t)) { return item; } @@ -899,17 +986,54 @@ public class Queue extends ResourceController implements Saveable { return null; } + /** + * Gets the information about the queue item for the given project. + * + * @return null if the project is not in the queue. + * @since 1.FIXME + */ + private List liveGetItems(Task t) { + lock.lock(); + try { + List result = new ArrayList(); + result.addAll(blockedProjects.getAll(t)); + result.addAll(buildables.getAll(t)); + result.addAll(pendings.getAll(t)); + for (Item item : waitingList) { + if (item.task.equals(t)) { + result.add(item); + } + } + return result; + } finally { + lock.unlock(); + } + } + /** * Gets the information about the queue item for the given project. * * @return null if the project is not in the queue. */ - public synchronized List getItems(Task t) { - List result =new ArrayList(); - result.addAll(blockedProjects.getAll(t)); - result.addAll(buildables.getAll(t)); - result.addAll(pendings.getAll(t)); - for (Item item : waitingList) { + public List getItems(Task t) { + Snapshot snapshot = this.snapshot; + List result = new ArrayList(); + for (Item item : snapshot.blockedProjects) { + if (item.task.equals(t)) { + result.add(item); + } + } + for (Item item : snapshot.buildables) { + if (item.task.equals(t)) { + result.add(item); + } + } + for (Item item : snapshot.pendings) { + if (item.task.equals(t)) { + result.add(item); + } + } + for (Item item : snapshot.waitingList) { if (item.task.equals(t)) { result.add(item); } @@ -921,7 +1045,7 @@ public class Queue extends ResourceController implements Saveable { * Left for backward compatibility. * * @see #getItem(Task) - public synchronized Item getItem(AbstractProject p) { + public Item getItem(AbstractProject p) { return getItem((Task) p); } */ @@ -929,10 +1053,21 @@ public class Queue extends ResourceController implements Saveable { /** * Returns true if this queue contains the said project. */ - public synchronized boolean contains(Task t) { - if (blockedProjects.containsKey(t) || buildables.containsKey(t) || pendings.containsKey(t)) - return true; - for (Item item : waitingList) { + public boolean contains(Task t) { + final Snapshot snapshot = this.snapshot; + for (Item item : snapshot.blockedProjects) { + if (item.task.equals(t)) + return true; + } + for (Item item : snapshot.buildables) { + if (item.task.equals(t)) + return true; + } + for (Item item : snapshot.pendings) { + if (item.task.equals(t)) + return true; + } + for (Item item : snapshot.waitingList) { if (item.task.equals(t)) { return true; } @@ -945,12 +1080,18 @@ public class Queue extends ResourceController implements Saveable { * * This moves the task from the pending state to the "left the queue" state. */ - /*package*/ synchronized void onStartExecuting(Executor exec) throws InterruptedException { - final WorkUnit wu = exec.getCurrentWorkUnit(); - pendings.remove(wu.context.item); + /*package*/ void onStartExecuting(Executor exec) throws InterruptedException { + lock.lock(); + try { + final WorkUnit wu = exec.getCurrentWorkUnit(); + pendings.remove(wu.context.item); - LeftItem li = new LeftItem(wu.context); - li.enter(this); + LeftItem li = new LeftItem(wu.context); + li.enter(this); + } finally { + updateSnapshot(); + lock.unlock(); + } } /** @@ -1054,14 +1195,29 @@ public class Queue extends ResourceController implements Saveable { } } + @Override + protected void _await() throws InterruptedException { + condition.await(); + } + + @Override + protected void _signalAll() { + condition.signalAll(); + } + /** * Some operations require to be performed with the {@link Queue} lock held. Use one of these methods rather * than locking directly on Queue in order to allow for future refactoring. * @param runnable the operation to perform. * @since 1.592 */ - protected synchronized void _withLock(Runnable runnable) { - runnable.run(); + protected void _withLock(Runnable runnable) { + lock.lock(); + try { + runnable.run(); + } finally { + lock.unlock(); + } } /** @@ -1075,8 +1231,13 @@ public class Queue extends ResourceController implements Saveable { * @throws T the exception of the callable * @since 1.592 */ - protected synchronized V _withLock(hudson.remoting.Callable callable) throws T { - return callable.call(); + protected V _withLock(hudson.remoting.Callable callable) throws T { + lock.lock(); + try { + return callable.call(); + } finally { + lock.unlock(); + } } /** @@ -1089,8 +1250,13 @@ public class Queue extends ResourceController implements Saveable { * @throws Exception if the callable throws an exception. * @since 1.592 */ - protected synchronized V _withLock(java.util.concurrent.Callable callable) throws Exception { - return callable.call(); + protected V _withLock(java.util.concurrent.Callable callable) throws Exception { + lock.lock(); + try { + return callable.call(); + } finally { + lock.unlock(); + } } /** @@ -1105,98 +1271,121 @@ public class Queue extends ResourceController implements Saveable { * the scheduling (such as new node becoming online, # of executors change, a task completes execution, etc.), * and it also gets invoked periodically (see {@link Queue.MaintainTask}.) */ - public synchronized void maintain() { - LOGGER.log(Level.FINE, "Queue maintenance started {0}", this); + public void maintain() { + lock.lock(); + try { + + LOGGER.log(Level.FINE, "Queue maintenance started {0}", this); - // The executors that are currently waiting for a job to run. - Map parked = new HashMap(); + // The executors that are currently waiting for a job to run. + Map parked = new HashMap(); - {// update parked - for (Computer c : Jenkins.getInstance().getComputers()) { - for (Executor e : c.getExecutors()) { - if (e.isParking()) { - parked.put(e,new JobOffer(e)); + {// update parked (and identify any pending items who's executor has disappeared) + List lostPendings = new ArrayList(pendings); + for (Computer c : Jenkins.getInstance().getComputers()) { + for (Executor e : c.getExecutors()) { + if (e.isParking()) { + parked.put(e, new JobOffer(e)); + } + final WorkUnit workUnit = e.getCurrentWorkUnit(); + if (workUnit != null) { + lostPendings.remove(workUnit.context.item); + } } } + // pending -> buildable + for (BuildableItem p: lostPendings) { + LOGGER.log(Level.INFO, + "BuildableItem {0}: pending -> buildable as the assigned executor disappeared", + p.task.getFullDisplayName()); + p.isPending = false; + pendings.remove(p); + makeBuildable(p); + } } - } - {// blocked -> buildable - for (BlockedItem p : new ArrayList(blockedProjects.values())) {// copy as we'll mutate the list - if (!isBuildBlocked(p) && allowNewBuildableTask(p.task)) { - // ready to be executed - Runnable r = makeBuildable(new BuildableItem(p)); - if (r != null) { - p.leave(this); - r.run(); + {// blocked -> buildable + for (BlockedItem p : new ArrayList(blockedProjects.values())) {// copy as we'll mutate the list + if (!isBuildBlocked(p) && allowNewBuildableTask(p.task)) { + // ready to be executed + Runnable r = makeBuildable(new BuildableItem(p)); + if (r != null) { + p.leave(this); + r.run(); + } } } } - } - // waitingList -> buildable/blocked - while (!waitingList.isEmpty()) { - WaitingItem top = peek(); + // waitingList -> buildable/blocked + while (!waitingList.isEmpty()) { + WaitingItem top = peek(); - if (top.timestamp.compareTo(new GregorianCalendar())>0) - break; // finished moving all ready items from queue + if (top.timestamp.compareTo(new GregorianCalendar()) > 0) + break; // finished moving all ready items from queue - top.leave(this); - Task p = top.task; - if (!isBuildBlocked(top) && allowNewBuildableTask(p)) { - // ready to be executed immediately - Runnable r = makeBuildable(new BuildableItem(top)); - if (r != null) { - r.run(); + top.leave(this); + Task p = top.task; + if (!isBuildBlocked(top) && allowNewBuildableTask(p)) { + // ready to be executed immediately + Runnable r = makeBuildable(new BuildableItem(top)); + if (r != null) { + r.run(); + } else { + new BlockedItem(top).enter(this); + } } else { + // this can't be built now because another build is in progress + // set this project aside. new BlockedItem(top).enter(this); } - } else { - // this can't be built now because another build is in progress - // set this project aside. - new BlockedItem(top).enter(this); } - } - - final QueueSorter s = sorter; - if (s != null) - s.sortBuildableItems(buildables); - // allocate buildable jobs to executors - for (BuildableItem p : new ArrayList(buildables)) {// copy as we'll mutate the list in the loop - // one last check to make sure this build is not blocked. - if (isBuildBlocked(p)) { - p.leave(this); - new BlockedItem(p).enter(this); - LOGGER.log(Level.FINE, "Catching that {0} is blocked in the last minute", p); - continue; - } + final QueueSorter s = sorter; + if (s != null) + s.sortBuildableItems(buildables); + + // allocate buildable jobs to executors + for (BuildableItem p : new ArrayList( + buildables)) {// copy as we'll mutate the list in the loop + // one last check to make sure this build is not blocked. + if (isBuildBlocked(p)) { + p.leave(this); + new BlockedItem(p).enter(this); + LOGGER.log(Level.FINE, "Catching that {0} is blocked in the last minute", p); + continue; + } - List candidates = new ArrayList(parked.size()); - for (JobOffer j : parked.values()) - if (j.canTake(p)) - candidates.add(j); - - MappingWorksheet ws = new MappingWorksheet(p, candidates); - Mapping m = loadBalancer.map(p.task, ws); - if (m == null) { - // if we couldn't find the executor that fits, - // just leave it in the buildables list and - // check if we can execute other projects - LOGGER.log(Level.FINER, "Failed to map {0} to executors. candidates={1} parked={2}", new Object[]{p, candidates, parked.values()}); - continue; - } + List candidates = new ArrayList(parked.size()); + for (JobOffer j : parked.values()) + if (j.canTake(p)) + candidates.add(j); + + MappingWorksheet ws = new MappingWorksheet(p, candidates); + Mapping m = loadBalancer.map(p.task, ws); + if (m == null) { + // if we couldn't find the executor that fits, + // just leave it in the buildables list and + // check if we can execute other projects + LOGGER.log(Level.FINER, "Failed to map {0} to executors. candidates={1} parked={2}", + new Object[]{p, candidates, parked.values()}); + continue; + } - // found a matching executor. use it. - WorkUnitContext wuc = new WorkUnitContext(p); - m.execute(wuc); + // found a matching executor. use it. + WorkUnitContext wuc = new WorkUnitContext(p); + m.execute(wuc); - p.leave(this); - if (!wuc.getWorkUnits().isEmpty()) - makePending(p); - else - LOGGER.log(Level.FINE, "BuildableItem {0} with empty work units!?", p); + p.leave(this); + if (!wuc.getWorkUnits().isEmpty()) + makePending(p); + else + LOGGER.log(Level.FINE, "BuildableItem {0} with empty work units!?", p); + } + } finally { + updateSnapshot(); + lock.unlock(); } } @@ -2324,6 +2513,21 @@ public class Queue extends ResourceController implements Saveable { } } + private static class Snapshot { + private final Set waitingList; + private final List blockedProjects; + private final List buildables; + private final List pendings; + + public Snapshot(Set waitingList, List blockedProjects, List buildables, + List pendings) { + this.waitingList = new LinkedHashSet(waitingList); + this.blockedProjects = new ArrayList(blockedProjects); + this.buildables = new ArrayList(buildables); + this.pendings = new ArrayList(pendings); + } + } + @CLIResolver public static Queue getInstance() { return Jenkins.getInstance().getQueue(); diff --git a/core/src/main/java/hudson/model/ResourceController.java b/core/src/main/java/hudson/model/ResourceController.java index d1239f3eec..e2de53ae35 100644 --- a/core/src/main/java/hudson/model/ResourceController.java +++ b/core/src/main/java/hudson/model/ResourceController.java @@ -29,6 +29,7 @@ import java.util.Set; import java.util.Collection; import java.util.AbstractCollection; import java.util.Iterator; +import java.util.concurrent.Callable; import java.util.concurrent.CopyOnWriteArraySet; import javax.annotation.Nonnull; @@ -74,25 +75,36 @@ public class ResourceController { * @throws InterruptedException * the thread can be interrupted while waiting for the available resources. */ - public void execute(@Nonnull Runnable task, ResourceActivity activity ) throws InterruptedException { - ResourceList resources = activity.getResourceList(); - synchronized(this) { - while(inUse.isCollidingWith(resources)) - wait(); - - // we have a go - inProgress.add(activity); - inUse = ResourceList.union(inUse,resources); - } + public void execute(@Nonnull Runnable task, final ResourceActivity activity ) throws InterruptedException { + final ResourceList resources = activity.getResourceList(); + _withLock(new Runnable() { + @Override + public void run() { + while(inUse.isCollidingWith(resources)) + try { + // TODO revalidate the resource list after re-acquiring lock, for now we just let the build fail + _await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + // we have a go + inProgress.add(activity); + inUse = ResourceList.union(inUse,resources); + } + }); try { task.run(); } finally { - synchronized(this) { - inProgress.remove(activity); - inUse = ResourceList.union(resourceView); - notifyAll(); - } + _withLock(new Runnable() { + @Override + public void run() { + inProgress.remove(activity); + inUse = ResourceList.union(resourceView); + _signalAll(); + } + }); } } @@ -105,8 +117,17 @@ public class ResourceController { * another activity might acquire resources before the caller * gets to call {@link #execute(Runnable, ResourceActivity)}. */ - public synchronized boolean canRun(ResourceList resources) { - return !inUse.isCollidingWith(resources); + public boolean canRun(final ResourceList resources) { + try { + return _withLock(new Callable() { + @Override + public Boolean call() { + return !inUse.isCollidingWith(resources); + } + }); + } catch (Exception e) { + throw new IllegalStateException("Inner callable does not throw exception"); + } } /** @@ -117,8 +138,17 @@ public class ResourceController { * If more than one such resource exists, one is chosen and returned. * This method is used for reporting what's causing the blockage. */ - public synchronized Resource getMissingResource(ResourceList resources) { - return resources.getConflict(inUse); + public Resource getMissingResource(final ResourceList resources) { + try { + return _withLock(new Callable() { + @Override + public Resource call() { + return resources.getConflict(inUse); + } + }); + } catch (Exception e) { + throw new IllegalStateException("Inner callable does not throw exception"); + } } /** @@ -133,5 +163,25 @@ public class ResourceController { return a; return null; } + + protected void _await() throws InterruptedException { + wait(); + } + + protected void _signalAll() { + notifyAll(); + } + + protected void _withLock(Runnable runnable) { + synchronized (this) { + runnable.run(); + } + } + + protected V _withLock(java.util.concurrent.Callable callable) throws Exception { + synchronized (this) { + return callable.call(); + } + } } diff --git a/core/src/main/java/hudson/slaves/AbstractCloudSlave.java b/core/src/main/java/hudson/slaves/AbstractCloudSlave.java index 9b96e4af11..cc7967d8ef 100644 --- a/core/src/main/java/hudson/slaves/AbstractCloudSlave.java +++ b/core/src/main/java/hudson/slaves/AbstractCloudSlave.java @@ -23,6 +23,7 @@ */ package hudson.slaves; +import hudson.model.Computer; import hudson.model.Descriptor.FormException; import jenkins.model.Jenkins; import hudson.model.Slave; @@ -57,6 +58,10 @@ public abstract class AbstractCloudSlave extends Slave { * Releases and removes this slave. */ public void terminate() throws InterruptedException, IOException { + final Computer computer = toComputer(); + if (computer != null) { + computer.recordTermination(); + } try { // TODO: send the output to somewhere real _terminate(new StreamTaskListener(System.out, Charset.defaultCharset())); diff --git a/core/src/main/java/hudson/slaves/ComputerRetentionWork.java b/core/src/main/java/hudson/slaves/ComputerRetentionWork.java index b676a62f0b..1a1ad382e4 100644 --- a/core/src/main/java/hudson/slaves/ComputerRetentionWork.java +++ b/core/src/main/java/hudson/slaves/ComputerRetentionWork.java @@ -1,18 +1,18 @@ /* * The MIT License - * + * * Copyright (c) 2004-2009, Sun Microsystems, Inc., Kohsuke Kawaguchi, Stephen Connolly - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -27,6 +27,7 @@ import java.util.Map; import java.util.WeakHashMap; import hudson.model.Computer; +import hudson.model.Queue; import jenkins.model.Jenkins; import hudson.model.Node; import hudson.model.PeriodicWork; @@ -56,16 +57,21 @@ public class ComputerRetentionWork extends PeriodicWork { @SuppressWarnings("unchecked") protected void doRun() { final long startRun = System.currentTimeMillis(); - for (Computer c : Jenkins.getInstance().getComputers()) { - Node n = c.getNode(); - if (n!=null && n.isHoldOffLaunchUntilSave()) - continue; - if (!nextCheck.containsKey(c) || startRun > nextCheck.get(c)) { - // at the moment I don't trust strategies to wait more than 60 minutes - // strategies need to wait at least one minute - final long waitInMins = Math.max(1, Math.min(60, c.getRetentionStrategy().check(c))); - nextCheck.put(c, startRun + waitInMins*1000*60 /*MINS->MILLIS*/); - } + for (final Computer c : Jenkins.getInstance().getComputers()) { + Queue.withLock(new Runnable() { + @Override + public void run() { + Node n = c.getNode(); + if (n!=null && n.isHoldOffLaunchUntilSave()) + return; + if (!nextCheck.containsKey(c) || startRun > nextCheck.get(c)) { + // at the moment I don't trust strategies to wait more than 60 minutes + // strategies need to wait at least one minute + final long waitInMins = Math.max(1, Math.min(60, c.getRetentionStrategy().check(c))); + nextCheck.put(c, startRun + waitInMins*1000*60 /*MINS->MILLIS*/); + } + } + }); } } } diff --git a/core/src/main/java/hudson/slaves/NodeProvisioner.java b/core/src/main/java/hudson/slaves/NodeProvisioner.java index 25018f190f..cbad383392 100644 --- a/core/src/main/java/hudson/slaves/NodeProvisioner.java +++ b/core/src/main/java/hudson/slaves/NodeProvisioner.java @@ -35,6 +35,7 @@ import net.jcip.annotations.GuardedBy; import javax.annotation.Nonnull; import java.awt.Color; import java.util.Arrays; +import java.util.concurrent.Callable; import java.util.concurrent.Future; import java.util.concurrent.ExecutionException; import java.util.List; @@ -42,6 +43,10 @@ import java.util.Collection; import java.util.ArrayList; import java.util.Iterator; import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.logging.Logger; import java.util.logging.Level; import java.io.IOException; @@ -120,9 +125,14 @@ public class NodeProvisioner { */ private final Label label; - @GuardedBy("self") + @GuardedBy("#provisioningLock") private final List pendingLaunches = new ArrayList(); + private final Lock provisioningLock = new ReentrantLock(); + + @GuardedBy("#provisioningLock") + private StrategyState provisioningState = null; + private transient volatile long lastSuggestedReview; /** @@ -148,8 +158,11 @@ public class NodeProvisioner { * @since 1.401 */ public List getPendingLaunches() { - synchronized (pendingLaunches) { + provisioningLock.lock(); + try { return new ArrayList(pendingLaunches); + } finally { + provisioningLock.unlock(); } } @@ -174,80 +187,96 @@ public class NodeProvisioner { * Periodically invoked to keep track of the load. * Launches additional nodes if necessary. */ - private synchronized void update() { - Jenkins jenkins = Jenkins.getInstance(); - lastSuggestedReview = System.currentTimeMillis(); + private void update() { + provisioningLock.lock(); + try { + lastSuggestedReview = System.currentTimeMillis(); + Queue.withLock(new Runnable() { + @Override + public void run() { + Jenkins jenkins = Jenkins.getInstance(); + // clean up the cancelled launch activity, then count the # of executors that we are about to + // bring up. + + int plannedCapacitySnapshot = 0; + List completedLaunches = new ArrayList(); + + for (Iterator itr = pendingLaunches.iterator(); itr.hasNext(); ) { + PlannedNode f = itr.next(); + if (f.future.isDone()) { + completedLaunches.add(f); + itr.remove(); + } else { + plannedCapacitySnapshot += f.numExecutors; + } + } - // clean up the cancelled launch activity, then count the # of executors that we are about to bring up. - int plannedCapacitySnapshot = 0; - List completedLaunches = new ArrayList(); + for (PlannedNode f : completedLaunches) { + try { + Node node = f.future.get(); + for (CloudProvisioningListener cl : CloudProvisioningListener.all()) { + cl.onComplete(f, node); + } - synchronized (pendingLaunches) { - for (Iterator itr = pendingLaunches.iterator(); itr.hasNext(); ) { - PlannedNode f = itr.next(); - if (f.future.isDone()) { - completedLaunches.add(f); - itr.remove(); - } else { - plannedCapacitySnapshot += f.numExecutors; - } - } - } + jenkins.addNode(node); + LOGGER.log(Level.INFO, + "{0} provisioning successfully completed. We have now {1,number,integer} computer" + + "(s)", + new Object[]{f.displayName, jenkins.getComputers().length}); + } catch (InterruptedException e) { + throw new AssertionError(e); // since we confirmed that the future is already done + } catch (ExecutionException e) { + LOGGER.log(Level.WARNING, "Provisioned slave " + f.displayName + " failed to launch", + e.getCause()); + for (CloudProvisioningListener cl : CloudProvisioningListener.all()) { + cl.onFailure(f, e.getCause()); + } + } catch (IOException e) { + LOGGER.log(Level.WARNING, "Provisioned slave " + f.displayName + " failed to launch", e); + for (CloudProvisioningListener cl : CloudProvisioningListener.all()) { + cl.onFailure(f, e); + } + } - for (PlannedNode f : completedLaunches) { - try { - Node node = f.future.get(); - for (CloudProvisioningListener cl : CloudProvisioningListener.all()) { - cl.onComplete(f, node); - } + f.spent(); + } - jenkins.addNode(node); - LOGGER.log(Level.INFO, - "{0} provisioning successfully completed. We have now {1,number,integer} computer(s)", - new Object[]{f.displayName, jenkins.getComputers().length}); - } catch (InterruptedException e) { - throw new AssertionError(e); // since we confirmed that the future is already done - } catch (ExecutionException e) { - LOGGER.log(Level.WARNING, "Provisioned slave " + f.displayName + " failed to launch", e.getCause()); - for (CloudProvisioningListener cl : CloudProvisioningListener.all()) { - cl.onFailure(f, e.getCause()); - } - } catch (IOException e) { - LOGGER.log(Level.WARNING, "Provisioned slave " + f.displayName + " failed to launch", e); - for (CloudProvisioningListener cl : CloudProvisioningListener.all()) { - cl.onFailure(f, e); + float plannedCapacity = plannedCapacitySnapshot; + plannedCapacitiesEMA.update(plannedCapacity); + + int idleSnapshot = stat.computeIdleExecutors(); + int queueLengthSnapshot = stat.computeQueueLength(); + + if (queueLengthSnapshot <= idleSnapshot) { + LOGGER.log(Level.FINE, + "Queue length {0} is less than the idle capacity {1}. No provisioning strategy " + + "required", + new Object[]{queueLengthSnapshot, idleSnapshot}); + provisioningState = null; + } else { + provisioningState = new StrategyState(queueLengthSnapshot, label, idleSnapshot, + stat.computeTotalExecutors(), + plannedCapacitySnapshot); + } } - } - - f.spent(); - } + }); - float plannedCapacity = plannedCapacitySnapshot; - plannedCapacitiesEMA.update(plannedCapacity); - - int idleSnapshot = stat.computeIdleExecutors(); - int queueLengthSnapshot = stat.computeQueueLength(); - - if (queueLengthSnapshot <= idleSnapshot) { - LOGGER.log(Level.FINE, - "Queue length {0} is less than the idle capacity {1}. No provisioning strategy required", - new Object[]{queueLengthSnapshot, idleSnapshot}); - } else { - StrategyState state = - new StrategyState(queueLengthSnapshot, label, idleSnapshot, stat.computeTotalExecutors(), - plannedCapacitySnapshot); - List strategies = Jenkins.getInstance().getExtensionList(Strategy.class); - for (Strategy strategy : strategies.isEmpty() - ? Arrays.asList(new StandardStrategyImpl()) - : strategies) { - LOGGER.log(Level.FINER, "Consulting {0} provisioning strategy with state {1}", - new Object[]{strategy, state}); - if (StrategyDecision.PROVISIONING_COMPLETED == strategy.apply(state)) { - LOGGER.log(Level.FINER, "Provisioning strategy {0} declared provisioning complete", - strategy); - break; + if (provisioningState != null) { + List strategies = Jenkins.getInstance().getExtensionList(Strategy.class); + for (Strategy strategy : strategies.isEmpty() + ? Arrays.asList(new StandardStrategyImpl()) + : strategies) { + LOGGER.log(Level.FINER, "Consulting {0} provisioning strategy with state {1}", + new Object[]{strategy, provisioningState}); + if (StrategyDecision.PROVISIONING_COMPLETED == strategy.apply(provisioningState)) { + LOGGER.log(Level.FINER, "Provisioning strategy {0} declared provisioning complete", + strategy); + break; + } } } + } finally { + provisioningLock.unlock(); } } @@ -384,8 +413,13 @@ public class NodeProvisioner { * The additional planned capacity for this {@link #getLabel()} and provisioned by previous strategies during * the current updating of the {@link NodeProvisioner}. */ - public synchronized int getAdditionalPlannedCapacity() { - return additionalPlannedCapacity; + public int getAdditionalPlannedCapacity() { + provisioningLock.lock(); + try { + return additionalPlannedCapacity; + } finally { + provisioningLock.unlock(); + } } /** @@ -451,13 +485,14 @@ public class NodeProvisioner { additionalPlannedCapacity += f.numExecutors; } } - synchronized (pendingLaunches) { + provisioningLock.lock(); + try { pendingLaunches.addAll(plannedNodes); - } - if (additionalPlannedCapacity > 0) { - synchronized (this) { - this.additionalPlannedCapacity += additionalPlannedCapacity; + if (additionalPlannedCapacity > 0) { + this.additionalPlannedCapacity += additionalPlannedCapacity; } + } finally { + provisioningLock.unlock(); } } diff --git a/core/src/main/java/hudson/slaves/RetentionStrategy.java b/core/src/main/java/hudson/slaves/RetentionStrategy.java index 25a9eb68dd..b19661d08e 100644 --- a/core/src/main/java/hudson/slaves/RetentionStrategy.java +++ b/core/src/main/java/hudson/slaves/RetentionStrategy.java @@ -33,8 +33,10 @@ import hudson.util.DescriptorList; import java.util.Collections; import java.util.HashMap; import jenkins.model.Jenkins; +import net.jcip.annotations.GuardedBy; import org.kohsuke.stapler.DataBoundConstructor; +import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; @@ -54,6 +56,7 @@ public abstract class RetentionStrategy extends AbstractDesc * @return The number of minutes after which the strategy would like to be checked again. The strategy may be * rechecked earlier or later that this! */ + @GuardedBy("hudson.model.Queue#lock") public abstract long check(T c); /** @@ -91,8 +94,13 @@ public abstract class RetentionStrategy extends AbstractDesc * * @since 1.275 */ - public void start(T c) { - check(c); + public void start(final T c) { + Queue.withLock(new Runnable() { + @Override + public void run() { + check(c); + } + }); } /** @@ -257,21 +265,13 @@ public abstract class RetentionStrategy extends AbstractDesc } else if (c.isIdle()) { final long idleMilliseconds = System.currentTimeMillis() - c.getIdleStartMilliseconds(); if (idleMilliseconds > idleDelay * 1000 * 60 /*MINS->MILLIS*/) { - Queue.withLock(new Runnable() { - @Override - public void run() { - // re-check idle now that we are within the Queue lock - if (c.isIdle()) { - final long idleMilliseconds = System.currentTimeMillis() - c.getIdleStartMilliseconds(); - if (idleMilliseconds > idleDelay * 1000 * 60 /*MINS->MILLIS*/) { - // we've been idle for long enough - logger.log(Level.INFO, "Disconnecting computer {0} as it has been idle for {1}", - new Object[]{c.getName(), Util.getTimeSpanString(idleMilliseconds)}); - c.disconnect(OfflineCause.create(Messages._RetentionStrategy_Demand_OfflineIdle())); - } - } - } - }); + // we've been idle for long enough + logger.log(Level.INFO, "Disconnecting computer {0} as it has been idle for {1}", + new Object[]{c.getName(), Util.getTimeSpanString(idleMilliseconds)}); + c.disconnect(OfflineCause.create(Messages._RetentionStrategy_Demand_OfflineIdle())); + } else { + // no point revisiting until we can be confident we will be idle + return TimeUnit.MILLISECONDS.toMinutes(TimeUnit.MINUTES.toMillis(idleDelay) - idleMilliseconds); } } return 1; diff --git a/core/src/main/java/hudson/slaves/SlaveComputer.java b/core/src/main/java/hudson/slaves/SlaveComputer.java index 0a28b2064d..65f42f18fe 100644 --- a/core/src/main/java/hudson/slaves/SlaveComputer.java +++ b/core/src/main/java/hudson/slaves/SlaveComputer.java @@ -649,7 +649,7 @@ public class SlaveComputer extends Computer { } @Override - protected void setNode(Node node) { + protected void setNode(final Node node) { super.setNode(node); launcher = grabLauncher(node); @@ -657,10 +657,16 @@ public class SlaveComputer extends Computer { // "constructed==null" test is an ugly hack to avoid launching before the object is fully // constructed. if(constructed!=null) { - if (node instanceof Slave) - ((Slave)node).getRetentionStrategy().check(this); - else + if (node instanceof Slave) { + Queue.withLock(new Runnable() { + @Override + public void run() { + ((Slave)node).getRetentionStrategy().check(SlaveComputer.this); + } + }); + } else { connect(false); + } } } diff --git a/core/src/main/java/jenkins/model/Jenkins.java b/core/src/main/java/jenkins/model/Jenkins.java index d1084bd202..5cf4a5d09a 100644 --- a/core/src/main/java/jenkins/model/Jenkins.java +++ b/core/src/main/java/jenkins/model/Jenkins.java @@ -289,6 +289,7 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.logging.Level; import java.util.logging.LogRecord; import java.util.logging.Logger; @@ -498,17 +499,18 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve } /** - * Set of installed cluster nodes. - *

- * We use this field with copy-on-write semantics. - * This field has mutable list (to keep the serialization look clean), - * but it shall never be modified. Only new completely populated slave - * list can be set here. - *

- * The field name should be really {@code nodes}, but again the backward compatibility - * prevents us from renaming. + * Legacy store of the set of installed cluster nodes. + * @deprecated in favour of {@link Nodes} + */ + @Deprecated + protected transient volatile NodeList slaves; + + /** + * The holder of the set of installed cluster nodes. + * + * @since 1.FIXME */ - protected volatile NodeList slaves; + private transient final Nodes nodes = new Nodes(this); /** * Quiet period. @@ -1214,7 +1216,7 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve return null; } - protected void updateComputerList() throws IOException { + protected void updateComputerList() { updateComputerList(AUTOMATIC_SLAVE_LAUNCH); } @@ -1663,7 +1665,7 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve * Gets the slave node of the give name, hooked under this Hudson. */ public @CheckForNull Node getNode(String name) { - return slaves.getNode(name); + return nodes.getNode(name); } /** @@ -1682,38 +1684,25 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve * represents the master. */ public List getNodes() { - return slaves; + return nodes.getNodes(); } /** * Adds one more {@link Node} to Hudson. */ - public synchronized void addNode(Node n) throws IOException { - if(n==null) throw new IllegalArgumentException(); - ArrayList nl = new ArrayList(this.slaves); - if(!nl.contains(n)) // defensive check - nl.add(n); - setNodes(nl); + public void addNode(Node n) throws IOException { + nodes.addNode(n); } /** * Removes a {@link Node} from Hudson. */ - public synchronized void removeNode(@Nonnull Node n) throws IOException { - Computer c = n.toComputer(); - if (c!=null) - c.disconnect(OfflineCause.create(Messages._Hudson_NodeBeingRemoved())); - - ArrayList nl = new ArrayList(this.slaves); - nl.remove(n); - setNodes(nl); + public void removeNode(@Nonnull Node n) throws IOException { + nodes.removeNode(n); } - public void setNodes(final List nodes) throws IOException { - Jenkins.this.slaves = new NodeList(nodes); - updateComputerList(); - trimLabels(); - save(); + public void setNodes(final List n) throws IOException { + nodes.setNodes(n); } public DescribableList, NodePropertyDescriptor> getNodeProperties() { @@ -1730,7 +1719,7 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve * This should be called when the assumptions behind label cache computation changes, * but we also call this periodically to self-heal any data out-of-sync issue. */ - private void trimLabels() { + /*package*/ void trimLabels() { for (Iterator

- - - - - - - - - - ${%Offline} - - - ${%Idle} - - - - - - - - - - - -
- - - - - - - - - - - ${%Unknown Task} - - -
- + + + + ${name} + + + + + +
${%Dead} (!)
+
+ + + +
+ + + + + ${%Offline} + + + ${%Idle} + + + + + + + + + + - - - +
+ + + + + + + + + + + + ${%Unknown Task} + + + + + + ${%Idle} + + + + + + + + + + + + ${%Unknown Task} + + + + +
-
- - - - - -
-
- - - + + + + + +
+ + + + + +
+ + + - - + + + - - - - - - - - - - - - - - + + + + + + + + + + + + - - - - - + + + + + + diff --git a/core/src/main/resources/lib/layout/layout.jelly b/core/src/main/resources/lib/layout/layout.jelly index d41c186864..f5f1769da2 100644 --- a/core/src/main/resources/lib/layout/layout.jelly +++ b/core/src/main/resources/lib/layout/layout.jelly @@ -69,11 +69,14 @@ ${h.initPageVariables(context)} + ${h.advertiseHeaders(response)} - - - + + + + + @@ -126,9 +129,11 @@ ${h.initPageVariables(context)} + + + @@ -143,9 +148,11 @@ ${h.initPageVariables(context)} - - - + + + + +