Merge branch 'master' of github.com:jenkinsci/jenkins

22c42a39 · Jesse Glick · a4d43051 · ebccaa33 · 22c42a39 · 22c42a39
7 changed files
--- a/core/src/main/java/hudson/model/AbstractCIBase.java
+++ b/core/src/main/java/hudson/model/AbstractCIBase.java
@@ -140,6 +140,7 @@ public abstract class AbstractCIBase extends Node implements ItemGroup<TopLevelI
        for (Map.Entry<Node, Computer> e : computers.entrySet()) {
            if (e.getValue() == computer) {
                computers.remove(e.getKey());
+                computer.onRemoved();
                return;
            }
        }

--- a/core/src/main/java/hudson/model/Computer.java
+++ b/core/src/main/java/hudson/model/Computer.java
@@ -663,23 +663,46 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces

    /**
     * Called by {@link Jenkins#updateComputerList()} to notify {@link Computer} that it will be discarded.
+     *
+     * <p>
+     * Note that at this point {@link #getNode()} returns null.
+     *
+     * @see #onRemoved()
     */
    protected void kill() {
        setNumExecutors(0);
    }

-    private synchronized void setNumExecutors(int n) {
-        if(numExecutors==n) return; // no-op
+    /**
+     * Called by {@link Jenkins} when this computer is removed.
+     *
+     * <p>
+     * This happens when the list of nodes is updated (for example by {@link Jenkins#setNodes(List)}) and
+     * the computer becomes redundant. Such {@link Computer}s get {@linkplain #kill() killed}, then
+     * after all their executors are finished, this method is called.
+     *
+     * <p>
+     * Note that at this point {@link #getNode()} returns null.
+     *
+     * @see #kill()
+     * @since 1.510
+     */
+    protected void onRemoved(){
+    }

-        int diff = n-numExecutors;
+    private synchronized void setNumExecutors(int n) {
        this.numExecutors = n;
+        int diff = executors.size()-n;

-        if(diff<0) {
+        if (diff>0) {
+            // we have too many executors
            // send signal to all idle executors to potentially kill them off
            for( Executor e : executors )
                if(e.isIdle())
                    e.interrupt();
-        } else {
+        }
+
+        if (diff<0) {
            // if the number is increased, add new ones
            addNewExecutorIfNecessary();
        }
@@ -1076,8 +1099,8 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
        offlineMessage = Util.fixEmptyAndTrim(offlineMessage);
        setTemporarilyOffline(true,
                OfflineCause.create(hudson.slaves.Messages._SlaveComputer_DisconnectedBy(
-                    Jenkins.getAuthentication().getName(),
-                    offlineMessage!=null ? " : " + offlineMessage : "")));
+                        Jenkins.getAuthentication().getName(),
+                        offlineMessage != null ? " : " + offlineMessage : "")));
        return HttpResponses.redirectToDot();
    }


--- a/core/src/main/java/hudson/model/Node.java
+++ b/core/src/main/java/hudson/model/Node.java
@@ -84,7 +84,7 @@ public abstract class Node extends AbstractModelObject implements Reconfigurable
    private static final Logger LOGGER = Logger.getLogger(Node.class.getName());

    /**
-     * Newly copied slaves get this flag set, so that Hudson doesn't try to start this node until its configuration
+     * Newly copied slaves get this flag set, so that Jenkins doesn't try to start/remove this node until its configuration
     * is saved once.
     */
    protected volatile transient boolean holdOffLaunchUntilSave;

--- a/core/src/main/java/hudson/slaves/AbstractCloudComputer.java
+++ b/core/src/main/java/hudson/slaves/AbstractCloudComputer.java
@@ -48,7 +48,7 @@ public class AbstractCloudComputer<T extends AbstractCloudSlave> extends SlaveCo
    }

    /**
-     * When the slave is deleted, free the node.
+     * When the slave is deleted, free the node right away.
     */
    @Override
    public HttpResponse doDoDelete() throws IOException {

--- a/core/src/main/java/hudson/slaves/AbstractCloudImpl.java
+++ b/core/src/main/java/hudson/slaves/AbstractCloudImpl.java
@@ -10,6 +10,7 @@ package hudson.slaves;
 * a new cloud to Jenkins.
 *
 * @author Kohsuke Kawaguchi
+ * @see AbstractCloudSlave
 */
 public abstract class AbstractCloudImpl extends Cloud {
    /**

--- a/core/src/main/java/hudson/slaves/Cloud.java
+++ b/core/src/main/java/hudson/slaves/Cloud.java
@@ -26,6 +26,8 @@ package hudson.slaves;
 import hudson.ExtensionPoint;
 import hudson.Extension;
 import hudson.DescriptorExtensionList;
+import hudson.model.Computer;
+import hudson.model.Slave;
 import hudson.slaves.NodeProvisioner.PlannedNode;
 import hudson.model.Describable;
 import jenkins.model.Jenkins;
@@ -37,6 +39,7 @@ import hudson.security.ACL;
 import hudson.security.AccessControlled;
 import hudson.security.Permission;
 import hudson.util.DescriptorList;
+import org.kohsuke.stapler.DataBoundConstructor;

 import java.util.Collection;

@@ -47,6 +50,33 @@ import java.util.Collection;
 * Put another way, this class encapsulates different communication protocols
 * needed to start a new slave programmatically.
 *
+ * <h2>Notes for implementers</h2>
+ * <h4>Automatically delete idle slaves</h4>
+ * <p>
+ * Nodes provisioned from a cloud do not automatically get released just because they were created from a {@link Cloud}.
+ * Releasing them requires the use of a {@link RetentionStrategy}. Instantiate your {@link Slave} subtype with something
+ * like {@link CloudSlaveRetentionstrategy} so that it gets automatically deleted after some idle time.
+ *
+ * <h4>Freeing an external resource when a slave is removed</h4>
+ * <p>
+ * Whether you do auto scale-down or not, you often want to release an external resource tied to a cloud-allocated
+ * slave when it is removed.
+ *
+ * <p>
+ * To do this, have your {@link Slave} subtype remember the necessary handle (such as EC2 instance ID)
+ * as a field. Such fields need to survive the user-initiated re-configuration of {@link Slave}, so you'll need to
+ * expose it in your {@link Slave} <tt>configure-entries.jelly</tt> and read it back in through {@link DataBoundConstructor}.
+ *
+ * <p>
+ * You then implement your own {@link Computer} subtype, override {@link Slave#createComputer()}, and instantiate
+ * your own {@link Computer} subtype with this handle information.
+ *
+ * <p>
+ * Finally, override {@link Computer#onRemoved()} and use the handle to talk to the "cloud" and de-allocate
+ * the resource (such as shutting down a virtual machine). {@link Computer} needs to own this handle information
+ * because by the time this happens, the {@link Slave} object is already long gone.
+ *
+ *
 * @author Kohsuke Kawaguchi
 * @see NodeProvisioner
 * @see AbstractCloudImpl

--- a/core/src/main/java/hudson/slaves/CloudSlaveRetentionstrategy.java
+++ b/core/src/main/java/hudson/slaves/CloudSlaveRetentionstrategy.java
+package hudson.slaves;
+
+import hudson.model.Computer;
+import hudson.model.Node;
+import hudson.util.TimeUnit2;
+import jenkins.model.Jenkins;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Default convenience implementation of {@link RetentionStrategy} for slaves provisioned from {@link Cloud}.
+ *
+ * If a slave is idle for 10 minutes, this retention strategy will remove the slave. This can be used as-is for
+ * a {@link Node} provisioned by a cloud to implement the auto-scaling semantics; it can be subtyped to tweak
+ * the behaviour, or it can be used as an example.
+ *
+ * @author Kohsuke Kawaguchi
+ * @since 1.510
+ */
+public class CloudSlaveRetentionstrategy<T extends Computer> extends RetentionStrategy<T> {
+
+    @Override
+    public long check(T c) {
+        if (!c.isConnecting() && c.isAcceptingTasks()) {
+            if (isIdleForTooLong(c)) {
+                try {
+                    Node n = c.getNode();
+                    if (n!=null)    // rare, but n==null if the node is deleted and being checked roughly at the same time
+                        kill(n);
+                } catch (IOException e) {
+                    LOGGER.log(Level.WARNING, "Failed to remove "+c.getDisplayName(),e);
+                }
+            }
+        }
+        return checkCycle();
+    }
+
+    /**
+     * Remove the node.
+     *
+     * <p>
+     * To actually deallocate the resource tied to this {@link Node}, implement {@link Computer#onRemoved()}.
+     */
+    protected void kill(Node n) throws IOException {
+        Jenkins.getInstance().removeNode(n);
+    }
+
+    /**
+     * When do we check again next time?
+     */
+    protected long checkCycle() {
+        return getIdleMaxTime()/10;
+    }
+
+    /**
+     * Has this computer been idle for too long?
+     */
+    protected boolean isIdleForTooLong(T c) {
+        return System.currentTimeMillis()-c.getIdleStartMilliseconds() > getIdleMaxTime();
+    }
+
+    /**
+     * If the computer has been idle longer than this time, we'll kill the slave.
+     */
+    protected long getIdleMaxTime() {
+        return TIMEOUT;
+    }
+
+    // for debugging, it's convenient to be able to reduce this time
+    public static long TIMEOUT = Long.getLong(CloudSlaveRetentionstrategy.class.getName()+".timeout", TimeUnit2.MINUTES.toMillis(10));
+
+    private static final Logger LOGGER = Logger.getLogger(CloudSlaveRetentionstrategy.class.getName());
+}