提交 8669e66e 编写于 作者: K kohsuke

Hudson now automatically attempts to reconnect slaves after a failure (#1364)


git-svn-id: https://hudson.dev.java.net/svn/hudson/trunk/hudson/main@7397 71c3de6d-444a-0410-be80-ed276b4c234a
上级 34397bb6
......@@ -259,6 +259,12 @@ public final class Slave implements Node, Serializable {
public static final class ComputerImpl extends Computer {
private volatile Channel channel;
private Boolean isUnix;
/**
* Number of reconnection attempts counted for this node since the
* slave agent was last launched successfully (the counter is reset
* to 0 on a successful launch).
*
* Used to throttle retries: if we keep failing to reconnect, we
* back off and retry only infrequently instead of hammering the node.
* Transient, so the count is not persisted with the object.
*/
private transient int numRetryAttempt;
/**
* This is where the log from the remote agent goes.
......@@ -324,7 +330,7 @@ public final class Slave implements Node, Serializable {
});
logger.info("slave agent launched for "+slave.getNodeName());
numRetryAttempt=0;
} catch (InterruptedException e) {
e.printStackTrace(listener.error("aborted"));
} catch (IOException e) {
......@@ -431,6 +437,15 @@ public final class Slave implements Node, Serializable {
rsp.sendRedirect("log");
}
/**
 * Records one more reconnection attempt and, if the back-off schedule
 * permits it, tries to launch the slave agent again.
 *
 * The first five attempts each trigger a launch; after that only every
 * twelfth attempt does, so a node that stays dead is retried infrequently.
 */
public void tryReconnect() {
    numRetryAttempt++;

    // initially retry several times quickly, and after that, do it infrequently.
    boolean inQuickPhase = numRetryAttempt < 6;
    boolean onSlowTick = (numRetryAttempt % 12) == 0;
    if (!inQuickPhase && !onSlowTick) {
        return; // throttled: skip this round
    }

    logger.info("Attempting to reconnect " + nodeName);
    launch();
}
public void launch() {
if(channel==null)
launch(getNode());
......
package hudson.model;
import hudson.model.Slave.ComputerImpl;
import hudson.triggers.SafeTimerTask;
/**
 * Timer task that walks over all configured slaves and asks each
 * offline one to attempt a reconnection.
 *
 * <p>
 * Slaves reported as JNLP agents are skipped (presumably because such
 * agents initiate the connection themselves -- confirm against
 * {@code ComputerImpl.isJnlpAgent()}).
 *
 * @author Kohsuke Kawaguchi
 */
public class SlaveReconnectionWork extends SafeTimerTask {
    /**
     * Visits every slave once and triggers {@code tryReconnect()} on
     * those that are offline and not JNLP agents.
     */
    protected void doRun() {
        for (Slave slave : Hudson.getInstance().getSlaves()) {
            ComputerImpl computer = slave.getComputer();

            // a null computer shouldn't happen, but let's be defensive
            boolean needsReconnect = computer != null
                    && computer.isOffline()
                    && !computer.isJnlpAgent();

            if (needsReconnect) {
                computer.tryReconnect();
            }
        }
    }
}
......@@ -13,6 +13,7 @@ import hudson.model.Hudson;
import hudson.model.Item;
import hudson.model.Project;
import hudson.model.WorkspaceCleanupThread;
import hudson.model.SlaveReconnectionWork;
import hudson.scheduler.CronTab;
import hudson.scheduler.CronTabList;
......@@ -207,9 +208,11 @@ public abstract class Trigger<J extends Item> implements Describable<Trigger<?>>
new DoubleLaunchChecker().schedule();
// clean up fingerprint once a day
long HOUR = 1000*60*60;
long DAY = HOUR*24;
long MIN = 1000*60;
long HOUR =60*MIN;
long DAY = 24*HOUR;
timer.scheduleAtFixedRate(new FingerprintCleanupThread(),DAY,DAY);
timer.scheduleAtFixedRate(new WorkspaceCleanupThread(),DAY+4*HOUR,DAY);
timer.scheduleAtFixedRate(new SlaveReconnectionWork(),15*MIN,5*MIN);
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册