提交 603b35ae 编写于 作者: K kohsuke

[FIXED HUDSON-2596] Preventive node monitoring of slave health metrics can be...

[FIXED HUDSON-2596] Preventive node monitoring of slave health metrics can now be configured individually. This will be in Hudson 1.301.

git-svn-id: https://hudson.dev.java.net/svn/hudson/trunk/hudson/main@17361 71c3de6d-444a-0410-be80-ed276b4c234a
上级 7b47ce30
......@@ -23,10 +23,14 @@
*/
package hudson.model;
import hudson.BulkChange;
import hudson.DescriptorExtensionList;
import hudson.Util;
import hudson.slaves.NodeDescriptor;
import hudson.XmlFile;
import hudson.model.Descriptor.FormException;
import hudson.node_monitors.NodeMonitor;
import hudson.slaves.NodeDescriptor;
import hudson.util.DescribableList;
import hudson.util.FormValidation;
import org.kohsuke.stapler.QueryParameter;
import org.kohsuke.stapler.StaplerRequest;
......@@ -36,11 +40,17 @@ import org.kohsuke.stapler.export.ExportedBean;
import javax.servlet.ServletException;
import static javax.servlet.http.HttpServletResponse.SC_BAD_REQUEST;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.Comparator;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Serves as the top of {@link Computer}s in the URL hierarchy.
......@@ -51,15 +61,29 @@ import java.util.List;
*/
@ExportedBean
public final class ComputerSet extends AbstractModelObject {
private static final List<NodeMonitor> monitors;
/**
* This is the owner that persists {@link #monitors}.
*
* <p>
* Its {@link Saveable#save()} writes the whole monitor list to the file
* returned by {@link #getConfigFile()}.
*/
private static final Saveable MONITORS_OWNER = new Saveable() {
public void save() throws IOException {
// the complete list is written in one go, not individual entries
getConfigFile().write(monitors);
}
};
private static final DescribableList<NodeMonitor,Descriptor<NodeMonitor>> monitors
= new DescribableList<NodeMonitor, Descriptor<NodeMonitor>>(MONITORS_OWNER);
/**
* Returns the fixed display name of this object, {@code "nodes"}.
*/
@Exported
public String getDisplayName() {
return "nodes";
}
/**
* @deprecated as of 1.301
* Use {@link #getMonitors()}.
*/
public static List<NodeMonitor> get_monitors() {
return monitors;
return monitors.toList();
}
@Exported(name="computer",inline=true)
......@@ -67,6 +91,29 @@ public final class ComputerSet extends AbstractModelObject {
return Hudson.getInstance().getComputers();
}
/**
* Exposes {@link NodeMonitor#all()} as a bean property so that Jelly views can bind to it.
*
* @return whatever {@link NodeMonitor#all()} returns.
*/
public DescriptorExtensionList<NodeMonitor,Descriptor<NodeMonitor>> getNodeMonitorDescriptors() {
return NodeMonitor.all();
}
/**
* Returns the list of {@link NodeMonitor} instances held by this class.
*
* @return the shared static list itself, not a copy.
*/
public static DescribableList<NodeMonitor,Descriptor<NodeMonitor>> getMonitors() {
return monitors;
}
/**
 * Returns a subset of {@link #getMonitors()} that are {@linkplain NodeMonitor#isIgnored() not ignored},
 * keyed by their descriptors.
 *
 * @return a freshly built map; ignored monitors are left out of it.
 */
public static Map<Descriptor<NodeMonitor>,NodeMonitor> getNonIgnoredMonitors() {
    Map<Descriptor<NodeMonitor>,NodeMonitor> active = new HashMap<Descriptor<NodeMonitor>, NodeMonitor>();
    for (NodeMonitor monitor : monitors) {
        if (monitor.isIgnored()) {
            continue; // ignored monitors are not listed
        }
        active.put(monitor.getDescriptor(), monitor);
    }
    return active;
}
/**
* Gets all the slave names.
*/
......@@ -262,6 +309,37 @@ public final class ComputerSet extends AbstractModelObject {
}
}
/**
 * Accepts submission from the configuration page.
 *
 * <p>
 * Rebuilds {@link #monitors} from the submitted form, then adds an ignored default
 * instance for every registered descriptor that has no instance after the rebuild.
 * The changes are grouped in one {@link BulkChange}, which is committed in every case,
 * including when the submission fails.
 */
public final synchronized void doConfigSubmit( StaplerRequest req, StaplerResponse rsp ) throws IOException, ServletException {
    BulkChange bulk = new BulkChange(MONITORS_OWNER);
    try {
        Hudson.getInstance().checkPermission(Hudson.ADMINISTER);
        monitors.rebuild(req, req.getSubmittedForm(), getNodeMonitorDescriptors());

        // descriptors without an instance after the rebuild get an ignored instance
        for (Descriptor<NodeMonitor> descriptor : NodeMonitor.all()) {
            if (monitors.get(descriptor) != null) {
                continue;
            }
            NodeMonitor fallback = createDefaultInstance(descriptor, true);
            if (fallback != null) {
                monitors.add(fallback);
            }
        }

        rsp.sendRedirect2(".");
    } catch (FormException e) {
        sendError(e, req, rsp);
    } finally {
        bulk.commit();
    }
}
/**
 * Locates the file in which {@link NodeMonitor}s are persisted:
 * {@code nodeMonitors.xml} under the Hudson root directory.
 */
private static XmlFile getConfigFile() {
    File rootDir = Hudson.getInstance().getRootDir();
    File monitorsXml = new File(rootDir, "nodeMonitors.xml");
    return new XmlFile(monitorsXml);
}
/**
* Returns a new {@link Api} object wrapping this instance.
*/
public Api getApi() {
return new Api(this);
}
......@@ -271,15 +349,44 @@ public final class ComputerSet extends AbstractModelObject {
*/
public static void initialize() {}
private static final Logger LOGGER = Logger.getLogger(ComputerSet.class.getName());
static {
// create all instances
ArrayList<NodeMonitor> r = new ArrayList<NodeMonitor>();
for (Descriptor<NodeMonitor> d : NodeMonitor.all())
try {
r.add(d.newInstance(null,null));
} catch (FormException e) {
// so far impossible. TODO: report
try {
DescribableList<NodeMonitor,Descriptor<NodeMonitor>> r
= new DescribableList<NodeMonitor, Descriptor<NodeMonitor>>(Saveable.NOOP);
// load persisted monitors
XmlFile xf = getConfigFile();
if(xf.exists()) {
DescribableList<NodeMonitor,Descriptor<NodeMonitor>> persisted =
(DescribableList<NodeMonitor,Descriptor<NodeMonitor>>) xf.read();
r.replaceBy(persisted.toList());
}
monitors = r;
// if we have any new monitors, let's add them
for (Descriptor<NodeMonitor> d : NodeMonitor.all())
if(r.get(d)==null) {
NodeMonitor i = createDefaultInstance(d,false);
if(i!=null)
r.add(i);
}
monitors.replaceBy(r.toList());
} catch (IOException e) {
LOGGER.log(Level.WARNING, "Failed to instanciate NodeMonitors",e);
}
}
/**
 * Creates a {@link NodeMonitor} by reflectively instantiating {@code d.clazz}
 * and sets its ignored flag.
 *
 * @param d
 *      descriptor whose {@code clazz} is instantiated.
 * @param ignored
 *      value passed to {@link NodeMonitor#setIgnored(boolean)} on the new instance.
 * @return
 *      the new monitor, or null if the instantiation failed (the failure is logged.)
 */
private static NodeMonitor createDefaultInstance(Descriptor<NodeMonitor> d, boolean ignored) {
    NodeMonitor created = null;
    try {
        created = d.clazz.newInstance();
        created.setIgnored(ignored);
    } catch (InstantiationException e) {
        LOGGER.log(Level.SEVERE, "Failed to instanciate "+d.clazz, e);
    } catch (IllegalAccessException e) {
        LOGGER.log(Level.SEVERE, "Failed to instanciate "+d.clazz, e);
    }
    return created;
}
}
......@@ -26,6 +26,8 @@ package hudson.node_monitors;
import hudson.model.Computer;
import hudson.model.Descriptor;
import hudson.model.Hudson;
import hudson.model.ComputerSet;
import hudson.model.AdministrativeMonitor;
import hudson.triggers.Trigger;
import hudson.triggers.SafeTimerTask;
......@@ -118,6 +120,34 @@ public abstract class AbstractNodeMonitorDescriptor<T> extends Descriptor<NodeMo
return record.data.get(c);
}
/**
 * Is this monitor currently ignored?
 *
 * @return true
 *      if {@link ComputerSet#getMonitors()} holds no instance for this descriptor,
 *      or if that instance reports itself as ignored.
 */
public boolean isIgnored() {
    NodeMonitor configured = ComputerSet.getMonitors().get(this);
    if (configured == null) {
        return true;
    }
    return configured.isIgnored();
}
/**
 * Utility method to mark the computer offline for derived classes.
 *
 * @param c
 *      the computer to be taken temporarily offline.
 * @return true
 *      if the node was actually taken offline by this act (as opposed to us deciding not to do it,
 *      or the computer already marked offline.)
 */
protected boolean markOffline(Computer c) {
    // noop when this monitor is ignored, or when the computer is offline already
    if (isIgnored()) {
        return false;
    }
    if (c.isTemporarilyOffline()) {
        return false;
    }

    // TODO: define a mechanism to leave a note on this computer so that people know why we took it offline
    c.setTemporarilyOffline(true);

    // notify the admin
    MonitorMarkedNodeOffline notice = AdministrativeMonitor.all().get(MonitorMarkedNodeOffline.class);
    if (notice != null) {
        notice.active = true;
    }
    return true;
}
/**
* @see NodeMonitor#triggerUpdate()
*/
......
......@@ -83,14 +83,8 @@ import org.kohsuke.stapler.export.ExportedBean;
protected DiskSpace monitor(Computer c) throws IOException, InterruptedException {
DiskSpace size = getFreeSpace(c);
if(size!=null && !size.moreThanGB()) {
// TODO: this scheme should be generalized, so that Hudson can remember why it's marking the node
// as offline, as well as allowing the user to force Hudson to use it.
if(!c.isTemporarilyOffline()) {
LOGGER.warning(Messages.DiskSpaceMonitor_MarkedOffline(c.getName()));
c.setTemporarilyOffline(true);
}
}
if(size!=null && !size.moreThanGB() && markOffline(c))
LOGGER.warning(Messages.DiskSpaceMonitor_MarkedOffline(c.getName()));
return size;
}
......
/*
* The MIT License
*
* Copyright (c) 2004-2009, Sun Microsystems, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package hudson.node_monitors;
import hudson.model.AdministrativeMonitor;
import hudson.Extension;
/**
* If {@link NodeMonitor} marks the node as offline, we'll show this to the admin to get their attention.
*
* <p>
* This also allows them to disable the monitoring if they don't like it.
*
* @author Kohsuke Kawaguchi
* @since 1.301
*/
@Extension
public class MonitorMarkedNodeOffline extends AdministrativeMonitor {
public boolean active = false;
public boolean isActivated() {
return active;
}
}
......@@ -27,6 +27,7 @@ import hudson.ExtensionPoint;
import hudson.Functions;
import hudson.DescriptorExtensionList;
import hudson.Extension;
import hudson.tasks.Publisher;
import hudson.scm.RepositoryBrowser;
import hudson.model.Computer;
import hudson.model.ComputerSet;
......@@ -39,6 +40,8 @@ import hudson.util.DescriptorList;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.List;
import java.io.Serializable;
import org.kohsuke.stapler.export.Exported;
import org.kohsuke.stapler.export.ExportedBean;
......@@ -52,13 +55,24 @@ import org.kohsuke.stapler.export.ExportedBean;
* Invoked from {@link ComputerSet} <tt>index.jelly</tt> to render a column.
* The {@link NodeMonitor} instance is accessible through the "from" variable.
* Also see {@link #getColumnCaption()}.
*
* <dt>config.jelly (optional)</dt>
* <dd>
* Configuration fragment to be displayed in {@code http://server/hudson/computer/configure}.
* Used for configuring the threshold for taking nodes offline.
* </dl>
*
* <h2>Persistence</h2>
* <p>
* {@link NodeMonitor}s are persisted via XStream.
*
* @author Kohsuke Kawaguchi
* @since 1.123
*/
@ExportedBean
public abstract class NodeMonitor implements ExtensionPoint, Describable<NodeMonitor> {
private volatile boolean ignored;
/**
* Returns the name of the column to be added to {@link ComputerSet} index.jelly.
*
......@@ -100,6 +114,27 @@ public abstract class NodeMonitor implements ExtensionPoint, Describable<NodeMon
return ComputerSet.get_monitors();
}
/**
* True if this monitoring shouldn't mark the slaves offline.
*
* <p>
* Many {@link NodeMonitor}s implement a logic that if the value goes above/below
* a threshold, the slave will be marked offline as a preventive measure.
* This flag controls that.
*
* <p>
* Unlike {@link Publisher}, where the absence of an instance indicates that it's disengaged,
* in {@link NodeMonitor} this boolean flag is used to indicate the disengagement, so that
* monitors work on an opt-out basis.
*/
public boolean isIgnored() {
return ignored;
}
/**
* Sets the flag reported by {@link #isIgnored()}.
*
* @param ignored
*      new value of the flag.
*/
public void setIgnored(boolean ignored) {
this.ignored = ignored;
}
/**
* All registered {@link NodeMonitor}s.
* @deprecated as of 1.286.
......
......@@ -66,14 +66,8 @@ public class ResponseTimeMonitor extends NodeMonitor {
d = new Data(old,-1L);
}
if(d.hasTooManyTimeouts()) {
// TODO: this scheme should be generalized, so that Hudson can remember why it's marking the node
// as offline, as well as allowing the user to force Hudson to use it.
if(!c.isTemporarilyOffline()) {
LOGGER.warning(Messages.ResponseTimeMonitor_MarkedOffline(c.getName()));
c.setTemporarilyOffline(true);
}
}
if(d.hasTooManyTimeouts() && markOffline(c))
LOGGER.warning(Messages.ResponseTimeMonitor_MarkedOffline(c.getName()));
return d;
}
......
<!--
The MIT License
Copyright (c) 2004-2009, Sun Microsystems, Inc., Kohsuke Kawaguchi
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-->
<!--
Manage monitoring
-->
<j:jelly xmlns:j="jelly:core" xmlns:st="jelly:stapler" xmlns:d="jelly:define" xmlns:l="/lib/layout" xmlns:t="/lib/hudson" xmlns:f="/lib/form" xmlns:i="jelly:fmt">
<l:layout title="${%Node Monitoring Configuration}" norefresh="true">
<st:include page="sidepanel.jelly" />
<l:main-panel>
<!-- to make the form field binding work -->
<j:set var="instance" value="${it}" />
<j:set var="descriptor" value="${it.descriptor}" />
<f:form method="post" action="configSubmit" name="config">
<f:descriptorList title="${%Preventive Node Monitoring}"
descriptors="${it.nodeMonitorDescriptors}"
instances="${it.nonIgnoredMonitors}" />
<f:block>
<f:submit value="OK" />
</f:block>
</f:form>
</l:main-panel>
</l:layout>
</j:jelly>
\ No newline at end of file
......@@ -31,6 +31,7 @@ THE SOFTWARE.
<l:tasks>
<l:task icon="images/24x24/up.gif" href="${rootURL}/" title="${%Back to Dashboard}" />
<l:task icon="images/24x24/new-computer.gif" href="new" title="${%New Node}" permission="${app.ADMINISTER}" />
<l:task icon="images/24x24/setting.gif" href="configure" title="${%Configure}" permission="${app.ADMINISTER}" />
</l:tasks>
<t:queue items="${app.queue.items}" />
<t:executors />
......
<!--
The MIT License
Copyright (c) 2004-2009, Sun Microsystems, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-->
<j:jelly xmlns:j="jelly:core" xmlns:st="jelly:stapler" xmlns:d="jelly:define" xmlns:l="/lib/layout" xmlns:t="/lib/hudson" xmlns:f="/lib/form">
<div class="warning">
<form method="post" action="${rootURL}/${it.url}/disable">
${%blurb(rootURL)}
<f:submit value="${%Dismiss}"/>
</form>
</div>
</j:jelly>
\ No newline at end of file
blurb=Hudson took some slaves offline because <a href="{0}/computer/">their key health metrics</a> went below a threshold. \
If you don''t want Hudson to do this, \
<a href="{0}/computer/configure">change the setting</a>.
\ No newline at end of file
......@@ -25,6 +25,7 @@ package hudson.model;
import org.jvnet.hudson.test.HudsonTestCase;
import org.jvnet.hudson.test.Bug;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
/**
* @author Kohsuke Kawaguchi
......@@ -36,4 +37,13 @@ public class ComputerSetTest extends HudsonTestCase {
createSlave();
client.goTo("computer");
}
/**
* Tests the basic UI behavior of the node monitoring
*/
public void testConfiguration() throws Exception {
HudsonTestCase.WebClient client = new WebClient();
// load the node monitoring configuration page and submit its form as-is;
// there is no explicit assertion, so the test fails only if one of these steps throws
HtmlForm form = client.goTo("computer/configure").getFormByName("config");
submit(form);
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册