提交 be1f8f91 编写于 作者: K Kohsuke Kawaguchi

Fixed a bug in Jenkins.overallNodeProvisioner

Until now, Jenkins.overallNodeProvisioner was tracking an inconsistent
pair of numbers. On one hand, it was tracking all the executors in the
system, but then it was also only tracking the # of free-roaming
label-unassigned jobs as the queue length.

So if all your slaves are set with Node.Mode.EXCLUSIVE and you have some
free-roaming jobs, then it'll never tickle NodePrivisioner.

In other words, the # of executors weren't reflecting the actual # of
executors that can execute what's counted as the queue length.

See the problem report at
https://groups.google.com/forum/?fromgroups#!topic/jenkinsci-dev/bUwGEgOwv4Q

To fix this, I introduced another LoadStatistics that only counts
the # of executors that can execute free-roaming jobs, and # of
free-roaming jobs as the queue length. In this way, two pairs of numbers
are consistent.

This now allows us to use overallLoadStatistics for really the entire
system, including all executors and the total queue length. This is
primarily for administrators to see the resource utilization, and it is
not useful for NodeProvisioner input because it's mixing too many
different things.

The semantics change in OverallLoadStatistics.queueLength allows us to
deprecate its totalQueueLength field.
上级 5bba8c76
......@@ -58,6 +58,9 @@ Upcoming changes</a>
<li class=bug>
When accessing a page that requires authentication, redirection to start authentication results in a content decoding failure.
(<a href="https://issues.jenkins-ci.org/browse/JENKINS-13625">issue 13625</a>)
<li class=bug>
Fixed a bug in the way cloud support handles free-roaming jobs.
(<a href="https://groups.google.com/forum/?fromgroups#!topic/jenkinsci-dev/bUwGEgOwv4Q">discussion</a>)
<li class=bug>
Fixed a regression in untar operation in exotic platforms
(<a href="https://issues.jenkins-ci.org/browse/JENKINS-13202">issue 13202</a>)
......
......@@ -60,6 +60,7 @@ import java.util.List;
* @author Kohsuke Kawaguchi
* @see Label#loadStatistics
* @see Jenkins#overallLoad
* @see Jenkins#unlabeledLoad
*/
@ExportedBean
public abstract class LoadStatistics {
......@@ -176,6 +177,20 @@ public abstract class LoadStatistics {
return new Api(this);
}
/**
* Updates {@link #totalExecutors} and {@link #busyExecutors} by using
* the current snapshot value.
*
* {@link #queueLength} is updated separately via {@link LoadStatisticsUpdater} to
* improve the efficiency because we are counting this for all {@link LoadStatistics} at once.
*/
protected void updateExecutorCounts() {
int t = computeTotalExecutors();
int i = computeIdleExecutors();
totalExecutors.update(t);
busyExecutors.update(t-i);
}
/**
* With 0.90 decay ratio for every 10sec, half reduction is about 1 min.
*/
......@@ -195,33 +210,30 @@ public abstract class LoadStatistics {
}
protected void doRun() {
Jenkins h = Jenkins.getInstance();
List<hudson.model.Queue.BuildableItem> bis = h.getQueue().getBuildableItems();
Jenkins j = Jenkins.getInstance();
List<Queue.BuildableItem> bis = j.getQueue().getBuildableItems();
// update statistics on slaves
for( Label l : h.getLabels() ) {
l.loadStatistics.totalExecutors.update(l.getTotalExecutors());
l.loadStatistics.busyExecutors .update(l.getBusyExecutors());
int q=0;
for (hudson.model.Queue.BuildableItem bi : bis) {
if(bi.task.getAssignedLabel()==l)
q++;
}
l.loadStatistics.queueLength.update(q);
for( Label l : j.getLabels() ) {
l.loadStatistics.updateExecutorCounts();
l.loadStatistics.queueLength.update(count(bis, l));
}
// update statistics of the entire system
ComputerSet cs = new ComputerSet();
h.overallLoad.totalExecutors.update(cs.getTotalExecutors());
h.overallLoad.busyExecutors .update(cs.getBusyExecutors());
j.unlabeledLoad.updateExecutorCounts();
j.unlabeledLoad.queueLength.update(count(bis, null));
j.overallLoad.updateExecutorCounts();
j.overallLoad.queueLength.update(bis.size());
}
private int count(List<Queue.BuildableItem> bis, Label l) {
int q=0;
for (hudson.model.Queue.BuildableItem bi : bis) {
if(bi.task.getAssignedLabel()==null)
for (Queue.BuildableItem bi : bis) {
if(bi.task.getAssignedLabel()==l)
q++;
}
h.overallLoad.queueLength.update(q);
h.overallLoad.totalQueueLength.update(bis.size());
return q;
}
}
}
......@@ -25,28 +25,30 @@ package hudson.model;
import hudson.model.MultiStageTimeSeries.TimeScale;
import hudson.model.MultiStageTimeSeries.TrendChart;
import hudson.util.ColorPalette;
import jenkins.model.Jenkins;
import jenkins.model.UnlabeldLoadStatistics;
import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.NoExternalUse;
import org.kohsuke.stapler.export.Exported;
/**
* {@link LoadStatistics} for the entire system (the master and all the slaves combined.)
*
* <p>
* {@link #computeQueueLength()} and {@link #queueLength} counts those tasks
* that are unassigned to any node, whereas {@link #totalQueueLength}
* tracks the queue length including tasks that are assigned to a specific node.
* {@link LoadStatistics} for the entire system (the master and all the slaves combined),
* and all the jobs that are running on it.
*
* @author Kohsuke Kawaguchi
* @see Jenkins#overallLoad
* @see UnlabeldLoadStatistics
*/
public class OverallLoadStatistics extends LoadStatistics {
/**
* Number of total {@link Queue.BuildableItem}s that represents blocked builds.
*
* @deprecated as of 1.467
* Use {@link #queueLength}. Left as an alias here for backward compatibility.
*/
@Exported
public final MultiStageTimeSeries totalQueueLength = new MultiStageTimeSeries(
Messages._LoadStatistics_Legends_QueueLength(), ColorPalette.GREY, 0,DECAY);
@Restricted(NoExternalUse.class)
public final MultiStageTimeSeries totalQueueLength = queueLength;
public OverallLoadStatistics() {
super(0,0);
......@@ -64,7 +66,7 @@ public class OverallLoadStatistics extends LoadStatistics {
@Override
public int computeQueueLength() {
return Jenkins.getInstance().getQueue().countBuildableItemsFor(null);
return Jenkins.getInstance().getQueue().countBuildableItems();
}
/**
......@@ -72,6 +74,6 @@ public class OverallLoadStatistics extends LoadStatistics {
* not {@link #queueLength}, which just shows jobs that are to be run on the master.
*/
protected TrendChart createOverallTrendChart(TimeScale timeScale) {
return MultiStageTimeSeries.createTrendChart(timeScale,busyExecutors,totalExecutors,totalQueueLength);
return MultiStageTimeSeries.createTrendChart(timeScale,busyExecutors,totalExecutors,queueLength);
}
}
......@@ -691,6 +691,13 @@ public class Queue extends ResourceController implements Saveable {
return r;
}
/**
* Counts all the {@link BuildableItem}s currently in the queue.
*/
public synchronized int countBuildableItems() {
return buildables.size()+pendings.size();
}
/**
* Gets the information about the queue item for the given project.
*
......@@ -1260,6 +1267,14 @@ public class Queue extends ResourceController implements Saveable {
*/
public Future<Executable> getFuture() { return future; }
/**
* If this task needs to be run on a node with a particular label,
* return that {@link Label}. Otherwise null, indicating
* it can run on anywhere.
*
* <p>
* This code takes {@link LabelAssignmentAction} into account, then fall back to {@link SubTask#getAssignedLabel()}
*/
public Label getAssignedLabel() {
for (LabelAssignmentAction laa : getActions(LabelAssignmentAction.class)) {
Label l = laa.getAssignedLabel(task);
......
......@@ -305,7 +305,7 @@ public class NodeProvisioner {
@Override
protected void doRun() {
Jenkins h = Jenkins.getInstance();
h.overallNodeProvisioner.update();
h.unlabeledNodeProvisioner.update();
for( Label l : h.getLabels() )
l.nodeProvisioner.update();
}
......
......@@ -30,6 +30,7 @@ import com.google.common.collect.Lists;
import com.google.inject.Injector;
import hudson.ExtensionComponent;
import hudson.ExtensionFinder;
import hudson.model.LoadStatistics;
import hudson.model.Messages;
import hudson.model.Node;
import hudson.model.AbstractCIBase;
......@@ -569,14 +570,38 @@ public class Jenkins extends AbstractCIBase implements ModifiableItemGroup<TopLe
/**
* Load statistics of the entire system.
*
* This includes every executor and every job in the system.
*/
@Exported
public transient final OverallLoadStatistics overallLoad = new OverallLoadStatistics();
/**
* {@link NodeProvisioner} that reacts to {@link OverallLoadStatistics}.
* Load statistics of the free roaming jobs and slaves.
*
* This includes all executors on {@link Mode#NORMAL} nodes and jobs that do not have any assigned nodes.
*
* @since 1.467
*/
@Exported
public transient final LoadStatistics unlabeledLoad = new UnlabeldLoadStatistics();
/**
* {@link NodeProvisioner} that reacts to {@link #unlabeledLoad}.
* @since 1.467
*/
public transient final NodeProvisioner unlabeledNodeProvisioner = new NodeProvisioner(null,unlabeledLoad);
/**
* @deprecated as of 1.467
* Use {@link #unlabeledNodeProvisioner}.
* This was broken because it was tracking all the executors in the system, but it was only tracking
* free-roaming jobs in the queue. So {@link Cloud} fails to launch nodes when you have some exclusive
* slaves and free-roaming jobs in the queue.
*/
public transient final NodeProvisioner overallNodeProvisioner = new NodeProvisioner(null,overallLoad);
@Restricted(NoExternalUse.class)
public transient final NodeProvisioner overallNodeProvisioner = unlabeledNodeProvisioner;
public transient final ServletContext servletContext;
......
/*
* The MIT License
*
* Copyright (c) 2012, CloudBees, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package jenkins.model;
import hudson.model.Computer;
import hudson.model.LoadStatistics;
import hudson.model.Node;
import hudson.model.Node.Mode;
import hudson.model.OverallLoadStatistics;
import hudson.model.Queue.Task;
/**
* {@link LoadStatistics} that track the "free roam" jobs (whose {@link Task#getAssignedLabel()} is null)
* and the # of executors that can execute them ({@link Node} whose mode is {@link Mode#EXCLUSIVE})
*
* @see Mode#EXCLUSIVE
* @see Jenkins#unlabeledLoad
* @see OverallLoadStatistics
* @author Kohsuke Kawaguchi
*/
public class UnlabeldLoadStatistics extends LoadStatistics {
UnlabeldLoadStatistics() {
super(0, 0);
}
@Override
public int computeIdleExecutors() {
int r=0;
for (Computer c : Jenkins.getInstance().getComputers())
if(c.getNode().getMode()== Mode.NORMAL && (c.isOnline() || c.isConnecting()))
r += c.countIdle();
return r;
}
@Override
public int computeTotalExecutors() {
int r=0;
for (Computer c : Jenkins.getInstance().getComputers()) {
if(c.getNode().getMode()==Mode.NORMAL && c.isOnline())
r += c.countExecutors();
}
return r;
}
@Override
public int computeQueueLength() {
return Jenkins.getInstance().getQueue().countBuildableItemsFor(null);
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册