提交 f07b3b7e 编写于 作者: S Stephen Connolly 提交者: Oliver Gondža

Merge pull request #1815 from varmenise/OSS-192

[FIXED JENKINS-30084] FlyWeightTasks tied to a label will not cause node provisioning and will be blocked forever.

When a flyweighttask is limited to run on a specific label (e.g. matrix project set restrict where this project can run) if there are no nodes with that label available when it enters the queue then it will immediately move to blocked.
As it is blocked the Node provisioner will not attempt to create any slaves, so the project will sit in the queue forever (or until some other project allocates a slave with the correct label).
(cherry picked from commit 5a5f9c5d)
上级 e021165d
......@@ -876,6 +876,13 @@ public class Queue extends ResourceController implements Saveable {
return new ArrayList<BuildableItem>(snapshot.pendings);
}
/**
* Gets the snapshot of all {@link BlockedItem}s.
*/
protected List<BlockedItem> getBlockedItems() {
return new ArrayList<BlockedItem>(snapshot.blockedProjects);
}
/**
* Returns the snapshot of all {@link LeftItem}s.
*
......@@ -1471,43 +1478,54 @@ public class Queue extends ResourceController implements Saveable {
continue;
}
List<JobOffer> candidates = new ArrayList<JobOffer>(parked.size());
for (JobOffer j : parked.values())
if (j.canTake(p))
candidates.add(j);
MappingWorksheet ws = new MappingWorksheet(p, candidates);
Mapping m = loadBalancer.map(p.task, ws);
if (m == null) {
// if we couldn't find the executor that fits,
// just leave it in the buildables list and
// check if we can execute other projects
LOGGER.log(Level.FINER, "Failed to map {0} to executors. candidates={1} parked={2}",
new Object[]{p, candidates, parked.values()});
continue;
}
if (p.task instanceof FlyweightTask) {
Runnable r = makeFlyWeightTaskBuildable(new BuildableItem(p));
if (r != null) {
p.leave(this);
r.run();
updateSnapshot();
}
} else {
List<JobOffer> candidates = new ArrayList<JobOffer>(parked.size());
for (JobOffer j : parked.values())
if (j.canTake(p))
candidates.add(j);
MappingWorksheet ws = new MappingWorksheet(p, candidates);
Mapping m = loadBalancer.map(p.task, ws);
if (m == null) {
// if we couldn't find the executor that fits,
// just leave it in the buildables list and
// check if we can execute other projects
LOGGER.log(Level.FINER, "Failed to map {0} to executors. candidates={1} parked={2}",
new Object[]{p, candidates, parked.values()});
continue;
}
// found a matching executor. use it.
WorkUnitContext wuc = new WorkUnitContext(p);
m.execute(wuc);
// found a matching executor. use it.
WorkUnitContext wuc = new WorkUnitContext(p);
m.execute(wuc);
p.leave(this);
if (!wuc.getWorkUnits().isEmpty())
makePending(p);
else
LOGGER.log(Level.FINE, "BuildableItem {0} with empty work units!?", p);
// Ensure that identification of blocked tasks is using the live state: JENKINS-27708 & JENKINS-27871
// The creation of a snapshot itself should be relatively cheap given the expected rate of
// job execution. You probably would need 100's of jobs starting execution every iteration
// of maintain() before this could even start to become an issue and likely the calculation
// of isBuildBlocked(p) will become a bottleneck before updateSnapshot() will. Additionally
// since the snapshot itself only ever has at most one reference originating outside of the stack
// it should remain in the eden space and thus be cheap to GC.
// See https://issues.jenkins-ci.org/browse/JENKINS-27708?focusedCommentId=225819&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-225819
// or https://issues.jenkins-ci.org/browse/JENKINS-27708?focusedCommentId=225906&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-225906
// for alternative fixes of this issue.
updateSnapshot();
p.leave(this);
if (!wuc.getWorkUnits().isEmpty()) {
makePending(p);
}
else
LOGGER.log(Level.FINE, "BuildableItem {0} with empty work units!?", p);
// Ensure that identification of blocked tasks is using the live state: JENKINS-27708 & JENKINS-27871
// The creation of a snapshot itself should be relatively cheap given the expected rate of
// job execution. You probably would need 100's of jobs starting execution every iteration
// of maintain() before this could even start to become an issue and likely the calculation
// of isBuildBlocked(p) will become a bottleneck before updateSnapshot() will. Additionally
// since the snapshot itself only ever has at most one reference originating outside of the stack
// it should remain in the eden space and thus be cheap to GC.
// See https://issues.jenkins-ci.org/browse/JENKINS-27708?focusedCommentId=225819&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-225819
// or https://issues.jenkins-ci.org/browse/JENKINS-27708?focusedCommentId=225906&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-225906
// for alternative fixes of this issue.
updateSnapshot();
}
}
} finally { updateSnapshot(); } } finally {
lock.unlock();
......@@ -1522,8 +1540,38 @@ public class Queue extends ResourceController implements Saveable {
private @CheckForNull Runnable makeBuildable(final BuildableItem p) {
if (p.task instanceof FlyweightTask) {
if (!isBlockedByShutdown(p.task)) {
Runnable runnable = makeFlyWeightTaskBuildable(p);
if(runnable != null){
return runnable;
}
//this is to solve JENKINS-30084: the task has to be buildable to force the provisioning of nodes.
//if the execution gets here, it means the task could not be scheduled since the node
//the task is supposed to run on is offline or not available.
//Thus, the flyweighttask enters the buildables queue and will ask Jenkins to provision a node
return new BuildableRunnable(p);
}
// if the execution gets here, it means the task is blocked by shutdown and null is returned.
return null;
} else {
// regular heavyweight task
return new BuildableRunnable(p);
}
}
/**
* This method checks if the flyweight task can be run on any of the available executors
* @param p - the flyweight task to be scheduled
* @return a Runnable if there is an executor that can take the task, null otherwise
*/
@CheckForNull
private Runnable makeFlyWeightTaskBuildable(final BuildableItem p){
//we double check if this is a flyweight task
if (p.task instanceof FlyweightTask) {
Jenkins h = Jenkins.getInstance();
Map<Node,Integer> hashSource = new HashMap<Node, Integer>(h.getNodes().size());
Map<Node, Integer> hashSource = new HashMap<Node, Integer>(h.getNodes().size());
// Even if master is configured with zero executors, we may need to run a flyweight task like MatrixProject on it.
hashSource.put(h, Math.max(h.getNumExecutors() * 100, 1));
......@@ -1538,9 +1586,15 @@ public class Queue extends ResourceController implements Saveable {
Label lbl = p.getAssignedLabel();
for (Node n : hash.list(p.task.getFullDisplayName())) {
final Computer c = n.toComputer();
if (c==null || c.isOffline()) continue;
if (lbl!=null && !lbl.contains(n)) continue;
if (n.canTake(p) != null) continue;
if (c == null || c.isOffline()) {
continue;
}
if (lbl!=null && !lbl.contains(n)) {
continue;
}
if (n.canTake(p) != null) {
continue;
}
return new Runnable() {
@Override public void run() {
c.startFlyWeightTask(new WorkUnitContext(p).createWorkUnit(p.task));
......@@ -1548,19 +1602,10 @@ public class Queue extends ResourceController implements Saveable {
}
};
}
}
// if the execution get here, it means we couldn't schedule it anywhere.
return null;
} else { // regular heavyweight task
return new Runnable() {
@Override public void run() {
p.enter(Queue.this);
}
};
}
return null;
}
private static Hash<Node> NODE_HASH = new Hash<Node>() {
public String hash(Node node) {
return node.getNodeName();
......@@ -2672,6 +2717,20 @@ public class Queue extends ResourceController implements Saveable {
}
}
private class BuildableRunnable implements Runnable {
private final BuildableItem buildableItem;
private BuildableRunnable(BuildableItem p) {
this.buildableItem = p;
}
@Override
public void run() {
//the flyweighttask enters the buildables queue and will ask Jenkins to provision a node
buildableItem.enter(Queue.this);
}
}
private static class LockedJUCCallable<V> implements java.util.concurrent.Callable<V> {
private final java.util.concurrent.Callable<V> delegate;
......
......@@ -30,6 +30,7 @@ import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.xml.XmlPage;
import hudson.Launcher;
import hudson.XmlFile;
import hudson.matrix.Axis;
import hudson.matrix.AxisList;
import hudson.matrix.LabelAxis;
import hudson.matrix.MatrixBuild;
......@@ -41,7 +42,9 @@ import hudson.model.Cause.UserIdCause;
import hudson.model.Queue.BlockedItem;
import hudson.model.Queue.Executable;
import hudson.model.Queue.WaitingItem;
import hudson.model.labels.LabelExpression;
import hudson.model.queue.AbstractQueueTask;
import hudson.model.queue.CauseOfBlockage;
import hudson.model.queue.QueueTaskFuture;
import hudson.model.queue.ScheduleResult;
import hudson.model.queue.SubTask;
......@@ -50,6 +53,8 @@ import hudson.security.GlobalMatrixAuthorizationStrategy;
import hudson.security.SparseACL;
import hudson.slaves.DumbSlave;
import hudson.slaves.DummyCloudImpl;
import hudson.slaves.NodeProperty;
import hudson.slaves.NodePropertyDescriptor;
import hudson.slaves.NodeProvisionerRule;
import hudson.tasks.BuildTrigger;
import hudson.tasks.Shell;
......@@ -99,6 +104,7 @@ import org.jvnet.hudson.test.MockQueueItemAuthenticator;
import org.jvnet.hudson.test.SequenceLock;
import org.jvnet.hudson.test.SleepBuilder;
import org.jvnet.hudson.test.TestBuilder;
import org.jvnet.hudson.test.TestExtension;
import org.jvnet.hudson.test.recipes.LocalData;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.bio.SocketConnector;
......@@ -780,4 +786,119 @@ public class QueueTest {
"B finished at " + buildBEndTime + ", C started at " + buildC.getStartTimeInMillis(),
buildC.getStartTimeInMillis() >= buildBEndTime);
}
@Issue("JENKINS-30084")
@Test
/*
* When a flyweight task is restricted to run on a specific node, the node will be provisioned
* and the flyweight task will be executed.
*/
public void shouldRunFlyweightTaskOnProvisionedNodeWhenNodeRestricted() throws Exception {
MatrixProject matrixProject = r.createMatrixProject();
matrixProject.setAxes(new AxisList(
new Axis("axis", "a", "b")
));
Label label = LabelExpression.get("aws-linux-dummy");
DummyCloudImpl dummyCloud = new DummyCloudImpl(r, 0);
dummyCloud.label = label;
r.jenkins.clouds.add(dummyCloud);
matrixProject.setAssignedLabel(label);
r.assertBuildStatusSuccess(matrixProject.scheduleBuild2(0));
assertEquals("aws-linux-dummy", matrixProject.getBuilds().getLastBuild().getBuiltOn().getLabelString());
}
@Test
public void shouldBeAbleToBlockFlyweightTaskAtTheLastMinute() throws Exception {
MatrixProject matrixProject = r.createMatrixProject("downstream");
matrixProject.setDisplayName("downstream");
matrixProject.setAxes(new AxisList(
new Axis("axis", "a", "b")
));
Label label = LabelExpression.get("aws-linux-dummy");
DummyCloudImpl dummyCloud = new DummyCloudImpl(r, 0);
dummyCloud.label = label;
BlockDownstreamProjectExecution property = new BlockDownstreamProjectExecution();
dummyCloud.getNodeProperties().add(property);
r.jenkins.clouds.add(dummyCloud);
matrixProject.setAssignedLabel(label);
FreeStyleProject upstreamProject = r.createFreeStyleProject("upstream");
upstreamProject.getBuildersList().add(new SleepBuilder(10000));
upstreamProject.setDisplayName("upstream");
//let's assume the flyweighttask has an upstream project and that must be blocked
// when the upstream project is running
matrixProject.addTrigger(new ReverseBuildTrigger("upstream", Result.SUCCESS));
matrixProject.setBlockBuildWhenUpstreamBuilding(true);
//we schedule the project but we pretend no executors are available thus
//the flyweight task is in the buildable queue without being executed
QueueTaskFuture downstream = matrixProject.scheduleBuild2(0);
if (downstream == null) {
throw new Exception("the flyweight task could not be scheduled, thus the test will be interrupted");
}
//let s wait for the Queue instance to be updated
while (Queue.getInstance().getBuildableItems().size() != 1) {
Thread.sleep(10);
}
//in this state the build is not blocked, it's just waiting for an available executor
assertFalse(Queue.getInstance().getItems()[0].isBlocked());
//we start the upstream project that should block the downstream one
QueueTaskFuture upstream = upstreamProject.scheduleBuild2(0);
if (upstream == null) {
throw new Exception("the upstream task could not be scheduled, thus the test will be interrupted");
}
//let s wait for the Upstream to enter the buildable Queue
boolean enteredTheQueue = false;
while (!enteredTheQueue) {
for (Queue.BuildableItem item : Queue.getInstance().getBuildableItems()) {
if (item.task.getDisplayName() != null && item.task.getDisplayName().equals(upstreamProject.getDisplayName())) {
enteredTheQueue = true;
}
}
}
//let's wait for the upstream project to actually start so that we're sure the Queue has been updated
//when the upstream starts the downstream has already left the buildable queue and the queue is empty
while (!Queue.getInstance().getBuildableItems().isEmpty()) {
Thread.sleep(10);
}
assertTrue(Queue.getInstance().getItems()[0].isBlocked());
assertTrue(Queue.getInstance().getBlockedItems().get(0).task.getDisplayName().equals(matrixProject.displayName));
//once the upstream is completed, the downstream can join the buildable queue again.
r.assertBuildStatusSuccess(upstream);
while (Queue.getInstance().getBuildableItems().isEmpty()) {
Thread.sleep(10);
}
assertFalse(Queue.getInstance().getItems()[0].isBlocked());
assertTrue(Queue.getInstance().getBlockedItems().isEmpty());
assertTrue(Queue.getInstance().getBuildableItems().get(0).task.getDisplayName().equals(matrixProject.displayName));
}
//let's make sure that the downstram project is not started before the upstream --> we want to simulate
// the case: buildable-->blocked-->buildable
public static class BlockDownstreamProjectExecution extends NodeProperty<Slave> {
@Override
public CauseOfBlockage canTake(Queue.BuildableItem item) {
if (item.task.getName().equals("downstream")) {
return new CauseOfBlockage() {
@Override
public String getShortDescription() {
return "slave not provisioned";
}
};
}
return null;
}
@TestExtension("shouldBeAbleToBlockFlyWeightTaskOnLastMinute")
public static class DescriptorImpl extends NodePropertyDescriptor {
@Override
public String getDisplayName() {
return "Some Property";
}
}
}
}
......@@ -34,8 +34,13 @@ import java.util.Collection;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import hudson.util.DescribableList;
import jenkins.model.Jenkins;
import org.jvnet.hudson.test.JenkinsRule;
import javax.xml.ws.Provider;
/**
* {@link Cloud} implementation useful for testing.
*
......@@ -64,12 +69,26 @@ public class DummyCloudImpl extends Cloud {
*/
public Label label;
public List<NodeProperty<?>> getNodeProperties() {
return this.nodeProperties;
}
List<NodeProperty<?>> nodeProperties =
new ArrayList<NodeProperty<?>>();
public DummyCloudImpl(JenkinsRule rule, int delay) {
super("test");
this.rule = rule;
this.delay = delay;
}
public DummyCloudImpl(JenkinsRule rule, int delay, List<NodeProperty<?>> nodeProperties) {
super("test");
this.rule = rule;
this.delay = delay;
this.nodeProperties = nodeProperties;
}
public Collection<PlannedNode> provision(Label label, int excessWorkload) {
List<PlannedNode> r = new ArrayList<PlannedNode>();
if(label!=this.label) return r; // provisioning impossible
......@@ -105,7 +124,17 @@ public class DummyCloudImpl extends Cloud {
Thread.sleep(time);
System.out.println("launching slave");
DumbSlave slave = rule.createSlave(label);
final DumbSlave slave = rule.createSlave(label);
Provider<NodeProperty> nodePropertyProvider = new Provider<NodeProperty>() {
@Override
public NodeProperty invoke(NodeProperty request) {
request.setNode(slave);
return request;
}
};
for (NodeProperty nodeProperty : nodeProperties) {
slave.getNodeProperties().add(nodePropertyProvider.invoke(nodeProperty));
}
computer = slave.toComputer();
computer.connect(false).get();
synchronized (DummyCloudImpl.this) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册