提交 52a1a103 编写于 作者: S Stephen Connolly 提交者: Oleg Nenashev

[FIXED JENKINS-35160] - Job deletion: Wait up to 15 seconds for interrupted...

[FIXED JENKINS-35160] - Job deletion: Wait up to 15 seconds for interrupted builds to complete (#2789)

* [FIXED JENKINS-35160] Wait up to 15 seconds for interrupted builds to complete

- Also now aware of concurrent builds

* [JENKINS-35160] Tests are good, they catch bugs

* [JENKINS-35160] We should do the interrupt for any Item not just Jobs

* [JENKINS-35160] s/DeleteBlocker/ItemDeletion/g

Left over references before I settled on a better name

* [JENKINS-35160] Switch to Failure for better HTML rendering

* [JENKINS-35160] Align the i18n key with owning class
上级 d48e1d6d
......@@ -33,6 +33,8 @@ import hudson.BulkChange;
import hudson.cli.declarative.CLIResolver;
import hudson.model.listeners.ItemListener;
import hudson.model.listeners.SaveableListener;
import hudson.model.queue.Tasks;
import hudson.model.queue.WorkUnit;
import hudson.security.AccessControlled;
import hudson.security.Permission;
import hudson.security.ACL;
......@@ -41,8 +43,13 @@ import hudson.util.AlternativeUiTextProvider.Message;
import hudson.util.AtomicFileWriter;
import hudson.util.IOUtils;
import hudson.util.Secret;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import jenkins.model.DirectlyModifiableTopLevelItemGroup;
import jenkins.model.Jenkins;
import jenkins.model.queue.ItemDeletion;
import jenkins.security.NotReallyRoleSensitiveCallable;
import jenkins.util.xml.XMLUtils;
......@@ -79,6 +86,7 @@ import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import static hudson.model.queue.Executables.getParentOf;
import static javax.servlet.http.HttpServletResponse.SC_BAD_REQUEST;
import org.apache.commons.io.FileUtils;
import org.kohsuke.accmod.Restricted;
......@@ -577,9 +585,102 @@ public abstract class AbstractItem extends Actionable implements Item, HttpDelet
*/
public void delete() throws IOException, InterruptedException {
checkPermission(DELETE);
synchronized (this) { // could just make performDelete synchronized but overriders might not honor that
performDelete();
} // JENKINS-19446: leave synch block, but JENKINS-22001: still notify synchronously
boolean responsibleForAbortingBuilds = !ItemDeletion.contains(this);
boolean ownsRegistration = ItemDeletion.register(this);
if (!ownsRegistration && ItemDeletion.isRegistered(this)) {
// we are not the owning thread and somebody else is concurrently deleting this exact item
throw new Failure(Messages.AbstractItem_BeingDeleted(getPronoun()));
}
try {
// if a build is in progress. Cancel it.
if (responsibleForAbortingBuilds || ownsRegistration) {
Queue queue = Queue.getInstance();
if (this instanceof Queue.Task) {
// clear any items in the queue so they do not get picked up
queue.cancel((Queue.Task) this);
}
// now cancel any child items - this happens after ItemDeletion registration, so we can use a snapshot
for (Queue.Item i : queue.getItems()) {
Item item = Tasks.getItemOf(i.task);
while (item != null) {
if (item == this) {
queue.cancel(i);
break;
}
if (item.getParent() instanceof Item) {
item = (Item) item.getParent();
} else {
break;
}
}
}
// interrupt any builds in progress (and this should be a recursive test so that folders do not pay
// the 15 second delay for every child item). This happens after queue cancellation, so will be
// a complete set of builds in flight
Map<Executor, Queue.Executable> buildsInProgress = new LinkedHashMap<>();
for (Computer c : Jenkins.getInstance().getComputers()) {
for (Executor e : c.getAllExecutors()) {
WorkUnit workUnit = e.getCurrentWorkUnit();
if (workUnit != null) {
Item item = Tasks.getItemOf(getParentOf(workUnit.getExecutable()));
if (item != null) {
while (item != null) {
if (item == this) {
buildsInProgress.put(e, e.getCurrentExecutable());
e.interrupt(Result.ABORTED);
break;
}
if (item.getParent() instanceof Item) {
item = (Item) item.getParent();
} else {
break;
}
}
}
}
}
}
if (!buildsInProgress.isEmpty()) {
// give them 15 seconds or so to respond to the interrupt
long expiration = System.nanoTime() + TimeUnit.SECONDS.toNanos(15);
// comparison with executor.getCurrentExecutable() == computation currently should always be true
// as we no longer recycle Executors, but safer to future-proof in case we ever revisit recycling
while (!buildsInProgress.isEmpty() && expiration - System.nanoTime() > 0L) {
// we know that ItemDeletion will prevent any new builds in the queue
// ItemDeletion happens-before Queue.cancel so we know that the Queue will stay clear
// Queue.cancel happens-before collecting the buildsInProgress list
// thus buildsInProgress contains the complete set we need to interrupt and wait for
for (Iterator<Map.Entry<Executor, Queue.Executable>> iterator =
buildsInProgress.entrySet().iterator();
iterator.hasNext(); ) {
Map.Entry<Executor, Queue.Executable> entry = iterator.next();
// comparison with executor.getCurrentExecutable() == executable currently should always be
// true as we no longer recycle Executors, but safer to future-proof in case we ever
// revisit recycling.
if (!entry.getKey().isAlive()
|| entry.getValue() != entry.getKey().getCurrentExecutable()) {
iterator.remove();
}
// I don't know why, but we have to keep interrupting
entry.getKey().interrupt(Result.ABORTED);
}
Thread.sleep(50L);
}
if (!buildsInProgress.isEmpty()) {
throw new Failure(Messages.AbstractItem_FailureToStopBuilds(
buildsInProgress.size(), getFullDisplayName()
));
}
}
}
synchronized (this) { // could just make performDelete synchronized but overriders might not honor that
performDelete();
} // JENKINS-19446: leave synch block, but JENKINS-22001: still notify synchronously
} finally {
if (ownsRegistration) {
ItemDeletion.deregister(this);
}
}
getParent().onDeleted(AbstractItem.this);
Jenkins.getInstance().rebuildDependencyGraphAsync();
}
......
......@@ -971,6 +971,19 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
return new ArrayList<OneOffExecutor>(oneOffExecutors);
}
/**
* Gets the read-only snapshot view of all {@link Executor} instances including {@linkplain OneOffExecutor}s.
*
* @return the read-only snapshot view of all {@link Executor} instances including {@linkplain OneOffExecutor}s.
* @since TODO
*/
public List<Executor> getAllExecutors() {
List<Executor> result = new ArrayList<>(executors.size() + oneOffExecutors.size());
result.addAll(executors);
result.addAll(oneOffExecutors);
return result;
}
/**
* Used to render the list of executors.
* @return a snapshot of the executor display information
......
......@@ -941,12 +941,7 @@ public class Executor extends Thread implements ModelObject {
return null;
}
for (Computer computer : jenkins.getComputers()) {
for (Executor executor : computer.getExecutors()) {
if (executor.getCurrentExecutable() == executable) {
return executor;
}
}
for (Executor executor : computer.getOneOffExecutors()) {
for (Executor executor : computer.getAllExecutors()) {
if (executor.getCurrentExecutable() == executable) {
return executor;
}
......
......@@ -262,20 +262,6 @@ public abstract class Job<JobT extends Job<JobT, RunT>, RunT extends Run<JobT, R
}
}
@Override
protected void performDelete() throws IOException, InterruptedException {
// if a build is in progress. Cancel it.
RunT lb = getLastBuild();
if (lb != null) {
Executor e = lb.getExecutor();
if (e != null) {
e.interrupt();
// should we block until the build is cancelled?
}
}
super.performDelete();
}
/*package*/ TextFile getNextBuildNumberFile() {
return new TextFile(new File(this.getRootDir(), "nextBuildNumber"));
}
......
......@@ -55,12 +55,7 @@ public abstract class RestartListener implements ExtensionPoint {
public boolean isReadyToRestart() throws IOException, InterruptedException {
for (Computer c : Jenkins.getInstance().getComputers()) {
if (c.isOnline()) {
for (Executor e : c.getExecutors()) {
if (blocksRestart(e)) {
return false;
}
}
for (Executor e : c.getOneOffExecutors()) {
for (Executor e : c.getAllExecutors()) {
if (blocksRestart(e)) {
return false;
}
......
......@@ -23,9 +23,11 @@
*/
package hudson.model.queue;
import hudson.model.Queue;
import hudson.model.Queue.Item;
import hudson.model.Queue.Task;
import hudson.security.ACL;
import javax.annotation.CheckForNull;
import org.acegisecurity.Authentication;
import java.util.Collection;
......@@ -34,6 +36,8 @@ import javax.annotation.Nonnull;
import jenkins.security.QueueItemAuthenticator;
import jenkins.security.QueueItemAuthenticatorConfiguration;
import static hudson.model.queue.Executables.getParentOf;
/**
* Convenience methods around {@link Task} and {@link SubTask}.
*
......@@ -66,6 +70,27 @@ public class Tasks {
}
}
/**
* Gets the {@link hudson.model.Item} most closely associated with the supplied {@link SubTask}.
* @param t the {@link SubTask}.
* @return the {@link hudson.model.Item} associated with the {@link SubTask} or {@code null} if this
* {@link SubTask} is not associated with an {@link hudson.model.Item}
* @since TODO
*/
@CheckForNull
public static hudson.model.Item getItemOf(@Nonnull SubTask t) {
// TODO move to default method on SubTask once code level is Java 8
Queue.Task p = getOwnerTaskOf(t);
while (!(p instanceof hudson.model.Item)) {
Queue.Task o = getOwnerTaskOf(p);
if (o == p) {
break;
}
p = o;
}
return p instanceof hudson.model.Item ? (hudson.model.Item)p : null;
}
/**
* Helper method to safely invoke {@link Task#getDefaultAuthentication()} on classes that may come
* from plugins compiled against an earlier version of Jenkins.
......
/*
* The MIT License
*
* Copyright (c) 2017 CloudBees, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package jenkins.model.queue;
import hudson.Extension;
import hudson.ExtensionList;
import hudson.model.Action;
import hudson.model.Item;
import hudson.model.Queue;
import hudson.model.queue.Tasks;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.GuardedBy;
/**
* A {@link Queue.QueueDecisionHandler} that blocks items being deleted from entering the queue.
*
* @since TODO
*/
@Extension
public class ItemDeletion extends Queue.QueueDecisionHandler {
/**
* Lock to guard the {@link #registrations} set.
*/
private final ReadWriteLock lock = new ReentrantReadWriteLock();
/**
* The explicit deletions in progress.
*/
@GuardedBy("lock")
private final Set<Item> registrations = new HashSet<>();
@GuardedBy("lock")
private boolean _contains(@Nonnull Item item) {
if (registrations.isEmpty()) {
// no point walking everything if there is nothing in-flight
return false;
}
while (item != null) {
if (registrations.contains(item)) {
return true;
}
if (item.getParent() instanceof Item) {
item = (Item) item.getParent();
} else {
break;
}
}
return false;
}
/**
* Checks if the supplied {@link Item} or any of its {@link Item#getParent()} are being deleted.
*
* @param item the item.
* @return {@code true} if the {@link Item} or any of its {@link Item#getParent()} are being deleted.
*/
public static boolean contains(@Nonnull Item item) {
ItemDeletion instance = instance();
if (instance == null) {
return false;
}
instance.lock.readLock().lock();
try {
return instance._contains(item);
} finally {
instance.lock.readLock().unlock();
}
}
/**
* Checks if the supplied {@link Item} is explicitly registered for deletion.
*
* @param item the item.
* @return {@code true} if and only if the supplied {@link Item} has been {@linkplain #register(Item)}ed for
* deletion.
*/
public static boolean isRegistered(@Nonnull Item item) {
ItemDeletion instance = instance();
if (instance == null) {
return false;
}
instance.lock.readLock().lock();
try {
return instance.registrations.contains(item);
} finally {
instance.lock.readLock().unlock();
}
}
/**
* Register the supplied {@link Item} for deletion.
*
* @param item the {@link Item} that is to be deleted.
* @return {@code true} if and only if the {@link Item} was registered and the caller is now responsible to call
* {@link #deregister(Item)}.
*/
public static boolean register(@Nonnull Item item) {
ItemDeletion instance = instance();
if (instance == null) {
return false;
}
instance.lock.writeLock().lock();
try {
return instance.registrations.add(item);
} finally {
instance.lock.writeLock().unlock();
}
}
/**
* Deregister the supplied {@link Item} for deletion.
*
* @param item the {@link Item} that was to be deleted and is now either deleted or the delete was aborted.
*/
public static void deregister(@Nonnull Item item) {
ItemDeletion instance = instance();
if (instance != null) {
instance.lock.writeLock().lock();
try {
instance.registrations.remove(item);
} finally {
instance.lock.writeLock().unlock();
}
}
}
/**
* Gets the singleton instance.
*
* @return the {@link ItemDeletion} singleton.
*/
@CheckForNull
private static ItemDeletion instance() {
return ExtensionList.lookup(Queue.QueueDecisionHandler.class).get(ItemDeletion.class);
}
/**
* {@inheritDoc}
*/
@Override
public boolean shouldSchedule(Queue.Task p, List<Action> actions) {
Item item = Tasks.getItemOf(p);
if (item != null) {
lock.readLock().lock();
try {
return !_contains(item);
} finally {
lock.readLock().unlock();
}
}
return true;
}
}
......@@ -32,6 +32,9 @@ AbstractItem.NoSuchJobExists=No such job \u2018{0}\u2019 exists. Perhaps you mea
AbstractItem.NoSuchJobExistsWithoutSuggestion=No such job \u2018{0}\u2019 exists.
AbstractItem.Pronoun=Item
AbstractItem.TaskNoun=Build
AbstractItem.BeingDeleted={0} is currently being deleted
AbstractItem.FailureToStopBuilds=Failed to interrupt and stop {0,choice,1#{0,number,integer} build|1<{0,number,integer} \
builds} of {1}
AbstractProject.AssignedLabelString_NoMatch_DidYouMean=There\u2019s no agent/cloud that matches this assignment. Did you mean \u2018{1}\u2019 instead of \u2018{0}\u2019?
AbstractProject.NewBuildForWorkspace=Scheduling a new build to get a workspace.
AbstractProject.AwaitingBuildForWorkspace=Awaiting build to get a workspace.
......
......@@ -32,15 +32,19 @@ import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.TextPage;
import hudson.Functions;
import hudson.model.queue.QueueTaskFuture;
import hudson.util.TextFile;
import hudson.util.TimeUnit2;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.text.MessageFormat;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import jenkins.model.ProjectNamingStrategy;
import org.hamcrest.Matchers;
import org.junit.Rule;
import org.junit.Test;
import org.jvnet.hudson.test.Issue;
......@@ -48,6 +52,7 @@ import org.jvnet.hudson.test.FailureBuilder;
import org.jvnet.hudson.test.JenkinsRule;
import org.jvnet.hudson.test.JenkinsRule.WebClient;
import org.jvnet.hudson.test.RunLoadCounter;
import org.jvnet.hudson.test.SleepBuilder;
import org.jvnet.hudson.test.recipes.LocalData;
import static org.hamcrest.Matchers.endsWith;
......@@ -351,6 +356,28 @@ public class JobTest {
tryRename("myJob8 ", "myJob8 ", null, true);
}
@Issue("JENKINS-35160")
@Test
public void interruptOnDelete() throws Exception {
j.jenkins.setNumExecutors(2);
Queue.getInstance().maintain();
final FreeStyleProject p = j.createFreeStyleProject();
p.addProperty(new ParametersDefinitionProperty(
new StringParameterDefinition("dummy", "0")));
p.setConcurrentBuild(true);
p.getBuildersList().add(new SleepBuilder(30000)); // we want the uninterrupted job to run for long time
FreeStyleBuild build1 = p.scheduleBuild2(0).getStartCondition().get();
FreeStyleBuild build2 = p.scheduleBuild2(0).getStartCondition().get();
QueueTaskFuture<FreeStyleBuild> build3 = p.scheduleBuild2(0);
long start = System.nanoTime();
p.delete();
long end = System.nanoTime();
assertThat(end - start, Matchers.lessThan(TimeUnit.SECONDS.toNanos(1)));
assertThat(build1.getResult(), Matchers.is(Result.ABORTED));
assertThat(build2.getResult(), Matchers.is(Result.ABORTED));
assertThat(build3.isCancelled(), Matchers.is(true));
}
private void tryRename(String initialName, String submittedName,
String correctResult, boolean shouldSkipConfirm) throws Exception {
j.jenkins.setCrumbIssuer(null);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册