SlaveComputer.java 39.1 KB
Newer Older
K
kohsuke 已提交
1 2
/*
 * The MIT License
3
 *
K
kohsuke 已提交
4
 * Copyright (c) 2004-2009, Sun Microsystems, Inc., Kohsuke Kawaguchi, Stephen Connolly
5
 *
K
kohsuke 已提交
6 7 8 9 10 11
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
12
 *
K
kohsuke 已提交
13 14
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
15
 *
K
kohsuke 已提交
16 17 18 19 20 21 22 23
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24 25
package hudson.slaves;

K
Kohsuke Kawaguchi 已提交
26
import hudson.AbortException;
27
import hudson.FilePath;
28
import hudson.Functions;
29
import hudson.Main;
30
import hudson.RestrictedSince;
K
kohsuke 已提交
31
import hudson.Util;
32
import hudson.console.ConsoleLogFilter;
K
Kohsuke Kawaguchi 已提交
33 34 35 36 37 38 39 40 41
import hudson.model.Computer;
import hudson.model.Executor;
import hudson.model.ExecutorListener;
import hudson.model.Node;
import hudson.model.Queue;
import hudson.model.Slave;
import hudson.model.TaskListener;
import hudson.model.User;
import hudson.remoting.Channel;
42
import hudson.remoting.ChannelBuilder;
43
import hudson.remoting.ChannelClosedException;
44
import hudson.remoting.CommandTransport;
45
import hudson.remoting.Launcher;
K
Kohsuke Kawaguchi 已提交
46
import hudson.remoting.VirtualChannel;
47
import hudson.security.ACL;
48
import hudson.security.ACLContext;
49
import hudson.slaves.OfflineCause.ChannelTermination;
K
Kohsuke Kawaguchi 已提交
50 51 52 53
import hudson.util.Futures;
import hudson.util.NullStream;
import hudson.util.RingBufferLogHandler;
import hudson.util.StreamTaskListener;
54
import hudson.util.VersionNumber;
55 56
import hudson.util.io.RewindableFileOutputStream;
import hudson.util.io.RewindableRotatingFileOutputStream;
57 58 59
import jenkins.model.Jenkins;
import jenkins.security.ChannelConfigurator;
import jenkins.security.MasterToSlaveCallable;
60
import jenkins.slaves.EncryptedSlaveAgentJnlpFile;
61
import jenkins.slaves.JnlpAgentReceiver;
62
import jenkins.slaves.RemotingVersionInfo;
63
import jenkins.slaves.systemInfo.SlaveSystemInfo;
64
import jenkins.util.SystemProperties;
65
import org.kohsuke.accmod.Restricted;
66
import org.kohsuke.accmod.restrictions.Beta;
67
import org.kohsuke.accmod.restrictions.DoNotUse;
68 69 70 71 72
import org.kohsuke.stapler.HttpRedirect;
import org.kohsuke.stapler.HttpResponse;
import org.kohsuke.stapler.QueryParameter;
import org.kohsuke.stapler.StaplerRequest;
import org.kohsuke.stapler.StaplerResponse;
K
Kohsuke Kawaguchi 已提交
73
import org.kohsuke.stapler.WebMethod;
74
import org.kohsuke.stapler.export.Exported;
K
Kohsuke Kawaguchi 已提交
75
import org.kohsuke.stapler.interceptor.RequirePOST;
76

77
import javax.annotation.CheckForNull;
78
import javax.annotation.Nonnull;
79
import javax.annotation.OverridingMethodsMustInvokeSuper;
K
Kohsuke Kawaguchi 已提交
80 81 82 83 84 85 86 87 88 89
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.security.Security;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
90 91
import java.util.Map;
import java.util.TreeMap;
K
Kohsuke Kawaguchi 已提交
92 93 94 95 96 97
import java.util.concurrent.Future;
import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;

98
import static hudson.slaves.SlaveComputer.LogHolder.SLAVE_LOG_HANDLER;
99
import org.jenkinsci.remoting.util.LoggingChannelListener;
K
Kohsuke Kawaguchi 已提交
100

101

102 103 104 105 106
/**
 * {@link Computer} for {@link Slave}s.
 *
 * @author Kohsuke Kawaguchi
 */
K
kohsuke 已提交
107
public class SlaveComputer extends Computer {
108 109 110 111
    private volatile Channel channel;
    private volatile transient boolean acceptingTasks = true;
    private Charset defaultCharset;
    private Boolean isUnix;
K
kohsuke 已提交
112 113
    /**
     * Effective {@link ComputerLauncher} that hides the details of
114
     * how we launch an agent on this computer.
K
kohsuke 已提交
115 116 117
     *
     * <p>
     * This is normally the same as {@link Slave#getLauncher()} but
118
     * can be different. See {@link #grabLauncher(Node)}.
K
kohsuke 已提交
119
     */
120 121
    private ComputerLauncher launcher;

122 123 124
    /**
     * Perpetually writable log file.
     */
125
    private final RewindableFileOutputStream log;
126 127 128 129 130 131 132

    /**
     * {@link StreamTaskListener} that wraps {@link #log}, hence perpetually writable.
     */
    private final TaskListener taskListener;


133 134 135 136 137 138 139
    /**
     * Number of failed attempts to reconnect to this node
     * (so that if we keep failing to reconnect, we can stop
     * trying.)
     */
    private transient int numRetryAttempt;

K
kohsuke 已提交
140 141 142 143 144 145 146 147
    /**
     * Tracks the status of the last launch operation, which is always asynchronous.
     * This can be used to wait for the completion, or cancel the launch activity.
     */
    private volatile Future<?> lastConnectActivity = null;

    private Object constructed = new Object();

148 149
    private transient volatile String absoluteRemoteFs;

K
kohsuke 已提交
150 151
    public SlaveComputer(Slave slave) {
        super(slave);
152
        this.log = new RewindableRotatingFileOutputStream(getLogFile(), 10);
153
        this.taskListener = new StreamTaskListener(decorate(this.log));
154
        assert slave.getNumExecutors()!=0 : "Computer created with 0 executors";
K
kohsuke 已提交
155
    }
156

157 158 159 160 161 162 163 164 165 166 167 168 169 170
    /**
     * Passes the given stream through every registered {@link ConsoleLogFilter}.
     * A filter that throws is logged at WARNING level and skipped; the remaining
     * filters are still applied.
     *
     * @param os the stream to decorate.
     * @return the stream as wrapped by all filters that succeeded.
     */
    private OutputStream decorate(OutputStream os) {
        OutputStream decorated = os;
        for (ConsoleLogFilter filter : ConsoleLogFilter.all()) {
            try {
                decorated = filter.decorateLogger(this, decorated);
            } catch (IOException | InterruptedException e) {
                LOGGER.log(Level.WARNING, "Failed to filter log with " + filter, e);
            }
        }
        return decorated;
    }

171
    @Override
172
    @OverridingMethodsMustInvokeSuper
173
    public boolean isAcceptingTasks() {
174
        // our boolean flag is an override on any additional programmatic reasons why this agent might not be
175 176
        // accepting tasks.
        return acceptingTasks && super.isAcceptingTasks();
177 178
    }

K
Kohsuke Kawaguchi 已提交
179 180 181
    /**
     * @since 1.498
     */
182
    public String getJnlpMac() {
183
        return JnlpAgentReceiver.SLAVE_SECRET.mac(getName());
184 185
    }

186
    /**
     * Suspends or resumes acceptance of tasks by this agent computer. Although a
     * {@linkplain hudson.slaves.ComputerLauncher} or a {@linkplain hudson.slaves.RetentionStrategy}
     * could call this, doing so can lead to fights between multiple actors setting conflicting
     * values. If a {@link hudson.slaves.RetentionStrategy} needs to control availability, it is better
     * to override {@link hudson.slaves.RetentionStrategy#isAcceptingTasks(hudson.model.Computer)}.
     *
     * @param acceptingTasks {@code true} if the agent can accept tasks.
     */
    public void setAcceptingTasks(boolean acceptingTasks) {
        this.acceptingTasks = acceptingTasks;
    }

199
    @Override
200 201 202 203
    public Boolean isUnix() {
        return isUnix;
    }

204
    @CheckForNull
205
    @Override
206
    public Slave getNode() {
J
Jesse Glick 已提交
207 208 209 210 211 212 213
        Node node = super.getNode();
        if (node == null || node instanceof Slave) {
            return (Slave)node;
        } else {
            logger.log(Level.WARNING, "found an unexpected kind of node {0} from {1} with nodeName={2}", new Object[] {node, this, nodeName});
            return null;
        }
214 215
    }

216
    /**
B
Basil Crow 已提交
217
     * Return the {@link TaskListener} for this SlaveComputer. Never null
218 219
     * @since 2.9
     */
N
Nicolas De Loof 已提交
220 221 222 223
    public TaskListener getListener() {
        return taskListener;
    }

K
kohsuke 已提交
224 225 226 227 228 229 230 231
    /**
     * Returns the icon file name for this computer.
     *
     * @return {@code "computer-flash.gif"} while the last connect activity has not finished,
     *         otherwise the superclass icon.
     */
    @Override
    public String getIcon() {
        // Read the volatile field once so the null check and isDone() see the same future.
        final Future<?> activity = lastConnectActivity;
        final boolean launchInProgress = activity != null && !activity.isDone();
        return launchInProgress ? "computer-flash.gif" : super.getIcon();
    }

M
mindless 已提交
232 233 234 235
    /**
     * @deprecated since 2008-05-20.
     */
    @Deprecated @Override
236 237 238 239 240 241 242 243 244
    public boolean isJnlpAgent() {
        return launcher instanceof JNLPLauncher;
    }

    /**
     * Delegates to the effective launcher to tell whether launching is supported.
     */
    @Override
    public boolean isLaunchSupported() {
        final ComputerLauncher effective = launcher;
        return effective.isLaunchSupported();
    }

245
    /**
B
Basil Crow 已提交
246
     * Return the {@link ComputerLauncher} for this SlaveComputer.
247 248
     * @since 1.312
     */
249 250 251 252
    public ComputerLauncher getLauncher() {
        return launcher;
    }

253
    /**
     * Returns the innermost {@link ComputerLauncher} of this SlaveComputer, after peeling off
     * every {@link DelegatingComputerLauncher} and {@link ComputerLauncherFilter} wrapper.
     *
     * @return the first launcher in the chain that is neither of the two wrapper types.
     * @since 2.83
     */
    public ComputerLauncher getDelegatedLauncher() {
        ComputerLauncher current = launcher;
        for (;;) {
            if (current instanceof DelegatingComputerLauncher) {
                current = ((DelegatingComputerLauncher) current).getLauncher();
                continue;
            }
            if (current instanceof ComputerLauncherFilter) {
                current = ((ComputerLauncherFilter) current).getCore();
                continue;
            }
            return current;
        }
    }

272
    protected Future<?> _connect(boolean forceReconnect) {
K
kohsuke 已提交
273
        if(channel!=null)   return Futures.precomputed(null);
274
        if(!forceReconnect && isConnecting())
K
kohsuke 已提交
275
            return lastConnectActivity;
276
        if(forceReconnect && isConnecting())
K
kohsuke 已提交
277
            logger.fine("Forcing a reconnect on "+getName());
278 279

        closeChannel();
280 281 282
        return lastConnectActivity = Computer.threadPoolForRemoting.submit(() -> {
            // do this on another thread so that the lengthy launch operation
            // (which is typical) won't block UI thread.
K
Kohsuke Kawaguchi 已提交
283

284 285
            try (ACLContext ctx = ACL.as(ACL.SYSTEM)) {// background activity should run like a super user
                log.rewind();
K
kohsuke 已提交
286
                try {
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
                    for (ComputerListener cl : ComputerListener.all())
                        cl.preLaunch(SlaveComputer.this, taskListener);
                    offlineCause = null;
                    launcher.launch(SlaveComputer.this, taskListener);
                } catch (AbortException e) {
                    taskListener.error(e.getMessage());
                    throw e;
                } catch (IOException e) {
                    Util.displayIOException(e,taskListener);
                    Functions.printStackTrace(e, taskListener.error(Messages.ComputerLauncher_unexpectedError()));
                    throw e;
                } catch (InterruptedException e) {
                    Functions.printStackTrace(e, taskListener.error(Messages.ComputerLauncher_abortedLaunch()));
                    throw e;
                } catch (Exception e) {
                    Functions.printStackTrace(e, taskListener.error(Messages.ComputerLauncher_unexpectedError()));
                    throw e;
                }
            } finally {
                if (channel==null && offlineCause == null) {
                    offlineCause = new OfflineCause.LaunchFailed();
                    for (ComputerListener cl : ComputerListener.all())
                        cl.onLaunchFailure(SlaveComputer.this, taskListener);
K
kohsuke 已提交
310
                }
311
            }
312 313 314 315

            if (channel==null)
                throw new IOException("Agent failed to connect, even though the launcher didn't report it. See the log output for details.");
            return null;
316 317 318 319 320 321 322 323 324
        });
    }

    @Override
    public void taskAccepted(Executor executor, Queue.Task task) {
        super.taskAccepted(executor, task);
        if (launcher instanceof ExecutorListener) {
            ((ExecutorListener)launcher).taskAccepted(executor, task);
        }
325

326 327 328 329
        //getNode() can return null at indeterminate times when nodes go offline
        Slave node = getNode();
        if (node != null && node.getRetentionStrategy() instanceof ExecutorListener) {
            ((ExecutorListener)node.getRetentionStrategy()).taskAccepted(executor, task);
330 331 332 333 334 335 336 337 338
        }
    }

    @Override
    public void taskCompleted(Executor executor, Queue.Task task, long durationMS) {
        super.taskCompleted(executor, task, durationMS);
        if (launcher instanceof ExecutorListener) {
            ((ExecutorListener)launcher).taskCompleted(executor, task, durationMS);
        }
339
        RetentionStrategy r = getRetentionStrategy();
K
kohsuke 已提交
340 341
        if (r instanceof ExecutorListener) {
            ((ExecutorListener) r).taskCompleted(executor, task, durationMS);
342 343 344 345 346 347 348 349 350
        }
    }

    @Override
    public void taskCompletedWithProblems(Executor executor, Queue.Task task, long durationMS, Throwable problems) {
        super.taskCompletedWithProblems(executor, task, durationMS, problems);
        if (launcher instanceof ExecutorListener) {
            ((ExecutorListener)launcher).taskCompletedWithProblems(executor, task, durationMS, problems);
        }
351 352 353
        RetentionStrategy r = getRetentionStrategy();
        if (r instanceof ExecutorListener) {
            ((ExecutorListener) r).taskCompletedWithProblems(executor, task, durationMS, problems);
354 355 356
        }
    }

K
kohsuke 已提交
357 358 359 360 361 362
    /**
     * Tells whether a launch is currently in progress.
     *
     * @return {@code true} if this computer is offline and the last connect activity exists
     *         and has not completed yet.
     */
    @Override
    public boolean isConnecting() {
        // Read the volatile field once so both checks below see the same future.
        final Future<?> activity = lastConnectActivity;
        if (!isOffline()) {
            return false;
        }
        return activity != null && !activity.isDone();
    }

363 364
    public OutputStream openLogFile() {
        try {
365 366 367
            log.rewind();
            return log;
        } catch (IOException e) {
368
            logger.log(Level.SEVERE, "Failed to create log file "+getLogFile(),e);
369
            return new NullStream();
370 371 372 373 374
        }
    }

    private final Object channelLock = new Object();

375 376 377
    /**
     * Creates a {@link Channel} from the given stream and sets that to this agent.
     *
O
Oleg Nenashev 已提交
378 379
     * Same as {@link #setChannel(InputStream, OutputStream, OutputStream, Channel.Listener)}, but for
     * {@link TaskListener}.
380 381 382 383
     */
    public void setChannel(@Nonnull InputStream in, @Nonnull OutputStream out,
                           @Nonnull TaskListener taskListener,
                           @CheckForNull Channel.Listener listener) throws IOException, InterruptedException {
K
kohsuke 已提交
384 385 386
        setChannel(in,out,taskListener.getLogger(),listener);
    }

387
    /**
388
     * Creates a {@link Channel} from the given stream and sets that to this agent.
K
kohsuke 已提交
389 390
     *
     * @param in
N
Nicolas De Loof 已提交
391
     *      Stream connected to the remote agent. It's the caller's responsibility to do
K
kohsuke 已提交
392 393 394 395 396
     *      buffering on this stream, if that's necessary.
     * @param out
     *      Stream connected to the remote peer. It's the caller's responsibility to do
     *      buffering on this stream, if that's necessary.
     * @param launchLog
397
     *      If non-null, receive the portion of data in {@code is} before
K
kohsuke 已提交
398 399 400 401
     *      the data goes into the "binary mode". This is useful
     *      when the established communication channel might include some data that might
     *      be useful for debugging/trouble-shooting.
     * @param listener
402
     *      Gets a notification when the channel closes, to perform clean up. Can be null.
403 404
     *      By the time this method is called, the cause of the termination is reported to the user,
     *      so the implementation of the listener doesn't need to do that again.
405
     */
406 407 408
    public void setChannel(@Nonnull InputStream in, @Nonnull OutputStream out,
                           @CheckForNull OutputStream launchLog,
                           @CheckForNull Channel.Listener listener) throws IOException, InterruptedException {
K
Kohsuke Kawaguchi 已提交
409 410 411 412
        ChannelBuilder cb = new ChannelBuilder(nodeName,threadPoolForRemoting)
            .withMode(Channel.Mode.NEGOTIATE)
            .withHeaderStream(launchLog);

413 414
        for (ChannelConfigurator cc : ChannelConfigurator.all()) {
            cc.onChannelBuilding(cb,this);
K
Kohsuke Kawaguchi 已提交
415 416 417
        }

        Channel channel = cb.build(in,out);
418 419 420
        setChannel(channel,launchLog,listener);
    }

421 422 423 424 425 426 427 428 429 430 431 432 433 434
    /**
     * Creates a {@link Channel} from the given Channel Builder and Command Transport.
     * This method can be used to allow {@link ComputerLauncher}s to create channels not based on I/O streams.
     *
     * @param cb
     *      Channel Builder.
     *      To print launch logs this channel builder should have a Header Stream defined
     *      (see {@link ChannelBuilder#getHeaderStream()}) in this argument or by one of {@link ChannelConfigurator}s.
     * @param commandTransport
     *      Command Transport
     * @param listener
     *      Gets a notification when the channel closes, to perform clean up. Can be {@code null}.
     *      By the time this method is called, the cause of the termination is reported to the user,
     *      so the implementation of the listener doesn't need to do that again.
     * @since 2.127
     */
    @Restricted(Beta.class)
    public void setChannel(@Nonnull ChannelBuilder cb,
                           @Nonnull CommandTransport commandTransport,
                           @CheckForNull Channel.Listener listener) throws IOException, InterruptedException {
        for (ChannelConfigurator configurator : ChannelConfigurator.all()) {
            configurator.onChannelBuilding(cb, this);
        }

        // The header stream is read after the configurators ran, since one of them may have set it.
        final OutputStream launchLog = cb.getHeaderStream();
        if (launchLog == null) {
            LOGGER.log(Level.WARNING, "No header stream defined when setting channel for computer {0}. " +
                    "Launch log won't be printed", this);
        }
        final Channel built = cb.build(commandTransport);
        setChannel(built, launchLog, listener);
    }

J
Jesse Glick 已提交
454 455 456 457 458 459 460 461
    /**
     * Shows {@link Channel#classLoadingCount}, obtained by running {@link LoadingCount} over the channel.
     * @since 1.495
     */
    public int getClassLoadingCount() throws IOException, InterruptedException {
        final Integer count = channel.call(new LoadingCount(false));
        return count;
    }

462 463 464
    /**
     * Shows {@link Channel#classLoadingPrefetchCacheCount}, obtained by running
     * {@link LoadingPrefetchCacheCount} over the channel.
     * @return -1 in case that capability is not supported
     * @since 1.519
     */
    public int getClassLoadingPrefetchCacheCount() throws IOException, InterruptedException {
        if (channel.remoteCapability.supportsPrefetch()) {
            return channel.call(new LoadingPrefetchCacheCount());
        }
        return -1;
    }

J
Jesse Glick 已提交
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497
    /**
     * Shows {@link Channel#resourceLoadingCount}, obtained by running {@link LoadingCount} over the channel.
     * @since 1.495
     */
    public int getResourceLoadingCount() throws IOException, InterruptedException {
        final Integer count = channel.call(new LoadingCount(true));
        return count;
    }

    /**
     * Shows {@link Channel#classLoadingTime}, obtained by running {@link LoadingTime} over the channel.
     * @since 1.495
     */
    public long getClassLoadingTime() throws IOException, InterruptedException {
        final Long time = channel.call(new LoadingTime(false));
        return time;
    }

    /**
     * Shows {@link Channel#resourceLoadingTime}, obtained by running {@link LoadingTime} over the channel.
     * @since 1.495
     */
    public long getResourceLoadingTime() throws IOException, InterruptedException {
        final Long time = channel.call(new LoadingTime(true));
        return time;
    }

498
    /**
499
     * Returns the remote FS root absolute path or {@code null} if the agent is off-line. The absolute path may change
500 501 502
     * between connections if the connection method does not provide a consistent working directory and the node's
     * remote FS is specified as a relative path.
     *
503
     * @return the remote FS root absolute path or {@code null} if the agent is off-line.
504
     * @since 1.606
505
     */
506 507 508 509 510
    @CheckForNull
    public String getAbsoluteRemoteFs() {
        return channel == null ? null : absoluteRemoteFs;
    }

511 512 513 514 515 516 517
    /**
     * Just for the RESTful API.
     * Returns the remote FS root absolute path or {@code null} if the agent is off-line. The absolute path may change
     * between connections if the connection method does not provide a consistent working directory and the node's
     * remote FS is specified as a relative path.
     * @see #getAbsoluteRemoteFs()
     * @return the remote FS root absolute path or {@code null} if the agent is off-line or the caller
     *         lacks the CONNECT permission.
     * @since 2.125
     */
    @Exported
    @Restricted(DoNotUse.class)
    @CheckForNull
    public String getAbsoluteRemotePath() {
        return hasPermission(CONNECT) ? getAbsoluteRemoteFs() : null;
    }

531
    static class LoadingCount extends MasterToSlaveCallable<Integer,RuntimeException> {
J
Jesse Glick 已提交
532 533 534 535 536 537
        private final boolean resource;
        LoadingCount(boolean resource) {
            this.resource = resource;
        }
        @Override public Integer call() {
            Channel c = Channel.current();
O
Oleg Nenashev 已提交
538 539 540
            if (c == null) {
                return -1;
            }
J
Jesse Glick 已提交
541 542 543 544
            return resource ? c.resourceLoadingCount.get() : c.classLoadingCount.get();
        }
    }

545
    static class LoadingPrefetchCacheCount extends MasterToSlaveCallable<Integer,RuntimeException> {
546
        @Override public Integer call() {
547 548 549 550 551
            Channel c = Channel.current();
            if (c == null) {
                return -1;
            }
            return c.classLoadingPrefetchCacheCount.get();
552 553 554
        }
    }

555
    static class LoadingTime extends MasterToSlaveCallable<Long,RuntimeException> {
J
Jesse Glick 已提交
556 557 558 559 560 561
        private final boolean resource;
        LoadingTime(boolean resource) {
            this.resource = resource;
        }
        @Override public Long call() {
            Channel c = Channel.current();
O
Oleg Nenashev 已提交
562
            if (c == null) {
563
                return -1L;
O
Oleg Nenashev 已提交
564
            }
J
Jesse Glick 已提交
565 566 567 568
            return resource ? c.resourceLoadingTime.get() : c.classLoadingTime.get();
        }
    }

569
    /**
570
     * Sets up the connection through an existing channel.
571 572 573
     * @param channel the channel to use; <strong>warning:</strong> callers are expected to have called {@link ChannelConfigurator} already.
     * @param launchLog Launch log. If not {@code null}, will receive launch log messages
     * @param listener Channel event listener to be attached (if not {@code null})
574 575
     * @since 1.444
     */
576 577 578
    public void setChannel(@Nonnull Channel channel,
                           @CheckForNull OutputStream launchLog,
                           @CheckForNull Channel.Listener listener) throws IOException, InterruptedException {
579 580 581
        if(this.channel!=null)
            throw new IllegalStateException("Already connected");

582
        final TaskListener taskListener = launchLog != null ? new StreamTaskListener(launchLog) : TaskListener.NULL;
583
        PrintStream log = taskListener.getLogger();
584

585 586
        channel.setProperty(SlaveComputer.class, this);

587
        channel.addListener(new LoggingChannelListener(logger, Level.FINEST) {
588
            @Override
589 590
            public void onClosed(Channel c, IOException cause) {
                // Orderly shutdown will have null exception
591 592
                if (cause!=null) {
                    offlineCause = new ChannelTermination(cause);
593
                    Functions.printStackTrace(cause, taskListener.error("Connection terminated"));
594 595 596
                } else {
                    taskListener.getLogger().println("Connection terminated");
                }
597
                closeChannel();
598 599 600 601 602 603
                try {
                    launcher.afterDisconnect(SlaveComputer.this, taskListener);
                } catch (Throwable t) {
                    LogRecord lr = new LogRecord(Level.SEVERE,
                            "Launcher {0}'s afterDisconnect method propagated an exception when {1}'s connection was closed: {2}");
                    lr.setThrown(t);
604
                    lr.setParameters(new Object[]{launcher, SlaveComputer.this.getName(), t.getMessage()});
605 606
                    logger.log(lr);
                }
607 608
            }
        });
609 610
        if(listener!=null)
            channel.addListener(listener);
611

612
        String slaveVersion = channel.call(new SlaveVersion());
N
Nicolas De Loof 已提交
613
        log.println("Remoting version: " + slaveVersion);
614 615 616 617 618 619
        VersionNumber agentVersion = new VersionNumber(slaveVersion);
        if (agentVersion.isOlderThan(RemotingVersionInfo.getMinimumSupportedVersion())) {
            log.println(String.format("WARNING: Remoting version is older than a minimum required one (%s). " +
                    "Connection will not be rejected, but the compatibility is NOT guaranteed",
                    RemotingVersionInfo.getMinimumSupportedVersion()));
        }
620

621 622 623 624 625
        boolean _isUnix = channel.call(new DetectOS());
        log.println(_isUnix? hudson.model.Messages.Slave_UnixSlave():hudson.model.Messages.Slave_WindowsSlave());

        String defaultCharsetName = channel.call(new DetectDefaultCharset());

626 627 628 629
        Slave node = getNode();
        if (node == null) { // Node has been disabled/removed during the connection
            throw new IOException("Node "+nodeName+" has been deleted during the channel setup");
        }
630

631 632 633 634
        String remoteFS = node.getRemoteFS();
        if (Util.isRelativePath(remoteFS)) {
            remoteFS = channel.call(new AbsolutePath(remoteFS));
            log.println("NOTE: Relative remote path resolved to: "+remoteFS);
635
        }
636 637 638 639
        if(_isUnix && !remoteFS.contains("/") && remoteFS.contains("\\"))
            log.println("WARNING: "+remoteFS
                    +" looks suspiciously like Windows path. Maybe you meant "+remoteFS.replace('\\','/')+"?");
        FilePath root = new FilePath(channel,remoteFS);
640

K
Kohsuke Kawaguchi 已提交
641 642 643 644 645
        // reference counting problem is known to happen, such as JENKINS-9017, and so as a preventive measure
        // we pin the base classloader so that it'll never get GCed. When this classloader gets released,
        // it'll have a catastrophic impact on the communication.
        channel.pinClassLoader(getClass().getClassLoader());

646
        channel.call(new SlaveInitializer(DEFAULT_RING_BUFFER_SIZE));
647
        try (ACLContext ctx = ACL.as(ACL.SYSTEM)) {
648 649 650 651
            for (ComputerListener cl : ComputerListener.all()) {
                cl.preOnline(this,channel,root,taskListener);
            }
        }
652

653 654
        offlineCause = null;

655 656 657
        // update the data structure atomically to prevent others from seeing a channel that's not properly initialized yet
        synchronized(channelLock) {
            if(this.channel!=null) {
658
                // check again. we used to have this entire method in a big synchronization block,
659 660 661 662 663 664 665 666 667 668 669
                // but Channel constructor blocks for an external process to do the connection
                // if CommandLauncher is used, and that cannot be interrupted because it blocks at InputStream.
                // so if the process hangs, it hangs the thread in a lock, and since Hudson will try to relaunch,
                // we'll end up queuing the lot of threads in a pseudo deadlock.
                // This implementation prevents that by avoiding a lock. HUDSON-1705 is likely a manifestation of this.
                channel.close();
                throw new IllegalStateException("Already connected");
            }
            isUnix = _isUnix;
            numRetryAttempt = 0;
            this.channel = channel;
670
            this.absoluteRemoteFs = remoteFS;
671
            defaultCharset = Charset.forName(defaultCharsetName);
672 673 674 675

            synchronized (statusChangeLock) {
                statusChangeLock.notifyAll();
            }
676
        }
677
        try (ACLContext ctx = ACL.as(ACL.SYSTEM)) {
678
            for (ComputerListener cl : ComputerListener.all()) {
679 680 681 682 683 684 685 686 687 688 689 690 691
                try {
                    cl.onOnline(this,taskListener);
                } catch (Exception e) {
                    // Per Javadoc log exceptions but still go online.
                    // NOTE: this does not include Errors, which indicate a fatal problem
                    taskListener.getLogger().format(
                        "onOnline: %s reported an exception: %s%n",
                        cl.getClass(),
                        e.toString());
                } catch (Throwable e) {
                    closeChannel();
                    throw e;
                }
692 693
            }
        }
694
        log.println("Agent successfully connected and online");
695
        Jenkins.get().getQueue().scheduleMaintenance();
696 697 698
    }

    @Override
699
    public Channel getChannel() {
700 701 702 703 704 705 706 707 708 709 710
        return channel;
    }

    /**
     * Returns the charset recorded when the channel was last set up.
     *
     * @return the stored charset; {@code null} if it has not been assigned yet.
     */
    public Charset getDefaultCharset() {
        return this.defaultCharset;
    }

    public List<LogRecord> getLogRecords() throws IOException, InterruptedException {
        if(channel==null)
            return Collections.emptyList();
        else
711
            return channel.call(new SlaveLogFetcher());
712 713
    }

714
    @RequirePOST
715
    public HttpResponse doDoDisconnect(@QueryParameter String offlineMessage) {
716 717
        if (channel!=null) {
            //does nothing in case computer is already disconnected
718
            checkPermission(DISCONNECT);
719
            offlineMessage = Util.fixEmptyAndTrim(offlineMessage);
720
            disconnect(new OfflineCause.UserCause(User.current(), offlineMessage));
721 722
        }
        return new HttpRedirect(".");
723 724 725
    }

    @Override
726 727
    public Future<?> disconnect(OfflineCause cause) {
        super.disconnect(cause);
K
kohsuke 已提交
728
        return Computer.threadPoolForRemoting.submit(new Runnable() {
729 730 731
            public void run() {
                // do this on another thread so that any lengthy disconnect operation
                // (which could be typical) won't block UI thread.
732 733 734
                launcher.beforeDisconnect(SlaveComputer.this, taskListener);
                closeChannel();
                launcher.afterDisconnect(SlaveComputer.this, taskListener);
735 736 737 738
            }
        });
    }

739
    @RequirePOST
740 741
    @Override
    public void doLaunchSlaveAgent(StaplerRequest req, StaplerResponse rsp) throws IOException {
W
Wadeck Follonier 已提交
742 743
        checkPermission(CONNECT);
            
744
        if(channel!=null) {
745 746 747 748 749 750 751
            try {
                req.getView(this, "already-launched.jelly").forward(req, rsp);
            } catch (IOException x) {
                throw x;
            } catch (/*Servlet*/Exception x) {
                throw new IOException(x);
            }
752 753 754
            return;
        }

K
kohsuke 已提交
755
        connect(true);
756 757 758 759 760 761 762 763 764 765 766

        // TODO: would be nice to redirect the user to "launching..." wait page,
        // then spend a few seconds there and poll for the completion periodically.
        rsp.sendRedirect("log");
    }

    public void tryReconnect() {
        numRetryAttempt++;
        if(numRetryAttempt<6 || (numRetryAttempt%12)==0) {
            // initially retry several times quickly, and after that, do it infrequently.
            logger.info("Attempting to reconnect "+nodeName);
K
kohsuke 已提交
767
            connect(true);
768 769 770 771
        }
    }

    /**
772
     * Serves jar files for inbound agents.
773
     *
M
mindless 已提交
774
     * @deprecated since 2008-08-18.
775
     *      This URL binding is no longer used and moved up directly under to {@link jenkins.model.Jenkins},
776
     *      but it's left here for now just in case some old inbound agents request it.
777
     */
778
    @Deprecated
779 780 781 782
    public Slave.JnlpJar getJnlpJars(String fileName) {
        return new Slave.JnlpJar(fileName);
    }

K
Kohsuke Kawaguchi 已提交
783
    @WebMethod(name="slave-agent.jnlp")
784
    public HttpResponse doSlaveAgentJnlp(StaplerRequest req, StaplerResponse res) {
785
        return new EncryptedSlaveAgentJnlpFile(this, "slave-agent.jnlp.jelly", getName(), CONNECT);
K
Kohsuke Kawaguchi 已提交
786 787
    }

788 789 790 791
    @Override
    protected void kill() {
        super.kill();
        closeChannel();
792 793 794 795 796
        try {
            log.close();
        } catch (IOException x) {
            LOGGER.log(Level.WARNING, "Failed to close agent log", x);
        }
797 798

        try {
799
            Util.deleteRecursive(getLogDir());
800
        } catch (IOException ex) {
801
            logger.log(Level.WARNING, "Unable to delete agent logs", ex);
802
        }
803 804 805
    }

    public RetentionStrategy getRetentionStrategy() {
806
        Slave n = getNode();
807
        return n==null ? RetentionStrategy.NOOP : n.getRetentionStrategy();
808 809 810 811 812 813 814
    }

    /**
     * If still connected, disconnect.
     */
    private void closeChannel() {
        // TODO: race condition between this and the setChannel method.
815 816 817 818 819 820 821
        Channel c;
        synchronized (channelLock) {
            c = channel;
            channel = null;
            absoluteRemoteFs = null;
            isUnix = null;
        }
822 823 824 825 826 827
        if (c != null) {
            try {
                c.close();
            } catch (IOException e) {
                logger.log(Level.SEVERE, "Failed to terminate channel to " + getDisplayName(), e);
            }
828
            for (ComputerListener cl : ComputerListener.all())
829
                cl.onOffline(this, offlineCause);
830 831 832 833
        }
    }

    @Override
834
    protected void setNode(final Node node) {
835
        super.setNode(node);
K
kohsuke 已提交
836
        launcher = grabLauncher(node);
837

838
        // maybe the configuration was changed to relaunch the agent, so try to re-launch now.
K
kohsuke 已提交
839 840
        // "constructed==null" test is an ugly hack to avoid launching before the object is fully
        // constructed.
841
        if(constructed!=null) {
842 843 844 845 846 847 848 849
            if (node instanceof Slave) {
                Queue.withLock(new Runnable() {
                    @Override
                    public void run() {
                        ((Slave)node).getRetentionStrategy().check(SlaveComputer.this);
                    }
                });
            } else {
850
                connect(false);
851
            }
852
        }
K
kohsuke 已提交
853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
    }

    /**
     * Grabs a {@link ComputerLauncher} out of {@link Node} to keep it in this {@link Computer}.
     * The returned launcher will be set to {@link #launcher} and used to carry out the actual launch operation.
     *
     * <p>
     * Subtypes that needs to decorate {@link ComputerLauncher} can do so by overriding this method.
     * This is useful for {@link SlaveComputer}s for clouds for example, where one normally needs
     * additional pre-launch step (such as waiting for the provisioned node to become available)
     * before the user specified launch step (like SSH connection) kicks in.
     *
     * @see ComputerLauncherFilter
     */
    protected ComputerLauncher grabLauncher(Node node) {
        return ((Slave)node).getLauncher();
869 870
    }

871
    /**
     * Get the agent version
     *
     * @return the string produced by running {@code SlaveVersion} over the channel
     * @throws IOException          if the remote call fails
     * @throws InterruptedException if the calling thread is interrupted while waiting for the reply
     */
    public String getSlaveVersion() throws IOException, InterruptedException {
        final SlaveVersion versionQuery = new SlaveVersion();
        return channel.call(versionQuery);
    }

    /**
     * Get the OS description.
     *
     * @return {@code "Unix"} when the {@code DetectOS} remote call yields {@code true}, {@code "Windows"} otherwise
     * @throws IOException          if the remote call fails
     * @throws InterruptedException if the calling thread is interrupted while waiting for the reply
     */
    public String getOSDescription() throws IOException, InterruptedException {
        final Boolean unix = channel.call(new DetectOS());
        if (unix) {
            return "Unix";
        }
        return "Windows";
    }

885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910
    /**
     * Expose real full env vars map from agent for UI presentation
     *
     * @return the agent's environment variables, or a single {@code "N/A" -> "N/A"} entry
     *         when no channel is available
     * @throws IOException          if the remote call fails
     * @throws InterruptedException if the calling thread is interrupted while waiting for the reply
     */
    public Map<String,String> getEnvVarsFull() throws IOException, InterruptedException {
        // Resolve the channel once: it can be cleared concurrently by closeChannel(),
        // so a second getChannel() call after the null check might return null.
        final Channel ch = getChannel();
        if (ch == null) {
            Map<String, String> env = new TreeMap<>();
            env.put("N/A","N/A");
            return env;
        }
        return ch.call(new ListFullEnvironment());
    }

    /**
     * Callable that returns a sorted copy of {@link System#getenv()} from the JVM it runs in.
     */
    private static class ListFullEnvironment extends MasterToSlaveCallable<Map<String,String>,IOException> {
        public Map<String,String> call() throws IOException {
            final Map<String, String> sorted = new TreeMap<>(System.getenv());
            final boolean testOrDevMode = Main.isUnitTest || Main.isDevelopmentMode;
            if (testOrDevMode) {
                // if unit test is launched with maven debug switch,
                // we need to prevent forked Maven processes from seeing it, or else
                // they'll hang
                sorted.remove("MAVEN_OPTS");
            }
            return sorted;
        }
    }

911 912
    // Logger named after SlaveComputer. NOTE(review): LOGGER, declared at the end of this
    // class, is obtained with the same logger name, so the two fields look redundant.
    private static final Logger logger = Logger.getLogger(SlaveComputer.class.getName());

913
    private static final class SlaveVersion extends MasterToSlaveCallable<String,IOException> {
914 915 916 917 918
        public String call() throws IOException {
            try { return Launcher.VERSION; }
            catch (Throwable ex) { return "< 1.335"; } // Older slave.jar won't have VERSION
        }
    }
919
    private static final class DetectOS extends MasterToSlaveCallable<Boolean,IOException> {
920 921 922 923 924
        public Boolean call() throws IOException {
            return File.pathSeparatorChar==':';
        }
    }

925
    private static final class AbsolutePath extends MasterToSlaveCallable<String,IOException> {
926 927 928

        private static final long serialVersionUID = 1L;

929 930 931 932 933 934 935 936 937 938 939
        private final String relativePath;

        private AbsolutePath(String relativePath) {
            this.relativePath = relativePath;
        }

        public String call() throws IOException {
            return new File(relativePath).getAbsolutePath();
        }
    }

940
    private static final class DetectDefaultCharset extends MasterToSlaveCallable<String,IOException> {
941 942 943 944 945 946
        public String call() throws IOException {
            return Charset.defaultCharset().name();
        }
    }

    /**
947 948
     * Puts the {@link #SLAVE_LOG_HANDLER} into a separate class so that loading this class
     * in JVM doesn't end up loading tons of additional classes.
949
     */
950 951
    static final class LogHolder {
        /**
952
         * This field is used on each agent to record logs on the agent.
953
         */
954
        static RingBufferLogHandler SLAVE_LOG_HANDLER;
955
    }
956

957
    private static class SlaveInitializer extends MasterToSlaveCallable<Void,RuntimeException> {
958 959 960 961 962 963
        final int ringBufferSize;

        public SlaveInitializer(int ringBufferSize) {
            this.ringBufferSize = ringBufferSize;
        }

964
        public Void call() {
965 966
            SLAVE_LOG_HANDLER = new RingBufferLogHandler(ringBufferSize);

967
            // avoid double installation of the handler. Inbound agents can reconnect to the master multiple times
968 969
            // and each connection gets a different RemoteClassLoader, so we need to evict them by class name,
            // not by their identity.
K
Kohsuke Kawaguchi 已提交
970
            for (Handler h : LOGGER.getHandlers()) {
971
                if (h.getClass().getName().equals(SLAVE_LOG_HANDLER.getClass().getName()))
K
Kohsuke Kawaguchi 已提交
972
                    LOGGER.removeHandler(h);
973
            }
K
Kohsuke Kawaguchi 已提交
974
            LOGGER.addHandler(SLAVE_LOG_HANDLER);
975

K
Kohsuke Kawaguchi 已提交
976
            // remove Sun PKCS11 provider if present. See http://wiki.jenkins-ci.org/display/JENKINS/Solaris+Issue+6276483
977 978 979 980 981 982
            try {
                Security.removeProvider("SunPKCS11-Solaris");
            } catch (SecurityException e) {
                // ignore this error.
            }

983
            try {
984
                getChannelOrFail().setProperty("slave",Boolean.TRUE); // indicate that this side of the channel is the agent side.
985 986 987
            } catch (ChannelClosedException e) {
                throw new IllegalStateException(e);
            }
988

989 990 991
            return null;
        }
        private static final long serialVersionUID = 1L;
992
        private static final Logger LOGGER = Logger.getLogger("");
993
    }
994 995 996

    /**
     * Obtains a {@link VirtualChannel} that allows some computation to be performed on the master.
997 998 999
     * This method can be called from any thread on the master, or from agent (more precisely,
     * it only works from the remoting request-handling thread in agents, which means if you've started
     * separate thread on agents, that'll fail.)
1000 1001 1002 1003 1004
     *
     * @return null if the calling thread doesn't have any trace of where its master is.
     * @since 1.362
     */
    public static VirtualChannel getChannelToMaster() {
1005
        if (Jenkins.getInstanceOrNull()!=null) // check if calling thread is on master or on slave
K
Kohsuke Kawaguchi 已提交
1006
            return FilePath.localChannel;
1007

1008
        // if this method is called from within the agent computation thread, this should work
1009
        Channel c = Channel.current();
1010
        if (c!=null && Boolean.TRUE.equals(c.getProperty("slave")))
1011 1012 1013 1014
            return c;

        return null;
    }
1015

1016 1017 1018
    /**
     * Helper method for Jelly.
     */
1019 1020
    @Restricted(DoNotUse.class)
    @RestrictedSince("TODO")
1021 1022 1023 1024
    public static List<SlaveSystemInfo> getSystemInfoExtensions() {
        return SlaveSystemInfo.all();
    }

1025
    private static class SlaveLogFetcher extends MasterToSlaveCallable<List<LogRecord>,RuntimeException> {
1026
        public List<LogRecord> call() {
1027
            return new ArrayList<>(SLAVE_LOG_HANDLER.getView());
1028 1029
        }
    }
1030

1031 1032 1033
    // Default ring buffer size, read from the system property
    // "<RingBufferLogHandler class name>.defaultSize" with a fallback of 256.
    // The RingBufferLogHandler class name is used in the property key for backward compatibility.
    private static final int DEFAULT_RING_BUFFER_SIZE = SystemProperties.getInteger(RingBufferLogHandler.class.getName() + ".defaultSize", 256);

1034
    // Class-level logger, named after SlaveComputer.
    private static final Logger LOGGER = Logger.getLogger(SlaveComputer.class.getName());
1035
}