tcp_timer.c 14.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <linux/module.h>
#include <net/tcp.h>

26 27 28 29 30 31 32 33
/* Tunable limits consulted by the TCP timers; all but the last start out at
 * a compile-time default. (Presumably exposed through sysctl, going by the
 * naming -- the registration is not in this file.)
 */
/* Retry limit for SYN_SENT/SYN_RECV sockets that have no icsk_syn_retries set. */
int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
/* Keepalive probe limit for sockets whose tp->keepalive_probes is zero. */
int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
/* Retransmit count at which MTU probing and negative route advice kick in. */
int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
/* Default retransmit/probe count after which the connection is aborted. */
int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
/* No initialiser, so starts at zero; tcp_orphan_retries() handles that case. */
int sysctl_tcp_orphan_retries __read_mostly;
L
Linus Torvalds 已提交
34 35 36 37 38

/* Forward declarations of the three timer callbacks defined later in this
 * file; tcp_init_xmit_timers() needs their addresses before the definitions.
 */
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
static void tcp_keepalive_timer (unsigned long data);

39 40 41 42 43 44
/*
 * Hand the TCP handlers for the retransmit/probe timer, the delayed-ACK
 * timer and the keepalive timer of @sk to the generic connection-socket
 * layer.
 */
void tcp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk,
				  tcp_write_timer,
				  tcp_delack_timer,
				  tcp_keepalive_timer);
}

45 46
EXPORT_SYMBOL(tcp_init_xmit_timers);

L
Linus Torvalds 已提交
47 48 49 50 51 52 53 54 55 56 57 58 59 60
/*
 * Abort the connection after a timeout: publish the recorded soft error
 * (or ETIMEDOUT when none is recorded) as the socket error, run the
 * socket's error-report callback, finish the socket off and count the abort.
 */
static void tcp_write_err(struct sock *sk)
{
	int err = sk->sk_err_soft;

	if (!err)
		err = ETIMEDOUT;

	sk->sk_err = err;
	sk->sk_error_report(sk);

	tcp_done(sk);
	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
}

/* Do not allow orphaned sockets to eat all our resources.
 * This is direct violation of TCP specs, but it is required
 * to prevent DoS attacks. It is called when a retransmission timeout
 * or zero probe timeout occurs on orphaned socket.
 *
S
Stephen Hemminger 已提交
61
 * Criteria is still not confirmed experimentally and may change.
L
Linus Torvalds 已提交
62 63 64 65 66 67 68 69 70 71
 * We kill the socket, if:
 * 1. If number of orphaned sockets exceeds an administratively configured
 *    limit.
 * 2. If we have strong memory pressure.
 */
static int tcp_out_of_resources(struct sock *sk, int do_reset)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int orphans = atomic_read(&tcp_orphan_count);

72
	/* If peer does not open window for long time, or did not transmit
L
Linus Torvalds 已提交
73 74 75 76 77 78 79 80
	 * anything for long time, penalize it. */
	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
		orphans <<= 1;

	/* If some dubious ICMP arrived, penalize even more. */
	if (sk->sk_err_soft)
		orphans <<= 1;

81
	if (tcp_too_many_orphans(sk, orphans)) {
L
Linus Torvalds 已提交
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
		if (net_ratelimit())
			printk(KERN_INFO "Out of socket memory\n");

		/* Catch exceptional cases, when connection requires reset.
		 *      1. Last segment was sent recently. */
		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
		    /*  2. Window is closed. */
		    (!tp->snd_wnd && !tp->packets_out))
			do_reset = 1;
		if (do_reset)
			tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
		return 1;
	}
	return 0;
}

/* Calculate the maximal number of retries on an orphaned socket.
 *
 * @alive: non-zero if the caller still considers the connection alive.
 */
static int tcp_orphan_retries(struct sock *sk, int alive)
{
	int retries;

	if (!alive) {
		/* We know from an ICMP that something is wrong. */
		if (sk->sk_err_soft)
			return 0;
		return sysctl_tcp_orphan_retries; /* May be zero. */
	}

	/* The socket sent something recently: if no limit is configured,
	 * select some safe number of retries. 8 corresponds to >100 seconds
	 * with minimal RTO of 200msec. */
	retries = sysctl_tcp_orphan_retries;
	if (retries == 0)
		retries = 8;
	return retries;
}

117 118 119 120 121 122 123 124 125
static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
	/* Black hole detection */
	if (sysctl_tcp_mtu_probing) {
		if (!icsk->icsk_mtup.enabled) {
			icsk->icsk_mtup.enabled = 1;
			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		} else {
			struct tcp_sock *tp = tcp_sk(sk);
126 127
			int mss;

128
			mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
129 130 131 132 133 134 135 136
			mss = min(sysctl_tcp_base_mss, mss);
			mss = max(mss, 68 - tp->tcp_header_len);
			icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		}
	}
}

L
Linus Torvalds 已提交
137 138 139
/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
{
J
John Heffner 已提交
140
	struct inet_connection_sock *icsk = inet_csk(sk);
L
Linus Torvalds 已提交
141 142 143
	int retry_until;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
144
		if (icsk->icsk_retransmits)
L
Linus Torvalds 已提交
145
			dst_negative_advice(&sk->sk_dst_cache);
146
		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
L
Linus Torvalds 已提交
147
	} else {
148
		if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
J
John Heffner 已提交
149
			/* Black hole detection */
150
			tcp_mtu_probing(icsk, sk);
L
Linus Torvalds 已提交
151 152 153 154 155 156

			dst_negative_advice(&sk->sk_dst_cache);
		}

		retry_until = sysctl_tcp_retries2;
		if (sock_flag(sk, SOCK_DEAD)) {
157
			const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
158

L
Linus Torvalds 已提交
159 160
			retry_until = tcp_orphan_retries(sk, alive);

161
			if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until))
L
Linus Torvalds 已提交
162 163 164 165
				return 1;
		}
	}

166
	if (icsk->icsk_retransmits >= retry_until) {
L
Linus Torvalds 已提交
167 168 169 170 171 172 173 174 175 176 177
		/* Has it gone just too far? */
		tcp_write_err(sk);
		return 1;
	}
	return 0;
}

static void tcp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock*)data;
	struct tcp_sock *tp = tcp_sk(sk);
178
	struct inet_connection_sock *icsk = inet_csk(sk);
L
Linus Torvalds 已提交
179 180 181 182

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
183
		icsk->icsk_ack.blocked = 1;
L
Linus Torvalds 已提交
184
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
185
		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
L
Linus Torvalds 已提交
186 187 188
		goto out_unlock;
	}

189
	sk_mem_reclaim_partial(sk);
L
Linus Torvalds 已提交
190

191
	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
L
Linus Torvalds 已提交
192 193
		goto out;

194 195
	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
L
Linus Torvalds 已提交
196 197
		goto out;
	}
198
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
L
Linus Torvalds 已提交
199

200
	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
L
Linus Torvalds 已提交
201 202
		struct sk_buff *skb;

203
		NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED);
L
Linus Torvalds 已提交
204 205 206 207 208 209 210

		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
			sk->sk_backlog_rcv(sk, skb);

		tp->ucopy.memory = 0;
	}

211 212
	if (inet_csk_ack_scheduled(sk)) {
		if (!icsk->icsk_ack.pingpong) {
L
Linus Torvalds 已提交
213
			/* Delayed ACK missed: inflate ATO. */
214
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
L
Linus Torvalds 已提交
215 216 217 218
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
219 220
			icsk->icsk_ack.pingpong = 0;
			icsk->icsk_ack.ato      = TCP_ATO_MIN;
L
Linus Torvalds 已提交
221 222 223 224 225 226 227 228
		}
		tcp_send_ack(sk);
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
	}
	TCP_CHECK_TIMER(sk);

out:
	if (tcp_memory_pressure)
229
		sk_mem_reclaim(sk);
L
Linus Torvalds 已提交
230 231 232 233 234 235 236
out_unlock:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void tcp_probe_timer(struct sock *sk)
{
237
	struct inet_connection_sock *icsk = inet_csk(sk);
L
Linus Torvalds 已提交
238 239 240
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;

241
	if (tp->packets_out || !tcp_send_head(sk)) {
242
		icsk->icsk_probes_out = 0;
L
Linus Torvalds 已提交
243 244 245 246 247 248 249 250 251 252
		return;
	}

	/* *WARNING* RFC 1122 forbids this
	 *
	 * It doesn't AFAIK, because we kill the retransmit timer -AK
	 *
	 * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
	 * this behaviour in Solaris down as a bug fix. [AC]
	 *
253
	 * Let me to explain. icsk_probes_out is zeroed by incoming ACKs
L
Linus Torvalds 已提交
254 255 256 257 258 259 260 261 262 263
	 * even if they advertise zero window. Hence, connection is killed only
	 * if we received no ACKs for normal connection timeout. It is not killed
	 * only because window stays zero for some time, window may be zero
	 * until armageddon and even later. We are in full accordance
	 * with RFCs, only probe timer combines both retransmission timeout
	 * and probe timeout in one bottle.				--ANK
	 */
	max_probes = sysctl_tcp_retries2;

	if (sock_flag(sk, SOCK_DEAD)) {
264
		const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX);
265

L
Linus Torvalds 已提交
266 267
		max_probes = tcp_orphan_retries(sk, alive);

268
		if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
L
Linus Torvalds 已提交
269 270 271
			return;
	}

272
	if (icsk->icsk_probes_out > max_probes) {
L
Linus Torvalds 已提交
273 274 275 276 277 278 279 280 281 282 283 284 285 286
		tcp_write_err(sk);
	} else {
		/* Only send another probe if we didn't close things up. */
		tcp_send_probe0(sk);
	}
}

/*
 *	The TCP retransmit timer.
 */

static void tcp_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
287
	struct inet_connection_sock *icsk = inet_csk(sk);
L
Linus Torvalds 已提交
288 289 290 291

	if (!tp->packets_out)
		goto out;

292
	BUG_TRAP(!tcp_write_queue_empty(sk));
L
Linus Torvalds 已提交
293 294 295 296 297 298 299 300 301

	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
		/* Receiver dastardly shrinks window. Our retransmits
		 * become zero probes, but we should not timeout this
		 * connection. If the socket is an orphan, time it out,
		 * we cannot allow such beasts to hang infinitely.
		 */
#ifdef TCP_DEBUG
302 303
		struct inet_sock *inet = inet_sk(sk);
		if (sk->sk_family == AF_INET) {
304
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIPQUAD_FMT ":%u/%u shrinks window %u:%u. Repaired.\n",
305
			       NIPQUAD(inet->daddr), ntohs(inet->dport),
L
Linus Torvalds 已提交
306 307
			       inet->num, tp->snd_una, tp->snd_nxt);
		}
308 309 310 311 312 313 314 315
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (sk->sk_family == AF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n",
			       NIP6(np->daddr), ntohs(inet->dport),
			       inet->num, tp->snd_una, tp->snd_nxt);
		}
#endif
L
Linus Torvalds 已提交
316 317 318 319 320 321
#endif
		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
			tcp_write_err(sk);
			goto out;
		}
		tcp_enter_loss(sk, 0);
322
		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
L
Linus Torvalds 已提交
323 324 325 326 327 328 329
		__sk_dst_reset(sk);
		goto out_reset_timer;
	}

	if (tcp_write_timeout(sk))
		goto out;

330
	if (icsk->icsk_retransmits == 0) {
331 332
		if (icsk->icsk_ca_state == TCP_CA_Disorder ||
		    icsk->icsk_ca_state == TCP_CA_Recovery) {
333
			if (tcp_is_sack(tp)) {
334
				if (icsk->icsk_ca_state == TCP_CA_Recovery)
L
Linus Torvalds 已提交
335 336 337 338
					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
				else
					NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
			} else {
339
				if (icsk->icsk_ca_state == TCP_CA_Recovery)
L
Linus Torvalds 已提交
340 341 342 343
					NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
				else
					NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
			}
344
		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
L
Linus Torvalds 已提交
345 346 347 348 349 350 351 352 353 354 355 356
			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
		} else {
			NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
		}
	}

	if (tcp_use_frto(sk)) {
		tcp_enter_frto(sk);
	} else {
		tcp_enter_loss(sk, 0);
	}

357
	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
L
Linus Torvalds 已提交
358 359 360
		/* Retransmission failed because of local congestion,
		 * do not backoff.
		 */
361 362 363
		if (!icsk->icsk_retransmits)
			icsk->icsk_retransmits = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
364 365
					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);
L
Linus Torvalds 已提交
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
		goto out;
	}

	/* Increase the timeout each time we retransmit.  Note that
	 * we do not increase the rtt estimate.  rto is initialized
	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
	 * that doubling rto each time is the least we can get away with.
	 * In KA9Q, Karn uses this for the first few times, and then
	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
	 * defined in the protocol as the maximum possible RTT.  I guess
	 * we'll have to use something other than TCP to talk to the
	 * University of Mars.
	 *
	 * PAWS allows us longer timeouts and large windows, so once
	 * implemented ftp to mars will work nicely. We will have to fix
	 * the 120 second clamps though!
	 */
384 385
	icsk->icsk_backoff++;
	icsk->icsk_retransmits++;
L
Linus Torvalds 已提交
386 387

out_reset_timer:
388
	icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
389
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
390
	if (icsk->icsk_retransmits > sysctl_tcp_retries1)
L
Linus Torvalds 已提交
391 392 393 394 395 396 397 398
		__sk_dst_reset(sk);

out:;
}

static void tcp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock*)data;
399
	struct inet_connection_sock *icsk = inet_csk(sk);
L
Linus Torvalds 已提交
400 401 402 403 404
	int event;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later */
405
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
L
Linus Torvalds 已提交
406 407 408
		goto out_unlock;
	}

409
	if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
L
Linus Torvalds 已提交
410 411
		goto out;

412 413
	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
L
Linus Torvalds 已提交
414 415 416
		goto out;
	}

417 418
	event = icsk->icsk_pending;
	icsk->icsk_pending = 0;
L
Linus Torvalds 已提交
419 420

	switch (event) {
421
	case ICSK_TIME_RETRANS:
L
Linus Torvalds 已提交
422 423
		tcp_retransmit_timer(sk);
		break;
424
	case ICSK_TIME_PROBE0:
L
Linus Torvalds 已提交
425 426 427 428 429 430
		tcp_probe_timer(sk);
		break;
	}
	TCP_CHECK_TIMER(sk);

out:
431
	sk_mem_reclaim(sk);
L
Linus Torvalds 已提交
432 433 434 435 436
out_unlock:
	bh_unlock_sock(sk);
	sock_put(sk);
}

437 438 439 440 441 442
/*
 *	Timer for listening sockets
 */

static void tcp_synack_timer(struct sock *sk)
{
443 444
	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
				   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
L
Linus Torvalds 已提交
445 446 447 448 449 450 451 452
}

/*
 * Start or stop the keepalive timer of @sk.
 *
 * @val: zero deletes the timer; non-zero arms it unless SOCK_KEEPOPEN is
 *       already set on the socket.
 *
 * Sockets in the CLOSE or LISTEN state are left untouched.
 */
void tcp_set_keepalive(struct sock *sk, int val)
{
	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
		return;

	if (!val) {
		inet_csk_delete_keepalive_timer(sk);
		return;
	}

	if (!sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
}


static void tcp_keepalive_timer (unsigned long data)
{
	struct sock *sk = (struct sock *) data;
462
	struct inet_connection_sock *icsk = inet_csk(sk);
L
Linus Torvalds 已提交
463 464 465 466 467 468
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
469
		/* Try again later. */
470
		inet_csk_reset_keepalive_timer (sk, HZ/20);
L
Linus Torvalds 已提交
471 472 473 474 475 476 477 478 479 480
		goto out;
	}

	if (sk->sk_state == TCP_LISTEN) {
		tcp_synack_timer(sk);
		goto out;
	}

	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
		if (tp->linger2 >= 0) {
481
			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
L
Linus Torvalds 已提交
482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC);
		goto death;
	}

	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
498
	if (tp->packets_out || tcp_send_head(sk))
L
Linus Torvalds 已提交
499 500 501 502 503
		goto resched;

	elapsed = tcp_time_stamp - tp->rcv_tstamp;

	if (elapsed >= keepalive_time_when(tp)) {
504 505
		if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
		     (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
L
Linus Torvalds 已提交
506 507 508 509 510
			tcp_send_active_reset(sk, GFP_ATOMIC);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk) <= 0) {
511
			icsk->icsk_probes_out++;
L
Linus Torvalds 已提交
512 513 514 515 516 517 518 519 520 521 522 523 524
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

	TCP_CHECK_TIMER(sk);
525
	sk_mem_reclaim(sk);
L
Linus Torvalds 已提交
526 527

resched:
528
	inet_csk_reset_keepalive_timer (sk, elapsed);
L
Linus Torvalds 已提交
529 530
	goto out;

531
death:
L
Linus Torvalds 已提交
532 533 534 535 536 537
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}