output.c 22.7 KB
Newer Older
1
/* SCTP kernel implementation
L
Linus Torvalds 已提交
2 3 4 5
 * (C) Copyright IBM Corp. 2001, 2004
 * Copyright (c) 1999-2000 Cisco, Inc.
 * Copyright (c) 1999-2001 Motorola, Inc.
 *
6
 * This file is part of the SCTP kernel implementation
L
Linus Torvalds 已提交
7 8 9
 *
 * These functions handle output processing.
 *
10
 * This SCTP implementation is free software;
L
Linus Torvalds 已提交
11 12 13 14 15
 * you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
16
 * This SCTP implementation is distributed in the hope that it
L
Linus Torvalds 已提交
17 18 19 20 21 22
 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *                 ************************
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
23 24
 * along with GNU CC; see the file COPYING.  If not, see
 * <http://www.gnu.org/licenses/>.
L
Linus Torvalds 已提交
25 26 27
 *
 * Please send any bug reports or fixes you make to the
 * email address(es):
28
 *    lksctp developers <linux-sctp@vger.kernel.org>
L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36
 *
 * Written or modified by:
 *    La Monte H.P. Yarroll <piggy@acm.org>
 *    Karl Knutson          <karl@athena.chicago.il.us>
 *    Jon Grimm             <jgrimm@austin.ibm.com>
 *    Sridhar Samudrala     <sri@us.ibm.com>
 */

37 38
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

L
Linus Torvalds 已提交
39 40 41 42 43 44 45
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/time.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/init.h>
46
#include <linux/slab.h>
L
Linus Torvalds 已提交
47
#include <net/inet_ecn.h>
V
Vlad Yasevich 已提交
48
#include <net/ip.h>
L
Linus Torvalds 已提交
49
#include <net/icmp.h>
50
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
51 52 53 54 55 56

#include <linux/socket.h> /* for sa_family_t */
#include <net/sock.h>

#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
57
#include <net/sctp/checksum.h>
L
Linus Torvalds 已提交
58 59

/* Forward declarations for private helpers. */
60 61
/* Low-level append: fit check plus per-chunk-type bookkeeping. */
static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
					      struct sctp_chunk *chunk);
/* Decide whether a DATA chunk may be sent right now. */
static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
					       struct sctp_chunk *chunk);
/* Accounting performed when a DATA chunk joins a packet. */
static void sctp_packet_append_data(struct sctp_packet *packet,
				    struct sctp_chunk *chunk);
/* Size check for adding chunk_len more bytes to the packet. */
static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
					struct sctp_chunk *chunk,
					u16 chunk_len);
L
Linus Torvalds 已提交
69

70 71 72 73 74 75 76 77 78 79 80
/* Put a packet back into its empty state: clear every bundling flag and
 * count only the fixed header overhead in its size.
 */
static void sctp_packet_reset(struct sctp_packet *packet)
{
	/* Bundling state collected while chunks were being appended. */
	packet->auth = NULL;
	packet->has_auth = 0;
	packet->has_data = 0;
	packet->has_sack = 0;
	packet->has_cookie_echo = 0;
	packet->ipfragok = 0;

	/* An empty packet is exactly as big as its headers. */
	packet->size = packet->overhead;
}

L
Linus Torvalds 已提交
81 82 83
/* Config a packet.
 * This appears to be a followup set of initializations.
 */
84 85
void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
			int ecn_capable)
L
Linus Torvalds 已提交
86
{
M
Marcelo Ricardo Leitner 已提交
87 88
	struct sctp_transport *tp = packet->transport;
	struct sctp_association *asoc = tp->asoc;
L
Linus Torvalds 已提交
89

90
	pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
L
Linus Torvalds 已提交
91 92 93

	packet->vtag = vtag;

M
Marcelo Ricardo Leitner 已提交
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
	if (asoc && tp->dst) {
		struct sock *sk = asoc->base.sk;

		rcu_read_lock();
		if (__sk_dst_get(sk) != tp->dst) {
			dst_hold(tp->dst);
			sk_setup_caps(sk, tp->dst);
		}

		if (sk_can_gso(sk)) {
			struct net_device *dev = tp->dst->dev;

			packet->max_size = dev->gso_max_size;
		} else {
			packet->max_size = asoc->pathmtu;
		}
		rcu_read_unlock();

	} else {
		packet->max_size = tp->pathmtu;
	}

L
Linus Torvalds 已提交
116
	if (ecn_capable && sctp_packet_empty(packet)) {
M
Marcelo Ricardo Leitner 已提交
117
		struct sctp_chunk *chunk;
L
Linus Torvalds 已提交
118 119

		/* If there a is a prepend chunk stick it on the list before
120 121
		 * any other chunks get appended.
		 */
M
Marcelo Ricardo Leitner 已提交
122
		chunk = sctp_get_ecne_prepend(asoc);
L
Linus Torvalds 已提交
123 124 125 126 127 128
		if (chunk)
			sctp_packet_append_chunk(packet, chunk);
	}
}

/* Initialize the packet structure. */
129 130 131
void sctp_packet_init(struct sctp_packet *packet,
		      struct sctp_transport *transport,
		      __u16 sport, __u16 dport)
L
Linus Torvalds 已提交
132 133 134 135
{
	struct sctp_association *asoc = transport->asoc;
	size_t overhead;

136
	pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport);
L
Linus Torvalds 已提交
137 138 139 140

	packet->transport = transport;
	packet->source_port = sport;
	packet->destination_port = dport;
141
	INIT_LIST_HEAD(&packet->chunk_list);
L
Linus Torvalds 已提交
142
	if (asoc) {
143 144
		struct sctp_sock *sp = sctp_sk(asoc->base.sk);
		overhead = sp->pf->af->net_header_len;
L
Linus Torvalds 已提交
145 146 147 148 149
	} else {
		overhead = sizeof(struct ipv6hdr);
	}
	overhead += sizeof(struct sctphdr);
	packet->overhead = overhead;
150
	sctp_packet_reset(packet);
L
Linus Torvalds 已提交
151 152 153 154 155 156
	packet->vtag = 0;
}

/* Free a packet.  */
void sctp_packet_free(struct sctp_packet *packet)
{
157
	struct sctp_chunk *chunk, *tmp;
L
Linus Torvalds 已提交
158

159
	pr_debug("%s: packet:%p\n", __func__, packet);
L
Linus Torvalds 已提交
160

161 162
	list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
		list_del_init(&chunk->list);
L
Linus Torvalds 已提交
163
		sctp_chunk_free(chunk);
164
	}
L
Linus Torvalds 已提交
165 166 167 168 169 170 171 172 173 174
}

/* This routine tries to append the chunk to the offered packet. If adding
 * the chunk causes the packet to exceed the path MTU and COOKIE_ECHO chunk
 * is not present in the packet, it transmits the input packet.
 * Data can be bundled with a packet containing a COOKIE_ECHO chunk as long
 * as it can fit in the packet, but any more data that does not fit in this
 * packet can be sent only after receiving the COOKIE_ACK.
 *
 * @one_packet: when non-zero, the chunk is not re-appended after the
 *              packet has been flushed.
 * @gfp:        allocation flags passed on to sctp_packet_transmit().
 *
 * Returns the result of the last append attempt.
 */
sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
				       struct sctp_chunk *chunk,
				       int one_packet, gfp_t gfp)
{
	sctp_xmit_t retval;

	/* packet->size is a size_t: use the standard %zu specifier. */
	pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__,
		 packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);

	retval = sctp_packet_append_chunk(packet, chunk);

	switch (retval) {
	case SCTP_XMIT_PMTU_FULL:
		if (!packet->has_cookie_echo) {
			int error;

			/* Flush what has been bundled so far. */
			error = sctp_packet_transmit(packet, gfp);
			if (error < 0)
				chunk->skb->sk->sk_err = -error;

			/* If we have an empty packet, then we can NOT ever
			 * return PMTU_FULL.
			 */
			if (!one_packet)
				retval = sctp_packet_append_chunk(packet,
								  chunk);
		}
		break;

	case SCTP_XMIT_RWND_FULL:
	case SCTP_XMIT_OK:
	case SCTP_XMIT_DELAY:
		/* Nothing more to do; hand the status back to the caller. */
		break;
	}

	return retval;
}

210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
/* Try to bundle an auth chunk into the packet. */
static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
					   struct sctp_chunk *chunk)
{
	struct sctp_association *asoc = pkt->transport->asoc;
	struct sctp_chunk *auth;
	sctp_xmit_t retval = SCTP_XMIT_OK;

	/* if we don't have an association, we can't do authentication */
	if (!asoc)
		return retval;

	/* See if this is an auth chunk we are bundling or if
	 * auth is already bundled.
	 */
225
	if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth)
226 227 228 229 230 231 232 233 234 235 236 237
		return retval;

	/* if the peer did not request this chunk to be authenticated,
	 * don't do it
	 */
	if (!chunk->auth)
		return retval;

	auth = sctp_make_auth(asoc);
	if (!auth)
		return retval;

238 239 240 241
	retval = __sctp_packet_append_chunk(pkt, auth);

	if (retval != SCTP_XMIT_OK)
		sctp_chunk_free(auth);
242 243 244 245

	return retval;
}

L
Linus Torvalds 已提交
246 247 248 249 250 251 252 253 254 255 256 257
/* Try to bundle a SACK with the packet. */
static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
					   struct sctp_chunk *chunk)
{
	sctp_xmit_t retval = SCTP_XMIT_OK;

	/* If sending DATA and haven't aleady bundled a SACK, try to
	 * bundle one in to the packet.
	 */
	if (sctp_chunk_is_data(chunk) && !pkt->has_sack &&
	    !pkt->has_cookie_echo) {
		struct sctp_association *asoc;
D
Doug Graham 已提交
258
		struct timer_list *timer;
L
Linus Torvalds 已提交
259
		asoc = pkt->transport->asoc;
D
Doug Graham 已提交
260
		timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
L
Linus Torvalds 已提交
261

D
Doug Graham 已提交
262 263
		/* If the SACK timer is running, we have a pending SACK */
		if (timer_pending(timer)) {
L
Linus Torvalds 已提交
264
			struct sctp_chunk *sack;
265 266 267 268 269

			if (pkt->transport->sack_generation !=
			    pkt->transport->asoc->peer.sack_generation)
				return retval;

L
Linus Torvalds 已提交
270 271 272
			asoc->a_rwnd = asoc->rwnd;
			sack = sctp_make_sack(asoc);
			if (sack) {
273 274 275 276 277
				retval = __sctp_packet_append_chunk(pkt, sack);
				if (retval != SCTP_XMIT_OK) {
					sctp_chunk_free(sack);
					goto out;
				}
L
Linus Torvalds 已提交
278
				asoc->peer.sack_needed = 0;
D
Doug Graham 已提交
279
				if (del_timer(timer))
L
Linus Torvalds 已提交
280 281 282 283
					sctp_association_put(asoc);
			}
		}
	}
284
out:
L
Linus Torvalds 已提交
285 286 287
	return retval;
}

288

L
Linus Torvalds 已提交
289 290 291
/* Append a chunk to the offered packet reporting back any inability to do
 * so.
 */
292 293
static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
					      struct sctp_chunk *chunk)
L
Linus Torvalds 已提交
294 295
{
	sctp_xmit_t retval = SCTP_XMIT_OK;
296
	__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
L
Linus Torvalds 已提交
297

298 299 300 301
	/* Check to see if this chunk will fit into the packet */
	retval = sctp_packet_will_fit(packet, chunk, chunk_len);
	if (retval != SCTP_XMIT_OK)
		goto finish;
L
Linus Torvalds 已提交
302

303
	/* We believe that this chunk is OK to add to the packet */
304
	switch (chunk->chunk_hdr->type) {
305
	case SCTP_CID_DATA:
306 307
		/* Account for the data being in the packet */
		sctp_packet_append_data(packet, chunk);
L
Linus Torvalds 已提交
308 309
		/* Disallow SACK bundling after DATA. */
		packet->has_sack = 1;
310 311 312 313
		/* Disallow AUTH bundling after DATA */
		packet->has_auth = 1;
		/* Let it be knows that packet has DATA in it */
		packet->has_data = 1;
314 315
		/* timestamp the chunk for rtx purposes */
		chunk->sent_at = jiffies;
X
Xin Long 已提交
316 317
		/* Mainly used for prsctp RTX policy */
		chunk->sent_count++;
318
		break;
319
	case SCTP_CID_COOKIE_ECHO:
L
Linus Torvalds 已提交
320
		packet->has_cookie_echo = 1;
321 322
		break;

323
	case SCTP_CID_SACK:
L
Linus Torvalds 已提交
324
		packet->has_sack = 1;
325 326
		if (chunk->asoc)
			chunk->asoc->stats.osacks++;
327 328
		break;

329
	case SCTP_CID_AUTH:
330 331 332 333
		packet->has_auth = 1;
		packet->auth = chunk;
		break;
	}
L
Linus Torvalds 已提交
334 335

	/* It is OK to send this chunk.  */
336
	list_add_tail(&chunk->list, &packet->chunk_list);
L
Linus Torvalds 已提交
337 338 339 340 341 342
	packet->size += chunk_len;
	chunk->transport = packet->transport;
finish:
	return retval;
}

343 344 345 346 347 348 349 350
/* Append a chunk to the offered packet, bundling an AUTH and/or SACK chunk
 * in front of it when appropriate.  Reports back any inability to do so.
 */
sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
				     struct sctp_chunk *chunk)
{
	sctp_xmit_t status;

	pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);

	/* Data chunks are special.  Before seeing what else we can
	 * bundle into this packet, check to see if we are allowed to
	 * send this DATA.
	 */
	if (sctp_chunk_is_data(chunk)) {
		status = sctp_packet_can_append_data(packet, chunk);
		if (status != SCTP_XMIT_OK)
			return status;
	}

	/* Try to bundle AUTH chunk */
	status = sctp_packet_bundle_auth(packet, chunk);
	if (status != SCTP_XMIT_OK)
		return status;

	/* Try to bundle SACK chunk */
	status = sctp_packet_bundle_sack(packet, chunk);
	if (status != SCTP_XMIT_OK)
		return status;

	return __sctp_packet_append_chunk(packet, chunk);
}

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
/* skb destructor installed by sctp_packet_set_owner_w(): calls sk_free()
 * on the socket the skb was charged to.
 */
static void sctp_packet_release_owner(struct sk_buff *skb)
{
	struct sock *owner = skb->sk;

	sk_free(owner);
}

/* Make @sk the owner of @skb for the duration of the transmit. */
static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	/* Detach from any previous owner before installing the new one. */
	skb_orphan(skb);
	skb->destructor = sctp_packet_release_owner;
	skb->sk = sk;

	/*
	 * sctp_sendmsg() has already accounted for the data chunks, so a
	 * single byte is reserved here only to keep the socket around
	 * until the packet has been transmitted.
	 */
	atomic_inc(&sk->sk_wmem_alloc);
}

X
Xin Long 已提交
398 399
/* Copy the packet's queued chunks into skb data and set up checksumming.
 *
 * Without @gso all chunks are copied straight into @head.  With @gso the
 * chunks are grouped into segments no larger than the transport's path
 * MTU; each segment gets its own skb, which is attached to @head through
 * skb_gro_receive().
 *
 * Returns the number of segments packed, or 0 on failure.
 */
static int sctp_packet_pack(struct sctp_packet *packet,
			    struct sk_buff *head, int gso, gfp_t gfp)
{
	struct sctp_transport *tp = packet->transport;
	struct sctp_auth_chunk *auth = NULL;
	struct sctp_chunk *ch, *next;
	struct sock *sk = head->sk;
	struct sk_buff *seg;
	int pkt_count = 0;
	int auth_len = 0;
	int seg_size;

	if (!gso) {
		/* Single skb: everything goes into head itself. */
		seg = head;
		seg_size = packet->size;
		goto merge;
	}

	skb_shinfo(head)->gso_type = sk->sk_gso_type;
	NAPI_GRO_CB(head)->last = head;

	do {
		/* Work out how much fits into this segment, then allocate
		 * an skb for it.
		 */
		seg_size = packet->overhead;
		list_for_each_entry_safe(ch, next, &packet->chunk_list,
					 list) {
			int padded = SCTP_PAD4(ch->skb->len);

			if (ch == packet->auth) {
				auth_len = padded;
			} else {
				/* Even alone (plus AUTH) this chunk would
				 * exceed the path MTU: give up.
				 */
				if (auth_len + padded + packet->overhead >
				    tp->pathmtu)
					return 0;
				/* Segment is full; the rest goes next. */
				if (seg_size + padded > tp->pathmtu)
					break;
			}
			seg_size += padded;
		}

		seg = alloc_skb(seg_size + MAX_HEADER, gfp);
		if (!seg)
			return 0;
		skb_reserve(seg, packet->overhead + MAX_HEADER);

merge:
		/* Move chunks into seg until its payload budget is used up. */
		seg_size -= packet->overhead;
		list_for_each_entry_safe(ch, next, &packet->chunk_list, list) {
			int pad;

			list_del_init(&ch->list);

			/* Mark a first-transmission DATA chunk for RTT
			 * measurement if none is pending on this transport.
			 */
			if (sctp_chunk_is_data(ch) &&
			    !sctp_chunk_retransmitted(ch) &&
			    !tp->rto_pending) {
				ch->rtt_in_progress = 1;
				tp->rto_pending = 1;
			}

			/* Zero-pad the chunk up to a 4-byte boundary. */
			pad = SCTP_PAD4(ch->skb->len) - ch->skb->len;
			if (pad)
				memset(skb_put(ch->skb, pad), 0, pad);

			/* Remember where AUTH lands in seg for the HMAC. */
			if (ch == packet->auth)
				auth = (struct sctp_auth_chunk *)
							skb_tail_pointer(seg);

			memcpy(skb_put(seg, ch->skb->len), ch->skb->data,
			       ch->skb->len);

			pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
				 ch,
				 sctp_cname(SCTP_ST_CHUNK(ch->chunk_hdr->type)),
				 ch->has_tsn ? "TSN" : "No TSN",
				 ch->has_tsn ? ntohl(ch->subh.data_hdr->tsn) : 0,
				 ntohs(ch->chunk_hdr->length), ch->skb->len,
				 ch->rtt_in_progress);

			seg_size -= SCTP_PAD4(ch->skb->len);

			/* DATA chunks and the AUTH chunk are not freed here. */
			if (!sctp_chunk_is_data(ch) && ch != packet->auth)
				sctp_chunk_free(ch);

			if (!seg_size)
				break;
		}

		if (auth) {
			sctp_auth_calculate_hmac(tp->asoc, seg, auth, gfp);
			/* Free AUTH once nothing is left to send; otherwise
			 * put it back at the front for the next segment.
			 */
			if (list_empty(&packet->chunk_list))
				sctp_chunk_free(packet->auth);
			else
				list_add(&packet->auth->list,
					 &packet->chunk_list);
		}

		if (gso) {
			if (skb_gro_receive(&head, seg)) {
				kfree_skb(seg);
				return 0;
			}
			if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
					 sk->sk_gso_max_segs))
				return 0;
		}

		pkt_count++;
	} while (!list_empty(&packet->chunk_list));

	if (gso) {
		memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
					sizeof(struct inet6_skb_parm)));
		skb_shinfo(head)->gso_segs = pkt_count;
		skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
		rcu_read_lock();
		if (skb_dst(head) != tp->dst) {
			dst_hold(tp->dst);
			sk_setup_caps(sk, tp->dst);
		}
		rcu_read_unlock();
		/* The GSO path always uses CHECKSUM_PARTIAL. */
		goto chksum;
	}

	if (sctp_checksum_disable)
		return 1;

	/* Compute the checksum here when the device lacks
	 * NETIF_F_SCTP_CRC, the route goes through xfrm, or the packet
	 * may be fragmented at the IP layer.
	 */
	if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) ||
	    dst_xfrm(skb_dst(head)) || packet->ipfragok) {
		struct sctphdr *sh =
			(struct sctphdr *)skb_transport_header(head);

		sh->checksum = sctp_compute_cksum(head, 0);
		return pkt_count;
	}

chksum:
	head->ip_summed = CHECKSUM_PARTIAL;
	head->csum_start = skb_transport_header(head) - head->head;
	head->csum_offset = offsetof(struct sctphdr, checksum);

	return pkt_count;
}

/* All packets are sent to the network through this function from
 * sctp_outq_tail().
 *
 * Builds the head skb and SCTP header, packs the queued chunks into it,
 * hands it to the transport's sctp_xmit() and finally empties and resets
 * the packet, whether or not transmission happened.
 *
 * The return value is always 0 for now.
 */
int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
{
	struct sctp_transport *tp = packet->transport;
	struct sctp_association *asoc = tp->asoc;
	struct sctp_chunk *ch, *next;
	struct dst_entry *dst;
	struct sk_buff *head;
	struct sctphdr *sh;
	struct sock *sk;
	size_t head_len;
	int pkt_count;
	int confirm;
	int gso = 0;

	pr_debug("%s: packet:%p\n", __func__, packet);

	if (list_empty(&packet->chunk_list))
		return 0;

	/* The owning socket is taken from the first queued chunk. */
	ch = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
	sk = ch->skb->sk;

	/* A packet larger than the path MTU that may not be IP-fragmented
	 * has to go out via GSO.
	 */
	if (packet->size > tp->pathmtu && !packet->ipfragok) {
		if (!sk_can_gso(sk)) {
			pr_err_once("Trying to GSO but underlying device doesn't support it.");
			goto out;
		}
		gso = 1;
	}

	/* With GSO the head skb carries only the headers. */
	if (gso)
		head_len = packet->overhead;
	else
		head_len = packet->size;

	head = alloc_skb(head_len + MAX_HEADER, gfp);
	if (!head)
		goto out;
	skb_reserve(head, packet->overhead + MAX_HEADER);
	sctp_packet_set_owner_w(head, sk);

	/* Fill in the SCTP common header; the checksum is set later. */
	sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
	skb_reset_transport_header(head);
	sh->source = htons(packet->source_port);
	sh->dest = htons(packet->destination_port);
	sh->vtag = htonl(packet->vtag);
	sh->checksum = 0;

	/* Re-route the transport if its dst is no longer valid. */
	if (!sctp_transport_dst_check(tp)) {
		sctp_transport_route(tp, NULL, sctp_sk(sk));
		if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE))
			sctp_assoc_sync_pmtu(sk, asoc);
	}

	dst = dst_clone(tp->dst);
	if (!dst) {
		IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		kfree_skb(head);
		goto out;
	}
	skb_dst_set(head, dst);

	/* Copy the chunks into the skb(s). */
	pkt_count = sctp_packet_pack(packet, head, gso, gfp);
	if (!pkt_count) {
		kfree_skb(head);
		goto out;
	}
	pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);

	/* (Re)start the autoclose timer when DATA goes out on an
	 * established association that has autoclose configured.
	 */
	if (packet->has_data && sctp_state(asoc, ESTABLISHED) &&
	    asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
		unsigned long timeout =
			asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
		struct timer_list *timer =
			&asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];

		/* A timer that was not already pending needs its own
		 * reference on the association.
		 */
		if (!mod_timer(timer, jiffies + timeout))
			sctp_association_hold(asoc);
	}

	/* Hand the skb to the address-family specific transmit hook. */
	tp->af_specific->ecn_capable(sk);
	if (asoc) {
		asoc->stats.opackets += pkt_count;
		if (asoc->peer.last_sent_to != tp)
			asoc->peer.last_sent_to = tp;
	}
	head->ignore_df = packet->ipfragok;

	confirm = tp->dst_pending_confirm;
	if (confirm)
		skb_set_dst_pending_confirm(head, 1);
	/* neighbour should be confirmed on successful transmission or
	 * positive error
	 */
	if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm)
		tp->dst_pending_confirm = 0;

out:
	/* Drain whatever is still queued; DATA chunks are not freed here. */
	list_for_each_entry_safe(ch, next, &packet->chunk_list, list) {
		list_del_init(&ch->list);
		if (!sctp_chunk_is_data(ch))
			sctp_chunk_free(ch);
	}
	sctp_packet_reset(packet);

	return 0;
}

/********************************************************************
 * 2nd Level Abstractions
 ********************************************************************/

651 652
/* This private function check to see if a chunk can be added */
static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
L
Linus Torvalds 已提交
653 654
					   struct sctp_chunk *chunk)
{
655
	size_t datasize, rwnd, inflight, flight_size;
L
Linus Torvalds 已提交
656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
	struct sctp_transport *transport = packet->transport;
	struct sctp_association *asoc = transport->asoc;
	struct sctp_outq *q = &asoc->outqueue;

	/* RFC 2960 6.1  Transmission of DATA Chunks
	 *
	 * A) At any given time, the data sender MUST NOT transmit new data to
	 * any destination transport address if its peer's rwnd indicates
	 * that the peer has no buffer space (i.e. rwnd is 0, see Section
	 * 6.2.1).  However, regardless of the value of rwnd (including if it
	 * is 0), the data sender can always have one DATA chunk in flight to
	 * the receiver if allowed by cwnd (see rule B below).  This rule
	 * allows the sender to probe for a change in rwnd that the sender
	 * missed due to the SACK having been lost in transit from the data
	 * receiver to the data sender.
	 */

	rwnd = asoc->peer.rwnd;
674 675
	inflight = q->outstanding_bytes;
	flight_size = transport->flight_size;
L
Linus Torvalds 已提交
676 677 678

	datasize = sctp_data_size(chunk);

679 680 681 682 683
	if (datasize > rwnd && inflight > 0)
		/* We have (at least) one data chunk in flight,
		 * so we can't fall back to rule 6.1 B).
		 */
		return SCTP_XMIT_RWND_FULL;
L
Linus Torvalds 已提交
684 685 686 687 688 689 690 691 692 693 694 695 696

	/* RFC 2960 6.1  Transmission of DATA Chunks
	 *
	 * B) At any given time, the sender MUST NOT transmit new data
	 * to a given transport address if it has cwnd or more bytes
	 * of data outstanding to that transport address.
	 */
	/* RFC 7.2.4 & the Implementers Guide 2.8.
	 *
	 * 3) ...
	 *    When a Fast Retransmit is being performed the sender SHOULD
	 *    ignore the value of cwnd and SHOULD NOT delay retransmission.
	 */
697 698 699
	if (chunk->fast_retransmit != SCTP_NEED_FRTX &&
	    flight_size >= transport->cwnd)
		return SCTP_XMIT_RWND_FULL;
L
Linus Torvalds 已提交
700 701 702 703 704 705 706

	/* Nagle's algorithm to solve small-packet problem:
	 * Inhibit the sending of new chunks when new outgoing data arrives
	 * if any previously transmitted data on the connection remains
	 * unacknowledged.
	 */

X
Xin Long 已提交
707 708 709
	if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) &&
	    !chunk->msg->force_delay)
		/* Nothing unacked */
710 711 712 713 714 715 716 717 718 719 720 721
		return SCTP_XMIT_OK;

	if (!sctp_packet_empty(packet))
		/* Append to packet */
		return SCTP_XMIT_OK;

	if (!sctp_state(asoc, ESTABLISHED))
		return SCTP_XMIT_OK;

	/* Check whether this chunk and all the rest of pending data will fit
	 * or delay in hopes of bundling a full sized packet.
	 */
722 723
	if (chunk->skb->len + q->out_qlen >
		transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4)
724 725 726 727 728 729 730 731
		/* Enough data queued to fill a packet */
		return SCTP_XMIT_OK;

	/* Don't delay large message writes that may have been fragmented */
	if (!chunk->msg->can_delay)
		return SCTP_XMIT_OK;

	/* Defer until all data acked or packet full */
732
	return SCTP_XMIT_DELAY;
733 734 735 736 737 738 739 740 741 742 743
}

/* Bookkeeping done when a DATA chunk is added to a packet: in-flight
 * counters, our view of the peer's rwnd, and TSN/SSN assignment.
 */
static void sctp_packet_append_data(struct sctp_packet *packet,
				    struct sctp_chunk *chunk)
{
	struct sctp_transport *transport = packet->transport;
	struct sctp_association *asoc = transport->asoc;
	size_t datasize = sctp_data_size(chunk);
	u32 peer_rwnd = asoc->peer.rwnd;

	/* Bytes in flight over this transport. */
	transport->flight_size += datasize;

	/* Bytes in flight to the receiver as a whole. */
	asoc->outqueue.outstanding_bytes += datasize;

	/* Shrink our view of the receiver's rwnd, clamping at zero. */
	asoc->peer.rwnd = (datasize < peer_rwnd) ? peer_rwnd - datasize : 0;

	sctp_chunk_assign_tsn(chunk);
	sctp_chunk_assign_ssn(chunk);
}

/* Decide whether @chunk_len more bytes may be added to @packet.
 *
 * Returns SCTP_XMIT_OK when the chunk may be appended (possibly after
 * flagging the packet for IP-level fragmentation) or SCTP_XMIT_PMTU_FULL
 * when the packet has to be flushed first.
 */
static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
					struct sctp_chunk *chunk,
					u16 chunk_len)
{
	struct sctp_transport *tp = packet->transport;
	size_t psize = packet->size;
	size_t new_size = psize + chunk_len;
	size_t pmtu, room;

	if (tp->asoc)
		pmtu = tp->asoc->pathmtu;
	else
		pmtu = tp->pathmtu;

	/* Still within the path MTU: nothing to decide. */
	if (new_size <= pmtu)
		return SCTP_XMIT_OK;

	/* It's OK to fragment at IP level if any one of the following
	 * is true:
	 *	1. The packet is empty (meaning this chunk is greater
	 *	   than the MTU)
	 *	2. The packet doesn't have any data in it yet and data
	 *	   requires authentication.
	 */
	if (sctp_packet_empty(packet) ||
	    (!packet->has_data && chunk->auth)) {
		/* We no longer do re-fragmentation.
		 * Just fragment at the IP layer, if we
		 * actually hit this condition
		 */
		packet->ipfragok = 1;
		return SCTP_XMIT_OK;
	}

	/* Similarly, if this chunk was built before a PMTU
	 * reduction, we have to fragment it at IP level now. So
	 * if the packet already contains something, we need to
	 * flush.
	 */
	room = pmtu - packet->overhead;
	if (packet->auth)
		room -= SCTP_PAD4(packet->auth->skb->len);
	if (chunk_len > room)
		return SCTP_XMIT_PMTU_FULL;

	/* It is also okay to fragment if the chunk we are
	 * adding is a control chunk, but only if current packet
	 * is not a GSO one otherwise it causes fragmentation of
	 * a large frame. So in this case we allow the
	 * fragmentation by forcing it to be in a new packet.
	 */
	if (!sctp_chunk_is_data(chunk) && packet->has_data)
		return SCTP_XMIT_PMTU_FULL;

	/* Hit GSO/PMTU limit, gotta flush */
	if (new_size > packet->max_size)
		return SCTP_XMIT_PMTU_FULL;

	/* Do not allow a single GSO packet to use more than half of
	 * cwnd, or half of the original cwnd while burst limiting is
	 * in effect.
	 */
	if (tp->burst_limited) {
		if (new_size > (tp->burst_limited >> 1))
			return SCTP_XMIT_PMTU_FULL;
	} else if (new_size > (tp->cwnd >> 1)) {
		return SCTP_XMIT_PMTU_FULL;
	}

	/* Otherwise it will fit in the GSO packet */
	return SCTP_XMIT_OK;
}