fragmentation.c 15.0 KB
Newer Older
1
/* Copyright (C) 2013-2016  B.A.T.M.A.N. contributors:
2 3 4 5 6 7 8 9 10 11 12 13 14
 *
 * Martin Hundebøll <martin@hundeboll.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
15
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 17 18
 */

#include "fragmentation.h"
19 20 21 22 23 24 25 26 27
#include "main.h"

#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
28
#include <linux/lockdep.h>
29 30 31 32 33 34 35 36
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>

#include "hard-interface.h"
37
#include "originator.h"
38
#include "packet.h"
39
#include "routing.h"
40
#include "send.h"
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
#include "soft-interface.h"

/**
 * batadv_frag_clear_chain - delete entries in the fragment buffer chain
 * @head: head of chain with entries.
 *
 * Free fragments in the passed hlist. Should be called with appropriate lock.
 */
static void batadv_frag_clear_chain(struct hlist_head *head)
{
	struct batadv_frag_list_entry *entry;
	struct hlist_node *node;

	hlist_for_each_entry_safe(entry, node, head, list) {
		hlist_del(&entry->list);
		kfree_skb(entry->skb);
		kfree(entry);
	}
}

/**
 * batadv_frag_purge_orig - free fragments associated to an orig
 * @orig_node: originator to free fragments from
 * @check_cb: optional function to tell if an entry should be purged
 */
void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
			    bool (*check_cb)(struct batadv_frag_table_entry *))
{
	struct batadv_frag_table_entry *chain;
70
	u8 i;
71 72 73

	for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
		chain = &orig_node->fragments[i];
74
		spin_lock_bh(&chain->lock);
75 76

		if (!check_cb || check_cb(chain)) {
77 78
			batadv_frag_clear_chain(&chain->head);
			chain->size = 0;
79 80
		}

81
		spin_unlock_bh(&chain->lock);
82 83 84 85 86 87
	}
}

/**
 * batadv_frag_size_limit - maximum possible size of packet to be fragmented
 *
88
 * Return: the maximum size of payload that can be fragmented.
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
 */
static int batadv_frag_size_limit(void)
{
	int limit = BATADV_FRAG_MAX_FRAG_SIZE;

	limit -= sizeof(struct batadv_frag_packet);
	limit *= BATADV_FRAG_MAX_FRAGMENTS;

	return limit;
}

/**
 * batadv_frag_init_chain - check and prepare fragment chain for new fragment
 * @chain: chain in fragments table to init
 * @seqno: sequence number of the received fragment
 *
 * Make chain ready for a fragment with sequence number "seqno". Delete existing
 * entries if they have an "old" sequence number.
 *
 * Caller must hold chain->lock.
 *
110
 * Return: true if chain is empty and caller can just insert the new fragment
111 112 113
 * without searching for the right position.
 */
static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
114
				   u16 seqno)
115
{
116 117
	lockdep_assert_held(&chain->lock);

118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
	if (chain->seqno == seqno)
		return false;

	if (!hlist_empty(&chain->head))
		batadv_frag_clear_chain(&chain->head);

	chain->size = 0;
	chain->seqno = seqno;

	return true;
}

/**
 * batadv_frag_insert_packet - insert a fragment into a fragment chain
 * @orig_node: originator that the fragment was received from
 * @skb: skb to insert
 * @chain_out: list head to attach complete chains of fragments to
 *
 * Insert a new fragment into the reverse ordered chain in the right table
 * entry. The hash table entry is cleared if "old" fragments exist in it.
 *
139
 * Return: true if skb is buffered, false on error. If the chain has all the
140 141 142 143 144 145 146 147 148
 * fragments needed to merge the packet, the chain is moved to the passed head
 * to avoid locking the chain in the table.
 */
static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
				      struct sk_buff *skb,
				      struct hlist_head *chain_out)
{
	struct batadv_frag_table_entry *chain;
	struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr;
149
	struct batadv_frag_list_entry *frag_entry_last = NULL;
150
	struct batadv_frag_packet *frag_packet;
151 152
	u8 bucket;
	u16 seqno, hdr_size = sizeof(struct batadv_frag_packet);
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
	bool ret = false;

	/* Linearize packet to avoid linearizing 16 packets in a row when doing
	 * the later merge. Non-linear merge should be added to remove this
	 * linearization.
	 */
	if (skb_linearize(skb) < 0)
		goto err;

	frag_packet = (struct batadv_frag_packet *)skb->data;
	seqno = ntohs(frag_packet->seqno);
	bucket = seqno % BATADV_FRAG_BUFFER_COUNT;

	frag_entry_new = kmalloc(sizeof(*frag_entry_new), GFP_ATOMIC);
	if (!frag_entry_new)
		goto err;

	frag_entry_new->skb = skb;
	frag_entry_new->no = frag_packet->no;

	/* Select entry in the "chain table" and delete any prior fragments
	 * with another sequence number. batadv_frag_init_chain() returns true,
	 * if the list is empty at return.
	 */
	chain = &orig_node->fragments[bucket];
	spin_lock_bh(&chain->lock);
	if (batadv_frag_init_chain(chain, seqno)) {
		hlist_add_head(&frag_entry_new->list, &chain->head);
		chain->size = skb->len - hdr_size;
		chain->timestamp = jiffies;
183
		chain->total_size = ntohs(frag_packet->total_size);
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
		ret = true;
		goto out;
	}

	/* Find the position for the new fragment. */
	hlist_for_each_entry(frag_entry_curr, &chain->head, list) {
		/* Drop packet if fragment already exists. */
		if (frag_entry_curr->no == frag_entry_new->no)
			goto err_unlock;

		/* Order fragments from highest to lowest. */
		if (frag_entry_curr->no < frag_entry_new->no) {
			hlist_add_before(&frag_entry_new->list,
					 &frag_entry_curr->list);
			chain->size += skb->len - hdr_size;
			chain->timestamp = jiffies;
			ret = true;
			goto out;
		}
203 204 205

		/* store current entry because it could be the last in list */
		frag_entry_last = frag_entry_curr;
206 207
	}

208 209
	/* Reached the end of the list, so insert after 'frag_entry_last'. */
	if (likely(frag_entry_last)) {
210
		hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
211 212 213 214 215 216 217
		chain->size += skb->len - hdr_size;
		chain->timestamp = jiffies;
		ret = true;
	}

out:
	if (chain->size > batadv_frag_size_limit() ||
218 219
	    chain->total_size != ntohs(frag_packet->total_size) ||
	    chain->total_size > batadv_frag_size_limit()) {
220
		/* Clear chain if total size of either the list or the packet
221 222
		 * exceeds the maximum size of one merged packet. Don't allow
		 * packets to have different total_size.
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
		 */
		batadv_frag_clear_chain(&chain->head);
		chain->size = 0;
	} else if (ntohs(frag_packet->total_size) == chain->size) {
		/* All fragments received. Hand over chain to caller. */
		hlist_move_list(&chain->head, chain_out);
		chain->size = 0;
	}

err_unlock:
	spin_unlock_bh(&chain->lock);

err:
	if (!ret)
		kfree(frag_entry_new);

	return ret;
}

/**
 * batadv_frag_merge_packets - merge a chain of fragments
 * @chain: head of chain with fragments
 *
 * Expand the first skb in the chain and copy the content of the remaining
 * skb's into the expanded one. After doing so, clear the chain.
 *
249
 * Return: the merged skb or NULL on error.
250 251
 */
static struct sk_buff *
252
batadv_frag_merge_packets(struct hlist_head *chain)
253 254 255 256 257 258 259 260 261 262 263 264 265 266
{
	struct batadv_frag_packet *packet;
	struct batadv_frag_list_entry *entry;
	struct sk_buff *skb_out = NULL;
	int size, hdr_size = sizeof(struct batadv_frag_packet);

	/* Remove first entry, as this is the destination for the rest of the
	 * fragments.
	 */
	entry = hlist_entry(chain->first, struct batadv_frag_list_entry, list);
	hlist_del(&entry->list);
	skb_out = entry->skb;
	kfree(entry);

267 268 269
	packet = (struct batadv_frag_packet *)skb_out->data;
	size = ntohs(packet->total_size);

270
	/* Make room for the rest of the fragments. */
271
	if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
		kfree_skb(skb_out);
		skb_out = NULL;
		goto free;
	}

	/* Move the existing MAC header to just before the payload. (Override
	 * the fragment header.)
	 */
	skb_pull_rcsum(skb_out, hdr_size);
	memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
	skb_set_mac_header(skb_out, -ETH_HLEN);
	skb_reset_network_header(skb_out);
	skb_reset_transport_header(skb_out);

	/* Copy the payload of the each fragment into the last skb */
	hlist_for_each_entry(entry, chain, list) {
		size = entry->skb->len - hdr_size;
		memcpy(skb_put(skb_out, size), entry->skb->data + hdr_size,
		       size);
	}

free:
	/* Locking is not needed, because 'chain' is not part of any orig. */
	batadv_frag_clear_chain(chain);
	return skb_out;
}

/**
 * batadv_frag_skb_buffer - buffer fragment for later merge
 * @skb: skb to buffer
 * @orig_node_src: originator that the skb is received from
 *
 * Add fragment to buffer and merge fragments if possible.
 *
 * There are three possible outcomes: 1) Packet is merged: Return true and
 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
 * to NULL; 3) Error: Return false and leave skb as is.
309 310 311
 *
 * Return: true when packet is merged or buffered, false when skb is not not
 * used.
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
 */
bool batadv_frag_skb_buffer(struct sk_buff **skb,
			    struct batadv_orig_node *orig_node_src)
{
	struct sk_buff *skb_out = NULL;
	struct hlist_head head = HLIST_HEAD_INIT;
	bool ret = false;

	/* Add packet to buffer and table entry if merge is possible. */
	if (!batadv_frag_insert_packet(orig_node_src, *skb, &head))
		goto out_err;

	/* Leave if more fragments are needed to merge. */
	if (hlist_empty(&head))
		goto out;

328
	skb_out = batadv_frag_merge_packets(&head);
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
	if (!skb_out)
		goto out_err;

out:
	*skb = skb_out;
	ret = true;
out_err:
	return ret;
}

/**
 * batadv_frag_skb_fwd - forward fragments that would exceed MTU when merged
 * @skb: skb to forward
 * @recv_if: interface that the skb is received on
 * @orig_node_src: originator that the skb is received from
 *
 * Look up the next-hop of the fragments payload and check if the merged packet
 * will exceed the MTU towards the next-hop. If so, the fragment is forwarded
 * without merging it.
 *
349
 * Return: true if the fragment is consumed/forwarded, false otherwise.
350 351 352 353 354 355 356 357 358
 */
bool batadv_frag_skb_fwd(struct sk_buff *skb,
			 struct batadv_hard_iface *recv_if,
			 struct batadv_orig_node *orig_node_src)
{
	struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
	struct batadv_orig_node *orig_node_dst = NULL;
	struct batadv_neigh_node *neigh_node = NULL;
	struct batadv_frag_packet *packet;
359
	u16 total_size;
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
	bool ret = false;

	packet = (struct batadv_frag_packet *)skb->data;
	orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
	if (!orig_node_dst)
		goto out;

	neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
	if (!neigh_node)
		goto out;

	/* Forward the fragment, if the merged packet would be too big to
	 * be assembled.
	 */
	total_size = ntohs(packet->total_size);
	if (total_size > neigh_node->if_incoming->net_dev->mtu) {
		batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_FWD);
		batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES,
				   skb->len + ETH_HLEN);

380
		packet->ttl--;
381
		batadv_send_unicast_skb(skb, neigh_node);
382 383 384 385 386
		ret = true;
	}

out:
	if (orig_node_dst)
387
		batadv_orig_node_put(orig_node_dst);
388
	if (neigh_node)
389
		batadv_neigh_node_put(neigh_node);
390 391
	return ret;
}
392 393 394 395 396 397 398 399 400 401 402

/**
 * batadv_frag_create - create a fragment from skb
 * @skb: skb to create fragment from
 * @frag_head: header to use in new fragment
 * @mtu: size of new fragment
 *
 * Split the passed skb into two fragments: A new one with size matching the
 * passed mtu and the old one with the rest. The new skb contains data from the
 * tail of the old skb.
 *
403
 * Return: the new fragment, NULL on error.
404 405 406 407 408 409
 */
static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
					  struct batadv_frag_packet *frag_head,
					  unsigned int mtu)
{
	struct sk_buff *skb_fragment;
410 411
	unsigned int header_size = sizeof(*frag_head);
	unsigned int fragment_size = mtu - header_size;
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436

	skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
	if (!skb_fragment)
		goto err;

	skb->priority = TC_PRIO_CONTROL;

	/* Eat the last mtu-bytes of the skb */
	skb_reserve(skb_fragment, header_size + ETH_HLEN);
	skb_split(skb, skb_fragment, skb->len - fragment_size);

	/* Add the header */
	skb_push(skb_fragment, header_size);
	memcpy(skb_fragment->data, frag_head, header_size);

err:
	return skb_fragment;
}

/**
 * batadv_frag_send_packet - create up to 16 fragments from the passed skb
 * @skb: skb to create fragments from
 * @orig_node: final destination of the created fragments
 * @neigh_node: next-hop of the created fragments
 *
437
 * Return: true on success, false otherwise.
438 439 440 441 442 443
 */
bool batadv_frag_send_packet(struct sk_buff *skb,
			     struct batadv_orig_node *orig_node,
			     struct batadv_neigh_node *neigh_node)
{
	struct batadv_priv *bat_priv;
444
	struct batadv_hard_iface *primary_if = NULL;
445 446
	struct batadv_frag_packet frag_header;
	struct sk_buff *skb_fragment;
447 448 449
	unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
	unsigned int header_size = sizeof(frag_header);
	unsigned int max_fragment_size, max_packet_size;
450
	bool ret = false;
451 452 453 454

	/* To avoid merge and refragmentation at next-hops we never send
	 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
	 */
455
	mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
456
	max_fragment_size = mtu - header_size;
457 458 459 460 461 462 463 464 465 466 467 468
	max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;

	/* Don't even try to fragment, if we need more than 16 fragments */
	if (skb->len > max_packet_size)
		goto out_err;

	bat_priv = orig_node->bat_priv;
	primary_if = batadv_primary_if_get_selected(bat_priv);
	if (!primary_if)
		goto out_err;

	/* Create one header to be copied to all fragments */
469 470 471
	frag_header.packet_type = BATADV_UNICAST_FRAG;
	frag_header.version = BATADV_COMPAT_VERSION;
	frag_header.ttl = BATADV_TTL;
472 473 474 475
	frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno));
	frag_header.reserved = 0;
	frag_header.no = 0;
	frag_header.total_size = htons(skb->len);
476 477
	ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
	ether_addr_copy(frag_header.dest, orig_node->orig);
478 479 480 481 482 483 484 485 486 487

	/* Eat and send fragments from the tail of skb */
	while (skb->len > max_fragment_size) {
		skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
		if (!skb_fragment)
			goto out_err;

		batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
		batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
				   skb_fragment->len + ETH_HLEN);
488
		batadv_send_unicast_skb(skb_fragment, neigh_node);
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
		frag_header.no++;

		/* The initial check in this function should cover this case */
		if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
			goto out_err;
	}

	/* Make room for the fragment header. */
	if (batadv_skb_head_push(skb, header_size) < 0 ||
	    pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
		goto out_err;

	memcpy(skb->data, &frag_header, header_size);

	/* Send the last fragment */
	batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
	batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
			   skb->len + ETH_HLEN);
507
	batadv_send_unicast_skb(skb, neigh_node);
508

509 510
	ret = true;

511
out_err:
512
	if (primary_if)
513
		batadv_hardif_put(primary_if);
514 515

	return ret;
516
}