cls_u32.c 17.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
34
#include <linux/slab.h>
L
Linus Torvalds 已提交
35 36 37 38 39 40
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
41
#include <linux/bitmap.h>
42
#include <net/netlink.h>
L
Linus Torvalds 已提交
43 44 45
#include <net/act_api.h>
#include <net/pkt_cls.h>

E
Eric Dumazet 已提交
46
struct tc_u_knode {
L
Linus Torvalds 已提交
47 48 49 50 51
	struct tc_u_knode	*next;
	u32			handle;
	struct tc_u_hnode	*ht_up;
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
52
	int			ifindex;
L
Linus Torvalds 已提交
53 54 55 56 57 58 59 60 61 62 63 64 65
#endif
	u8			fshift;
	struct tcf_result	res;
	struct tc_u_hnode	*ht_down;
#ifdef CONFIG_CLS_U32_PERF
	struct tc_u32_pcnt	*pf;
#endif
#ifdef CONFIG_CLS_U32_MARK
	struct tc_u32_mark	mark;
#endif
	struct tc_u32_sel	sel;
};

E
Eric Dumazet 已提交
66
struct tc_u_hnode {
L
Linus Torvalds 已提交
67 68 69 70 71
	struct tc_u_hnode	*next;
	u32			handle;
	u32			prio;
	struct tc_u_common	*tp_c;
	int			refcnt;
E
Eric Dumazet 已提交
72
	unsigned int		divisor;
L
Linus Torvalds 已提交
73 74 75
	struct tc_u_knode	*ht[1];
};

E
Eric Dumazet 已提交
76
struct tc_u_common {
L
Linus Torvalds 已提交
77 78 79 80 81 82
	struct tc_u_hnode	*hlist;
	struct Qdisc		*q;
	int			refcnt;
	u32			hgenerator;
};

E
Eric Dumazet 已提交
83 84 85
/*
 * Reduce a 32bit packet word to a hash value: apply the selector's hash
 * mask (both operands are big-endian), convert to host order and shift
 * right by @fshift.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	return ntohl(key & sel->hmask) >> fshift;
}

92
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res)
L
Linus Torvalds 已提交
93 94 95
{
	struct {
		struct tc_u_knode *knode;
96
		unsigned int	  off;
L
Linus Torvalds 已提交
97 98
	} stack[TC_U32_MAXDEPTH];

99
	struct tc_u_hnode *ht = tp->root;
100
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
	n = ht->ht[sel];

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
E
Eric Dumazet 已提交
118
		n->pf->rcnt += 1;
L
Linus Torvalds 已提交
119 120 121 122
		j = 0;
#endif

#ifdef CONFIG_CLS_U32_MARK
T
Thomas Graf 已提交
123
		if ((skb->mark & n->mark.mask) != n->mark.val) {
L
Linus Torvalds 已提交
124 125 126 127 128 129 130
			n = n->next;
			goto next_knode;
		} else {
			n->mark.success++;
		}
#endif

E
Eric Dumazet 已提交
131
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
132
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
133
			__be32 *data, hdata;
134

D
Dan Carpenter 已提交
135
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
136 137
				goto out;

S
stephen hemminger 已提交
138
			data = skb_header_pointer(skb, toff, 4, &hdata);
139 140 141
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
L
Linus Torvalds 已提交
142 143 144 145
				n = n->next;
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
E
Eric Dumazet 已提交
146
			n->pf->kcnts[j] += 1;
L
Linus Torvalds 已提交
147 148 149 150 151
			j++;
#endif
		}
		if (n->ht_down == NULL) {
check_terminal:
E
Eric Dumazet 已提交
152
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
153 154 155

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
156
				if (!tcf_match_indev(skb, n->ifindex)) {
L
Linus Torvalds 已提交
157 158 159 160 161
					n = n->next;
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
E
Eric Dumazet 已提交
162
				n->pf->rhit += 1;
L
Linus Torvalds 已提交
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
					n = n->next;
					goto next_knode;
				}

				return r;
			}
			n = n->next;
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
180
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
181 182 183 184
		sdepth++;

		ht = n->ht_down;
		sel = 0;
185
		if (ht->divisor) {
S
stephen hemminger 已提交
186
			__be32 *data, hdata;
187 188

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
189
						  &hdata);
190 191 192 193 194
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
195
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
196 197
			goto next_ht;

E
Eric Dumazet 已提交
198
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
199
			off2 = n->sel.off + 3;
200
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
201
				__be16 *data, hdata;
202 203 204

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
205
							  2, &hdata);
206 207 208 209 210
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
211 212
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
213
		if (n->sel.flags & TC_U32_EAT) {
214
			off += off2;
L
Linus Torvalds 已提交
215 216 217
			off2 = 0;
		}

218
		if (off < skb->len)
L
Linus Torvalds 已提交
219 220 221 222 223 224 225
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
		ht = n->ht_up;
226
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
227 228
		goto check_terminal;
	}
229
out:
L
Linus Torvalds 已提交
230 231 232
	return -1;

deadloop:
233
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
234 235 236
	return -1;
}

E
Eric Dumazet 已提交
237
static struct tc_u_hnode *
L
Linus Torvalds 已提交
238 239 240 241 242 243 244 245 246 247 248
u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *ht;

	for (ht = tp_c->hlist; ht; ht = ht->next)
		if (ht->handle == handle)
			break;

	return ht;
}

E
Eric Dumazet 已提交
249
static struct tc_u_knode *
L
Linus Torvalds 已提交
250 251
u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
E
Eric Dumazet 已提交
252
	unsigned int sel;
L
Linus Torvalds 已提交
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
	struct tc_u_knode *n = NULL;

	sel = TC_U32_HASH(handle);
	if (sel > ht->divisor)
		goto out;

	for (n = ht->ht[sel]; n; n = n->next)
		if (n->handle == handle)
			break;
out:
	return n;
}


/*
 * Translate a 32bit @handle into an opaque reference.
 *
 * A handle without a key part refers to a hash table, otherwise to a key
 * node inside that table.  Returns the object's address cast to
 * unsigned long, or 0 when the table or the node does not exist.
 */
static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(handle) != TC_U32_ROOT)
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
	else
		ht = tp->root;

	if (ht == NULL)
		return 0;

	if (TC_U32_KEY(handle) != 0)
		return (unsigned long)u32_lookup_key(ht, handle);

	return (unsigned long)ht;
}

/*
 * Counterpart of u32_get(); intentionally empty, nothing is done when a
 * reference is released.
 */
static void u32_put(struct tcf_proto *tp, unsigned long f)
{
}

/*
 * Pick an unused hash table handle.
 *
 * Advances tp_c->hgenerator (wrapping to 1 when it reaches 0x7FF) and
 * builds the candidate handle (hgenerator | 0x800) << 20 until
 * u32_lookup_ht() no longer finds it.  Gives up once the attempt budget
 * of 0x800 is used up.
 *
 * Returns the free handle, or 0 when none was found.
 */
static u32 gen_new_htid(struct tc_u_common *tp_c)
{
	int budget = 0x800;
	u32 candidate;

	for (;;) {
		tp_c->hgenerator++;
		if (tp_c->hgenerator == 0x7FF)
			tp_c->hgenerator = 1;
		candidate = (tp_c->hgenerator | 0x800) << 20;

		budget--;
		if (budget <= 0)
			return 0;
		if (!u32_lookup_ht(tp_c, candidate))
			return candidate;
	}
}

static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;

307
	tp_c = tp->q->u32_node;
L
Linus Torvalds 已提交
308

309
	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
L
Linus Torvalds 已提交
310 311 312 313 314 315 316 317 318
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->divisor = 0;
	root_ht->refcnt++;
	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
	root_ht->prio = tp->prio;

	if (tp_c == NULL) {
319
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
L
Linus Torvalds 已提交
320 321 322 323 324
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
		tp_c->q = tp->q;
325
		tp->q->u32_node = tp_c;
L
Linus Torvalds 已提交
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
	}

	tp_c->refcnt++;
	root_ht->next = tp_c->hlist;
	tp_c->hlist = root_ht;
	root_ht->tp_c = tp_c;

	tp->root = root_ht;
	tp->data = tp_c;
	return 0;
}

static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
{
	tcf_unbind_filter(tp, &n->res);
	tcf_exts_destroy(tp, &n->exts);
	if (n->ht_down)
		n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
345
	kfree(n->pf);
L
Linus Torvalds 已提交
346 347 348 349 350
#endif
	kfree(n);
	return 0;
}

351
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
{
	struct tc_u_knode **kp;
	struct tc_u_hnode *ht = key->ht_up;

	if (ht) {
		for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
			if (*kp == key) {
				tcf_tree_lock(tp);
				*kp = key->next;
				tcf_tree_unlock(tp);

				u32_destroy_key(tp, key);
				return 0;
			}
		}
	}
368
	WARN_ON(1);
L
Linus Torvalds 已提交
369 370 371 372 373 374
	return 0;
}

static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
375
	unsigned int h;
L
Linus Torvalds 已提交
376

E
Eric Dumazet 已提交
377
	for (h = 0; h <= ht->divisor; h++) {
L
Linus Torvalds 已提交
378 379 380 381 382 383 384 385 386 387 388 389 390
		while ((n = ht->ht[h]) != NULL) {
			ht->ht[h] = n->next;

			u32_destroy_key(tp, n);
		}
	}
}

static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode **hn;

391
	WARN_ON(ht->refcnt);
L
Linus Torvalds 已提交
392 393 394 395 396 397 398 399 400 401 402

	u32_clear_hnode(tp, ht);

	for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
		if (*hn == ht) {
			*hn = ht->next;
			kfree(ht);
			return 0;
		}
	}

403
	WARN_ON(1);
L
Linus Torvalds 已提交
404 405 406 407 408 409
	return -ENOENT;
}

/*
 * Tear down a u32 tcf_proto.
 *
 * Drops the reference on the root table (destroying it when that was the
 * last one) and the reference on the shared state.  When the shared
 * state is no longer used by any tcf_proto, all remaining tables are
 * emptied and freed together with the shared state itself.
 */
static void u32_destroy(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht = tp->root;
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;

	WARN_ON(root_ht == NULL);

	if (root_ht) {
		root_ht->refcnt--;
		if (root_ht->refcnt == 0)
			u32_destroy_hnode(tp, root_ht);
	}

	tp_c->refcnt--;
	if (tp_c->refcnt != 0)
		goto done;

	tp->q->u32_node = NULL;

	/*
	 * First pass: drop each table's own reference and destroy its keys,
	 * which releases the references keys hold on other tables.
	 */
	for (ht = tp_c->hlist; ht; ht = ht->next) {
		ht->refcnt--;
		u32_clear_hnode(tp, ht);
	}

	/* Second pass: every table should now be unreferenced; free them. */
	while (tp_c->hlist != NULL) {
		ht = tp_c->hlist;
		tp_c->hlist = ht->next;

		WARN_ON(ht->refcnt != 0);

		kfree(ht);
	}

	kfree(tp_c);

done:
	tp->data = NULL;
}

static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
E
Eric Dumazet 已提交
443
	struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
L
Linus Torvalds 已提交
444 445 446 447 448

	if (ht == NULL)
		return 0;

	if (TC_U32_KEY(ht->handle))
E
Eric Dumazet 已提交
449
		return u32_delete_key(tp, (struct tc_u_knode *)ht);
L
Linus Torvalds 已提交
450 451 452 453

	if (tp->root == ht)
		return -EINVAL;

454 455
	if (ht->refcnt == 1) {
		ht->refcnt--;
L
Linus Torvalds 已提交
456
		u32_destroy_hnode(tp, ht);
457 458 459
	} else {
		return -EBUSY;
	}
L
Linus Torvalds 已提交
460 461 462 463

	return 0;
}

464
#define NR_U32_NODE (1<<12)
L
Linus Torvalds 已提交
465 466 467
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
	struct tc_u_knode *n;
468 469 470 471 472
	unsigned long i;
	unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
					GFP_KERNEL);
	if (!bitmap)
		return handle | 0xFFF;
L
Linus Torvalds 已提交
473

E
Eric Dumazet 已提交
474
	for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
475
		set_bit(TC_U32_NODE(n->handle), bitmap);
L
Linus Torvalds 已提交
476

477 478 479 480 481 482
	i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
	if (i >= NR_U32_NODE)
		i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);

	kfree(bitmap);
	return handle | (i >= NR_U32_NODE ? 0xFFF : i);
L
Linus Torvalds 已提交
483 484
}

485 486 487 488 489 490 491 492 493 494
/*
 * Netlink attribute policy for the nested TCA_OPTIONS of a u32 filter.
 * Note that for TCA_U32_SEL only the fixed-size selector header is
 * covered by .len; the trailing keys are not described here.
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	[TCA_U32_CLASSID] = { .type = NLA_U32 },
	[TCA_U32_HASH] = { .type = NLA_U32 },
	[TCA_U32_LINK] = { .type = NLA_U32 },
	[TCA_U32_DIVISOR] = { .type = NLA_U32 },
	[TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
};

495 496
/*
 * Apply the optional attributes in @tb to key node @n.
 *
 * @net:  network namespace used for the input device lookup
 * @tp:   owning tcf_proto
 * @base: base class passed through to tcf_bind_filter()
 * @ht:   hash table the node belongs to (used to reach the shared state)
 * @n:    key node to update
 * @tb:   parsed TCA_U32_* attributes
 * @est:  rate estimator attribute handed to tcf_exts_validate()
 * @ovr:  override flag handed to tcf_exts_validate()
 *
 * Everything that can fail (extension validation, input device lookup,
 * link target lookup) is done before the node is modified, so a failure
 * never leaves @n half-updated with a reference taken on a linked table
 * or a class already bound.
 *
 * Returns 0 on success, the error from tcf_exts_validate(), or -EINVAL
 * for a bad link handle, an unknown link target or a failed device
 * lookup.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr)
{
	int err;
	struct tcf_exts e;
#ifdef CONFIG_NET_CLS_IND
	int ifindex = 0;
#endif

	tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
	if (err < 0)
		return err;

	err = -EINVAL;

#ifdef CONFIG_NET_CLS_IND
	/* Resolve the device first: this is a pure lookup and may fail. */
	if (tb[TCA_U32_INDEV]) {
		ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV]);
		if (ifindex < 0)
			goto errout;
	}
#endif

	if (tb[TCA_U32_LINK]) {
		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *ht_down = NULL, *ht_old;

		/* A link must name a table, not a key node. */
		if (TC_U32_KEY(handle))
			goto errout;

		/* Handle 0 removes an existing link. */
		if (handle) {
			ht_down = u32_lookup_ht(ht->tp_c, handle);

			if (ht_down == NULL)
				goto errout;
			ht_down->refcnt++;
		}

		/* Swap the link under the tree lock; classify reads it. */
		tcf_tree_lock(tp);
		ht_old = n->ht_down;
		n->ht_down = ht_down;
		tcf_tree_unlock(tp);

		if (ht_old)
			ht_old->refcnt--;
	}
	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_U32_INDEV])
		n->ifindex = ifindex;
#endif
	tcf_exts_change(tp, &n->exts, &e);

	return 0;
errout:
	tcf_exts_destroy(tp, &e);
	return err;
}

554
/*
 * Create or modify a u32 object from a netlink request.
 *
 * Three cases are handled:
 *  - *arg already refers to a key node: only its parameters are updated;
 *  - TCA_U32_DIVISOR is present: a new hash table is created;
 *  - otherwise: a new key node is created from TCA_U32_SEL and inserted
 *    into the table named by TCA_U32_HASH (or the root table), keeping
 *    the bucket chain ordered by node id.
 *
 * On successful creation *arg is set to the new object.
 *
 * Returns 0 on success, -EINVAL for malformed or inconsistent
 * attributes, -ENOMEM when no table id is available, -ENOBUFS on
 * allocation failure, or the error reported by nla_parse_nested() /
 * u32_set_parms().
 */
static int u32_change(struct net *net, struct sk_buff *in_skb,
		      struct tcf_proto *tp, unsigned long base, u32 handle,
		      struct nlattr **tca,
		      unsigned long *arg, bool ovr)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
	size_t sel_size;
	u32 htid;
	int err;

	if (opt == NULL)
		return handle ? -EINVAL : 0;

	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
	if (err < 0)
		return err;

	n = (struct tc_u_knode *)*arg;
	if (n) {
		/* Existing hash tables cannot be changed, only key nodes. */
		if (TC_U32_KEY(n->handle) == 0)
			return -EINVAL;

		return u32_set_parms(net, tp, base, n->ht_up, n, tb,
				     tca[TCA_RATE], ovr);
	}

	if (tb[TCA_U32_DIVISOR]) {
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);

		/*
		 * The classifier selects a bucket with "divisor & hash", so
		 * the bucket count has to be a power of two; otherwise some
		 * buckets could never be reached.
		 */
		if (divisor == 0 || (divisor & (divisor - 1)))
			return -EINVAL;
		/* Store the bucket count minus one; at most 256 buckets. */
		if (--divisor > 0x100)
			return -EINVAL;
		if (TC_U32_KEY(handle))
			return -EINVAL;
		if (handle == 0) {
			handle = gen_new_htid(tp->data);
			if (handle == 0)
				return -ENOMEM;
		}
		/* ht[] already has one slot; add `divisor` more. */
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
		if (ht == NULL)
			return -ENOBUFS;
		ht->tp_c = tp_c;
		ht->refcnt = 1;
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
		ht->next = tp_c->hlist;
		tp_c->hlist = ht;
		*arg = (unsigned long)ht;
		return 0;
	}

	if (tb[TCA_U32_HASH]) {
		htid = nla_get_u32(tb[TCA_U32_HASH]);
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
			ht = tp->root;
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
			if (ht == NULL)
				return -EINVAL;
		}
	} else {
		ht = tp->root;
		htid = ht->handle;
	}

	/* The requested bucket must exist in the target table. */
	if (ht->divisor < TC_U32_HASH(htid))
		return -EINVAL;

	if (handle) {
		/* An explicit table id in the handle must match the table. */
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
			return -EINVAL;
		handle = htid | TC_U32_NODE(handle);
	} else
		handle = gen_new_kid(ht, htid);

	if (tb[TCA_U32_SEL] == NULL)
		return -EINVAL;

	s = nla_data(tb[TCA_U32_SEL]);

	/*
	 * nkeys is supplied by userspace and the attribute policy only
	 * guarantees the fixed selector header.  Make sure the attribute
	 * really carries all the keys it announces before copying them.
	 */
	sel_size = sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key);
	if ((size_t)nla_len(tb[TCA_U32_SEL]) < sel_size)
		return -EINVAL;

	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
	if (n == NULL)
		return -ENOBUFS;

#ifdef CONFIG_CLS_U32_PERF
	n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
	if (n->pf == NULL) {
		kfree(n);
		return -ENOBUFS;
	}
#endif

	memcpy(&n->sel, s, sel_size);
	n->ht_up = ht;
	n->handle = handle;
	/* Shift that moves the lowest set bit of the hash mask to bit 0. */
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
	tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);

#ifdef CONFIG_CLS_U32_MARK
	if (tb[TCA_U32_MARK]) {
		struct tc_u32_mark *mark;

		mark = nla_data(tb[TCA_U32_MARK]);
		memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
		n->mark.success = 0;
	}
#endif

	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
	if (err == 0) {
		struct tc_u_knode **ins;

		/* Keep the bucket chain sorted by ascending node id. */
		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
			if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
				break;

		n->next = *ins;
		tcf_tree_lock(tp);
		*ins = n;
		tcf_tree_unlock(tp);

		*arg = (unsigned long)n;
		return 0;
	}
#ifdef CONFIG_CLS_U32_PERF
	kfree(n->pf);
#endif
	kfree(n);
	return err;
}

/*
 * Iterate over all hash tables with this tcf_proto's priority and over
 * all key nodes inside them, calling arg->fn for each object once
 * arg->skip objects have been passed over.  arg->count tracks the number
 * of objects seen; a negative return from the callback sets arg->stop
 * and ends the walk.
 */
static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	unsigned int bucket;

	if (arg->stop)
		return;

	for (ht = tp_c->hlist; ht; ht = ht->next) {
		if (ht->prio != tp->prio)
			continue;

		/* The table itself counts as one object. */
		if (arg->count >= arg->skip &&
		    arg->fn(tp, (unsigned long)ht, arg) < 0)
			goto stopped;
		arg->count++;

		for (bucket = 0; bucket <= ht->divisor; bucket++) {
			for (n = ht->ht[bucket]; n; n = n->next) {
				if (arg->count >= arg->skip &&
				    arg->fn(tp, (unsigned long)n, arg) < 0)
					goto stopped;
				arg->count++;
			}
		}
	}
	return;

stopped:
	arg->stop = 1;
}

726
static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
L
Linus Torvalds 已提交
727 728
		     struct sk_buff *skb, struct tcmsg *t)
{
E
Eric Dumazet 已提交
729
	struct tc_u_knode *n = (struct tc_u_knode *)fh;
730
	struct nlattr *nest;
L
Linus Torvalds 已提交
731 732 733 734 735 736

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

737 738 739
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
L
Linus Torvalds 已提交
740 741

	if (TC_U32_KEY(n->handle) == 0) {
E
Eric Dumazet 已提交
742 743 744
		struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
		u32 divisor = ht->divisor + 1;

745 746
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
L
Linus Torvalds 已提交
747
	} else {
748 749 750 751
		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;
L
Linus Torvalds 已提交
752 753
		if (n->ht_up) {
			u32 htid = n->handle & 0xFFFFF000;
754 755
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
L
Linus Torvalds 已提交
756
		}
757 758 759 760 761 762
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;
		if (n->ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, n->ht_down->handle))
			goto nla_put_failure;
L
Linus Torvalds 已提交
763 764

#ifdef CONFIG_CLS_U32_MARK
765 766 767
		if ((n->mark.val || n->mark.mask) &&
		    nla_put(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark))
			goto nla_put_failure;
L
Linus Torvalds 已提交
768 769
#endif

770
		if (tcf_exts_dump(skb, &n->exts) < 0)
771
			goto nla_put_failure;
L
Linus Torvalds 已提交
772 773

#ifdef CONFIG_NET_CLS_IND
774 775 776 777 778 779
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
780 781
#endif
#ifdef CONFIG_CLS_U32_PERF
782 783 784 785
		if (nla_put(skb, TCA_U32_PCNT,
			    sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
			    n->pf))
			goto nla_put_failure;
L
Linus Torvalds 已提交
786 787 788
#endif
	}

789 790
	nla_nest_end(skb, nest);

L
Linus Torvalds 已提交
791
	if (TC_U32_KEY(n->handle))
792
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
793
			goto nla_put_failure;
L
Linus Torvalds 已提交
794 795
	return skb->len;

796
nla_put_failure:
797
	nla_nest_cancel(skb, nest);
L
Linus Torvalds 已提交
798 799 800
	return -1;
}

801
/* Operations table registering this file's callbacks as the "u32" classifier. */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind = "u32",
	.classify = u32_classify,
	.init = u32_init,
	.destroy = u32_destroy,
	.get = u32_get,
	.put = u32_put,
	.change = u32_change,
	.delete = u32_delete,
	.walk = u32_walk,
	.dump = u32_dump,
	.owner = THIS_MODULE,
};

/*
 * Module init: log which optional features were compiled in, then
 * register the classifier.  Returns the registration result.
 */
static int __init init_u32(void)
{
	pr_info("u32 classifier\n");

#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif

#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif

#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif

	return register_tcf_proto_ops(&cls_u32_ops);
}

830
/* Module exit: unregister the classifier operations. */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);
}

/* Hook init_u32()/exit_u32() up as the module's entry and exit points. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");