cls_u32.c 29.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
34
#include <linux/slab.h>
L
Linus Torvalds 已提交
35 36 37 38
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
39
#include <linux/percpu.h>
L
Linus Torvalds 已提交
40 41
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
42
#include <linux/bitmap.h>
43 44
#include <linux/netdevice.h>
#include <linux/hash.h>
45
#include <net/netlink.h>
L
Linus Torvalds 已提交
46 47
#include <net/act_api.h>
#include <net/pkt_cls.h>
48
#include <linux/netdevice.h>
49
#include <linux/idr.h>
L
Linus Torvalds 已提交
50

E
Eric Dumazet 已提交
51
struct tc_u_knode {
52
	struct tc_u_knode __rcu	*next;
L
Linus Torvalds 已提交
53
	u32			handle;
54
	struct tc_u_hnode __rcu	*ht_up;
L
Linus Torvalds 已提交
55 56
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
57
	int			ifindex;
L
Linus Torvalds 已提交
58 59 60
#endif
	u8			fshift;
	struct tcf_result	res;
61
	struct tc_u_hnode __rcu	*ht_down;
L
Linus Torvalds 已提交
62
#ifdef CONFIG_CLS_U32_PERF
63
	struct tc_u32_pcnt __percpu *pf;
L
Linus Torvalds 已提交
64
#endif
65
	u32			flags;
L
Linus Torvalds 已提交
66
#ifdef CONFIG_CLS_U32_MARK
67 68 69
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
L
Linus Torvalds 已提交
70
#endif
71 72
	struct tcf_proto	*tp;
	struct rcu_head		rcu;
73 74 75 76
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
L
Linus Torvalds 已提交
77 78
};

E
Eric Dumazet 已提交
79
struct tc_u_hnode {
80
	struct tc_u_hnode __rcu	*next;
L
Linus Torvalds 已提交
81 82 83 84
	u32			handle;
	u32			prio;
	struct tc_u_common	*tp_c;
	int			refcnt;
E
Eric Dumazet 已提交
85
	unsigned int		divisor;
86
	struct idr		handle_idr;
87
	struct rcu_head		rcu;
88 89 90 91
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
L
Linus Torvalds 已提交
92 93
};

E
Eric Dumazet 已提交
94
struct tc_u_common {
95
	struct tc_u_hnode __rcu	*hlist;
96
	struct tcf_block	*block;
L
Linus Torvalds 已提交
97
	int			refcnt;
98
	struct idr		handle_idr;
99
	struct hlist_node	hnode;
100
	struct rcu_head		rcu;
L
Linus Torvalds 已提交
101 102
};

E
Eric Dumazet 已提交
103 104 105
/* Reduce a big-endian packet word to a bucket selector: mask it with the
 * selector's hmask, convert to host order and shift right by @fshift.
 * The caller is responsible for limiting the result to the table size.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	unsigned int h = ntohl(key & sel->hmask) >> fshift;

	return h;
}

J
Jamal Hadi Salim 已提交
112 113
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
L
Linus Torvalds 已提交
114 115 116
{
	struct {
		struct tc_u_knode *knode;
117
		unsigned int	  off;
L
Linus Torvalds 已提交
118 119
	} stack[TC_U32_MAXDEPTH];

120
	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
121
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
122 123 124 125 126 127 128 129 130 131
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
132
	n = rcu_dereference_bh(ht->ht[sel]);
L
Linus Torvalds 已提交
133 134 135 136 137 138

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
139
		__this_cpu_inc(n->pf->rcnt);
L
Linus Torvalds 已提交
140 141 142
		j = 0;
#endif

143 144 145 146 147
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

L
Linus Torvalds 已提交
148
#ifdef CONFIG_CLS_U32_MARK
149
		if ((skb->mark & n->mask) != n->val) {
150
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
151 152
			goto next_knode;
		} else {
153
			__this_cpu_inc(*n->pcpu_success);
L
Linus Torvalds 已提交
154 155 156
		}
#endif

E
Eric Dumazet 已提交
157
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
158
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
159
			__be32 *data, hdata;
160

D
Dan Carpenter 已提交
161
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
162 163
				goto out;

S
stephen hemminger 已提交
164
			data = skb_header_pointer(skb, toff, 4, &hdata);
165 166 167
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
168
				n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
169 170 171
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
172
			__this_cpu_inc(n->pf->kcnts[j]);
L
Linus Torvalds 已提交
173 174 175
			j++;
#endif
		}
176 177 178

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
L
Linus Torvalds 已提交
179
check_terminal:
E
Eric Dumazet 已提交
180
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
181 182 183

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
184
				if (!tcf_match_indev(skb, n->ifindex)) {
185
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
186 187 188 189
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
190
				__this_cpu_inc(n->pf->rhit);
L
Linus Torvalds 已提交
191 192 193
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
194
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
195 196 197 198 199
					goto next_knode;
				}

				return r;
			}
200
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
201 202 203 204 205 206 207
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
208
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
209 210
		sdepth++;

211
		ht = rcu_dereference_bh(n->ht_down);
L
Linus Torvalds 已提交
212
		sel = 0;
213
		if (ht->divisor) {
S
stephen hemminger 已提交
214
			__be32 *data, hdata;
215 216

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
217
						  &hdata);
218 219 220 221 222
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
223
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
224 225
			goto next_ht;

E
Eric Dumazet 已提交
226
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
227
			off2 = n->sel.off + 3;
228
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
229
				__be16 *data, hdata;
230 231 232

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
233
							  2, &hdata);
234 235 236 237 238
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
239 240
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
241
		if (n->sel.flags & TC_U32_EAT) {
242
			off += off2;
L
Linus Torvalds 已提交
243 244 245
			off2 = 0;
		}

246
		if (off < skb->len)
L
Linus Torvalds 已提交
247 248 249 250 251 252
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
253
		ht = rcu_dereference_bh(n->ht_up);
254
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
255 256
		goto check_terminal;
	}
257
out:
L
Linus Torvalds 已提交
258 259 260
	return -1;

deadloop:
261
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
262 263 264
	return -1;
}

J
Jamal Hadi Salim 已提交
265
/* Find the hash table with the given @handle on tp_c->hlist.
 * Returns NULL when no table has that handle. Uses rtnl_dereference(),
 * so the caller is expected to hold RTNL.
 */
static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *ht;

	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next))
		if (ht->handle == handle)
			break;

	return ht;
}

J
Jamal Hadi Salim 已提交
278
/* Find the key node with the given @handle in table @ht.
 * The bucket is derived from the handle with TC_U32_HASH(); a bucket
 * index beyond ht->divisor cannot exist, so NULL is returned for it.
 * Returns the knode or NULL when it is not present.
 */
static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned int sel;
	struct tc_u_knode *n = NULL;

	sel = TC_U32_HASH(handle);
	if (sel > ht->divisor)
		goto out;

	for (n = rtnl_dereference(ht->ht[sel]);
	     n;
	     n = rtnl_dereference(n->next))
		if (n->handle == handle)
			break;
out:
	return n;
}


297
/* Resolve @handle to an object of this classifier.
 *
 * Returns the hash table itself when the key part of the handle is zero,
 * the matching key node otherwise, or NULL when the table or the key
 * does not exist. The two object kinds are told apart later by
 * TC_U32_KEY() of their handle.
 */
static void *u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_hnode *ht;
	struct tc_u_common *tp_c = tp->data;

	if (TC_U32_HTID(handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));

	if (!ht)
		return NULL;

	if (TC_U32_KEY(handle) == 0)
		return ht;

	return u32_lookup_key(ht, handle);
}

316
static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
L
Linus Torvalds 已提交
317
{
318 319
	unsigned long idr_index;
	int err;
L
Linus Torvalds 已提交
320

321
	/* This is only used inside rtnl lock it is safe to increment
322 323
	 * without read _copy_ update semantics
	 */
324 325 326 327 328
	err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
			    1, 0x7FF, GFP_KERNEL);
	if (err)
		return 0;
	return (u32)(idr_index | 0x800) << 20;
L
Linus Torvalds 已提交
329 330
}

331 332 333 334 335 336 337
/* Buckets of tc_u_common instances, indexed by tc_u_hash(). Entries are
 * added in u32_init() and looked up in tc_u_common_find(). The array is
 * allocated outside this part of the file.
 */
static struct hlist_head *tc_u_common_hash;

/* Number of hash bits passed to hash_ptr() in tc_u_hash(); U32_HASH_SIZE
 * is presumably the matching bucket count (confirm at the allocation site).
 */
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)

static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
338
	return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
339 340 341 342 343 344 345 346 347
}

/* Look up the tc_u_common already registered for the block of @tp.
 * Returns NULL when this is the first u32 instance on that block.
 */
static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
{
	struct tc_u_common *tc;
	unsigned int h;

	h = tc_u_hash(tp);
	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
		if (tc->block == tp->chain->block)
			return tc;
	}
	return NULL;
}

L
Linus Torvalds 已提交
354 355 356 357
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;
358
	unsigned int h;
L
Linus Torvalds 已提交
359

360
	tp_c = tc_u_common_find(tp);
L
Linus Torvalds 已提交
361

362
	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
L
Linus Torvalds 已提交
363 364 365 366
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->refcnt++;
367
	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
L
Linus Torvalds 已提交
368
	root_ht->prio = tp->prio;
369
	idr_init(&root_ht->handle_idr);
L
Linus Torvalds 已提交
370 371

	if (tp_c == NULL) {
372
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
L
Linus Torvalds 已提交
373 374 375 376
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
377
		tp_c->block = tp->chain->block;
378
		INIT_HLIST_NODE(&tp_c->hnode);
379
		idr_init(&tp_c->handle_idr);
380 381 382

		h = tc_u_hash(tp);
		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
L
Linus Torvalds 已提交
383 384 385
	}

	tp_c->refcnt++;
386 387
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
L
Linus Torvalds 已提交
388 389
	root_ht->tp_c = tp_c;

390
	rcu_assign_pointer(tp->root, root_ht);
L
Linus Torvalds 已提交
391 392 393 394
	tp->data = tp_c;
	return 0;
}

J
Jamal Hadi Salim 已提交
395
static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
396
			   bool free_pf)
L
Linus Torvalds 已提交
397
{
398
	tcf_exts_destroy(&n->exts);
L
Linus Torvalds 已提交
399 400 401
	if (n->ht_down)
		n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
402 403
	if (free_pf)
		free_percpu(n->pf);
404 405
#endif
#ifdef CONFIG_CLS_U32_MARK
406 407
	if (free_pf)
		free_percpu(n->pcpu_success);
L
Linus Torvalds 已提交
408 409 410 411 412
#endif
	kfree(n);
	return 0;
}

413 414 415 416 417 418 419 420
/* u32_delete_key_rcu should be called when free'ing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_rcu variant does not free the percpu
 * statistics.
 */
421 422 423 424
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
	u32_destroy_key(key->tp, key, false);
}

/* u32_delete_key_freepf_rcu is the rcu callback variant
 * that frees the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_rcu()
 * for the variant that should be used with keys returned from
 * u32_init_knode().
 */
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

	u32_destroy_key(key->tp, key, true);
}

442
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
443
{
444 445
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
446
	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
L
Linus Torvalds 已提交
447 448

	if (ht) {
449 450 451 452 453
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				RCU_INIT_POINTER(*kp, key->next);
L
Linus Torvalds 已提交
454

455
				tcf_unbind_filter(tp, &key->res);
456
				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
457 458 459 460
				return 0;
			}
		}
	}
461
	WARN_ON(1);
L
Linus Torvalds 已提交
462 463 464
	return 0;
}

465
static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
466
{
467
	struct tcf_block *block = tp->chain->block;
468
	struct tc_cls_u32_offload cls_u32 = {};
469

470
	tc_cls_common_offload_init(&cls_u32.common, tp);
471 472 473 474
	cls_u32.command = TC_CLSU32_DELETE_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
475

476
	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
477 478
}

J
Jamal Hadi Salim 已提交
479 480
/* Send a TC_CLSU32_NEW_HNODE command for table @h to the offload
 * callbacks of the block.
 *
 * A negative result from the callbacks is undone with
 * u32_clear_hw_hnode() and returned. A positive result is treated as
 * "offloaded". Returns -EINVAL when @flags request skip_sw but nothing
 * was offloaded, 0 otherwise.
 */
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
				u32 flags)
{
	struct tcf_block *block = tp->chain->block;
	struct tc_cls_u32_offload cls_u32 = {};
	bool skip_sw = tc_skip_sw(flags);
	bool offloaded = false;
	int err;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_NEW_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;

	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
	if (err < 0) {
		u32_clear_hw_hnode(tp, h);
		return err;
	} else if (err > 0) {
		offloaded = true;
	}

	if (skip_sw && !offloaded)
		return -EINVAL;

	return 0;
}

508
/* Send a TC_CLSU32_DELETE_KNODE command for the key node identified by
 * @handle to the offload callbacks of the block. The result of the call
 * is ignored.
 */
static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
	struct tcf_block *block = tp->chain->block;
	struct tc_cls_u32_offload cls_u32 = {};

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_DELETE_KNODE;
	cls_u32.knode.handle = handle;

	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
}

J
Jamal Hadi Salim 已提交
520 521
/* Send a TC_CLSU32_REPLACE_KNODE command for key node @n to the offload
 * callbacks of the block.
 *
 * A negative result from the callbacks is undone with
 * u32_remove_hw_knode() and returned. A positive result marks the node
 * with TCA_CLS_FLAGS_IN_HW. Returns -EINVAL when @flags request skip_sw
 * but the node is not marked as being in hardware, 0 otherwise.
 */
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
				u32 flags)
{
	struct tcf_block *block = tp->chain->block;
	struct tc_cls_u32_offload cls_u32 = {};
	bool skip_sw = tc_skip_sw(flags);
	int err;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
	cls_u32.knode.handle = n->handle;
	cls_u32.knode.fshift = n->fshift;
#ifdef CONFIG_CLS_U32_MARK
	cls_u32.knode.val = n->val;
	cls_u32.knode.mask = n->mask;
#else
	cls_u32.knode.val = 0;
	cls_u32.knode.mask = 0;
#endif
	cls_u32.knode.sel = &n->sel;
	cls_u32.knode.exts = &n->exts;
	if (n->ht_down)
		cls_u32.knode.link_handle = n->ht_down->handle;

	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
	if (err < 0) {
		u32_remove_hw_knode(tp, n->handle);
		return err;
	} else if (err > 0) {
		n->flags |= TCA_CLS_FLAGS_IN_HW;
	}

	/* Bitwise test: only the IN_HW bit decides, not "any flag set". */
	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
		return -EINVAL;

	return 0;
}

558
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
L
Linus Torvalds 已提交
559 560
{
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
561
	unsigned int h;
L
Linus Torvalds 已提交
562

E
Eric Dumazet 已提交
563
	for (h = 0; h <= ht->divisor; h++) {
564 565 566
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
567
			tcf_unbind_filter(tp, &n->res);
568
			u32_remove_hw_knode(tp, n->handle);
569
			idr_remove_ext(&ht->handle_idr, n->handle);
570
			call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
571 572 573 574 575 576 577
		}
	}
}

static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
	struct tc_u_common *tp_c = tp->data;
578 579
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;
L
Linus Torvalds 已提交
580

581
	WARN_ON(ht->refcnt);
L
Linus Torvalds 已提交
582

583
	u32_clear_hnode(tp, ht);
L
Linus Torvalds 已提交
584

585 586 587 588 589
	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
590
			u32_clear_hw_hnode(tp, ht);
591 592
			idr_destroy(&ht->handle_idr);
			idr_remove_ext(&tp_c->handle_idr, ht->handle);
593 594
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
L
Linus Torvalds 已提交
595 596 597 598 599 600 601
			return 0;
		}
	}

	return -ENOENT;
}

602 603 604 605 606 607 608 609 610 611 612
static bool ht_empty(struct tc_u_hnode *ht)
{
	unsigned int h;

	for (h = 0; h <= ht->divisor; h++)
		if (rcu_access_pointer(ht->ht[h]))
			return false;

	return true;
}

613
static void u32_destroy(struct tcf_proto *tp)
L
Linus Torvalds 已提交
614 615
{
	struct tc_u_common *tp_c = tp->data;
616
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
617

618
	WARN_ON(root_ht == NULL);
L
Linus Torvalds 已提交
619 620 621 622 623 624 625

	if (root_ht && --root_ht->refcnt == 0)
		u32_destroy_hnode(tp, root_ht);

	if (--tp_c->refcnt == 0) {
		struct tc_u_hnode *ht;

626
		hlist_del(&tp_c->hnode);
L
Linus Torvalds 已提交
627

628 629 630
		for (ht = rtnl_dereference(tp_c->hlist);
		     ht;
		     ht = rtnl_dereference(ht->next)) {
631
			ht->refcnt--;
632
			u32_clear_hnode(tp, ht);
633
		}
L
Linus Torvalds 已提交
634

635 636 637
		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
			RCU_INIT_POINTER(tp_c->hlist, ht->next);
			kfree_rcu(ht, rcu);
638
		}
L
Linus Torvalds 已提交
639

640
		idr_destroy(&tp_c->handle_idr);
L
Linus Torvalds 已提交
641 642 643 644 645 646
		kfree(tp_c);
	}

	tp->data = NULL;
}

647
/* Delete the object @arg, which is either a key node or a hash table as
 * returned by u32_get().
 *
 * The handle is read through a tc_u_hnode pointer for both kinds; this
 * works because both structures start with a 'next' pointer followed by
 * the u32 handle. A handle with a non-zero key part denotes a key node.
 *
 * Returns -EINVAL when asked to delete the root table, -EBUSY when the
 * table is still referenced, otherwise the result of the deletion.
 * On the paths that reach 'out', *last is set to true when no filters
 * remain that would keep this classifier instance alive.
 */
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
{
	struct tc_u_hnode *ht = arg;
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
	struct tc_u_common *tp_c = tp->data;
	int ret = 0;

	if (ht == NULL)
		goto out;

	if (TC_U32_KEY(ht->handle)) {
		u32_remove_hw_knode(tp, ht->handle);
		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
		goto out;
	}

	if (root_ht == ht)
		return -EINVAL;

	if (ht->refcnt == 1) {
		ht->refcnt--;
		u32_destroy_hnode(tp, ht);
	} else {
		return -EBUSY;
	}

out:
	*last = true;
	if (root_ht) {
		/* The root table is still linked from somewhere else. */
		if (root_ht->refcnt > 1) {
			*last = false;
			goto ret;
		}
		if (root_ht->refcnt == 1) {
			if (!ht_empty(root_ht)) {
				*last = false;
				goto ret;
			}
		}
	}

	/* Other instances still share the common state. */
	if (tp_c->refcnt > 1) {
		*last = false;
		goto ret;
	}

	if (tp_c->refcnt == 1) {
		struct tc_u_hnode *ht;

		for (ht = rtnl_dereference(tp_c->hlist);
		     ht;
		     ht = rtnl_dereference(ht->next))
			if (!ht_empty(ht)) {
				*last = false;
				break;
			}
	}

ret:
	return ret;
}

709
/* Pick an unused key node handle inside table @htid.
 *
 * The upper half of the node id range (htid | 0x800 .. htid | 0xFFF) is
 * tried first; when it is exhausted the whole range above @htid is
 * tried. Returns the allocated handle, or htid | 0xFFF without an
 * allocation when both attempts fail.
 */
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
	unsigned long idr_index;
	u32 start = htid | 0x800;
	u32 max = htid | 0xFFF;
	u32 min = htid;

	if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
			  start, max + 1, GFP_KERNEL)) {
		if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
				  min + 1, max + 1, GFP_KERNEL))
			return max;
	}

	return (u32)idr_index;
}

726 727 728 729 730 731 732 733
/* Netlink attribute policy for the nested TCA_OPTIONS of u32, used by
 * nla_parse_nested() in u32_change().
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
	[TCA_U32_HASH]		= { .type = NLA_U32 },
	[TCA_U32_LINK]		= { .type = NLA_U32 },
	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
};

737 738
/* Apply the optional netlink attributes in @tb to key node @n:
 * extensions/actions, the link to another hash table (TCA_U32_LINK),
 * the class id (TCA_U32_CLASSID) and the input device (TCA_U32_INDEV).
 *
 * @ht is only used to reach the common state for the table lookup.
 * A new link takes a reference on the target table and drops the one
 * held on the previously linked table.
 *
 * Returns 0 on success, the error from tcf_exts_validate(), or -EINVAL
 * for an invalid link handle, an unknown table or a bad device.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr)
{
	int err;

	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
	if (err < 0)
		return err;

	if (tb[TCA_U32_LINK]) {
		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *ht_down = NULL, *ht_old;

		/* A link must name a table, not a key node. */
		if (TC_U32_KEY(handle))
			return -EINVAL;

		if (handle) {
			ht_down = u32_lookup_ht(ht->tp_c, handle);

			if (ht_down == NULL)
				return -EINVAL;
			ht_down->refcnt++;
		}

		ht_old = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, ht_down);

		if (ht_old)
			ht_old->refcnt--;
	}
	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_U32_INDEV]) {
		int ret;
		ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
		if (ret < 0)
			return -EINVAL;
		n->ifindex = ret;
	}
#endif
	return 0;
}

J
Jamal Hadi Salim 已提交
786
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807
			      struct tc_u_knode *n)
{
	struct tc_u_knode __rcu **ins;
	struct tc_u_knode *pins;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));

	ins = &ht->ht[TC_U32_HASH(n->handle)];

	/* The node must always exist for it to be replaced if this is not the
	 * case then something went very wrong elsewhere.
	 */
	for (pins = rtnl_dereference(*ins); ;
	     ins = &pins->next, pins = rtnl_dereference(*ins))
		if (pins->handle == n->handle)
			break;

808
	idr_replace_ext(&ht->handle_idr, n, n->handle);
809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833
	RCU_INIT_POINTER(n->next, pins->next);
	rcu_assign_pointer(*ins, n);
}

static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
					 struct tc_u_knode *n)
{
	struct tc_u_knode *new;
	struct tc_u32_sel *s = &n->sel;

	new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
		      GFP_KERNEL);

	if (!new)
		return NULL;

	RCU_INIT_POINTER(new->next, n->next);
	new->handle = n->handle;
	RCU_INIT_POINTER(new->ht_up, n->ht_up);

#ifdef CONFIG_NET_CLS_IND
	new->ifindex = n->ifindex;
#endif
	new->fshift = n->fshift;
	new->res = n->res;
834
	new->flags = n->flags;
835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857
	RCU_INIT_POINTER(new->ht_down, n->ht_down);

	/* bump reference count as long as we hold pointer to structure */
	if (new->ht_down)
		new->ht_down->refcnt++;

#ifdef CONFIG_CLS_U32_PERF
	/* Statistics may be incremented by readers during update
	 * so we must keep them in tact. When the node is later destroyed
	 * a special destroy call must be made to not free the pf memory.
	 */
	new->pf = n->pf;
#endif

#ifdef CONFIG_CLS_U32_MARK
	new->val = n->val;
	new->mask = n->mask;
	/* Similarly success statistics must be moved as pointers */
	new->pcpu_success = n->pcpu_success;
#endif
	new->tp = tp;
	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));

858 859 860 861
	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
		kfree(new);
		return NULL;
	}
862 863 864 865

	return new;
}

866
/* Create or modify a u32 object from a netlink request.
 *
 * Three cases are handled, in this order:
 *  - *arg is an existing key node: a modified copy replaces it and the
 *    old node is freed after an RCU grace period;
 *  - TCA_U32_DIVISOR is present: a new hash table is created;
 *  - otherwise a new key node is created in the table selected by
 *    TCA_U32_HASH (or the root table) and inserted in its bucket chain,
 *    ordered by node id.
 *
 * On success *arg points at the new table or key node (it is left
 * untouched on replace) and 0 is returned; otherwise a negative errno.
 */
static int u32_change(struct net *net, struct sk_buff *in_skb,
		      struct tcf_proto *tp, unsigned long base, u32 handle,
		      struct nlattr **tca, void **arg, bool ovr)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
	u32 htid, flags = 0;
	size_t sel_size;
	int err;
#ifdef CONFIG_CLS_U32_PERF
	size_t size;
#endif

	if (opt == NULL)
		return handle ? -EINVAL : 0;

	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_U32_FLAGS]) {
		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
		if (!tc_flags_valid(flags))
			return -EINVAL;
	}

	n = *arg;
	if (n) {
		struct tc_u_knode *new;

		if (TC_U32_KEY(n->handle) == 0)
			return -EINVAL;

		/* The stored flags also carry the IN_HW/NOT_IN_HW status
		 * bits set by this file; those never come from the request
		 * and must not make the comparison fail.
		 */
		if ((n->flags ^ flags) &
		    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW))
			return -EINVAL;

		new = u32_init_knode(tp, n);
		if (!new)
			return -ENOMEM;

		err = u32_set_parms(net, tp, base,
				    rtnl_dereference(n->ht_up), new, tb,
				    tca[TCA_RATE], ovr);

		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		err = u32_replace_hw_knode(tp, new, flags);
		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		u32_replace_knode(tp, tp_c, new);
		tcf_unbind_filter(tp, &n->res);
		/* Statistics are shared with 'new', so do not free them. */
		call_rcu(&n->rcu, u32_delete_key_rcu);
		return 0;
	}

	if (tb[TCA_U32_DIVISOR]) {
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);

		/* divisor is stored as "buckets - 1"; the unsigned wrap of
		 * a zero request is rejected by the same range check.
		 */
		if (--divisor > 0x100)
			return -EINVAL;
		if (TC_U32_KEY(handle))
			return -EINVAL;
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
		if (ht == NULL)
			return -ENOBUFS;
		if (handle == 0) {
			handle = gen_new_htid(tp->data, ht);
			if (handle == 0) {
				kfree(ht);
				return -ENOMEM;
			}
		} else {
			err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
					    handle, handle + 1, GFP_KERNEL);
			if (err) {
				kfree(ht);
				return err;
			}
		}
		ht->tp_c = tp_c;
		ht->refcnt = 1;
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
		idr_init(&ht->handle_idr);

		err = u32_replace_hw_hnode(tp, ht, flags);
		if (err) {
			idr_remove_ext(&tp_c->handle_idr, handle);
			kfree(ht);
			return err;
		}

		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
		*arg = ht;

		return 0;
	}

	if (tb[TCA_U32_HASH]) {
		htid = nla_get_u32(tb[TCA_U32_HASH]);
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
			ht = rtnl_dereference(tp->root);
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
			if (ht == NULL)
				return -EINVAL;
		}
	} else {
		ht = rtnl_dereference(tp->root);
		htid = ht->handle;
	}

	if (ht->divisor < TC_U32_HASH(htid))
		return -EINVAL;

	if (handle) {
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
			return -EINVAL;
		handle = htid | TC_U32_NODE(handle);
		err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
				    handle, handle + 1,
				    GFP_KERNEL);
		if (err)
			return err;
	} else
		handle = gen_new_kid(ht, htid);

	if (tb[TCA_U32_SEL] == NULL) {
		err = -EINVAL;
		goto erridr;
	}

	s = nla_data(tb[TCA_U32_SEL]);

	/* The policy only guarantees room for the fixed part of the
	 * selector. nkeys comes from the request, so make sure the
	 * attribute really carries that many keys before copying them.
	 */
	sel_size = sizeof(*s) + s->nkeys * sizeof(struct tc_u32_key);
	if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
		err = -EINVAL;
		goto erridr;
	}

	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
	if (n == NULL) {
		err = -ENOBUFS;
		goto erridr;
	}

#ifdef CONFIG_CLS_U32_PERF
	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
	if (!n->pf) {
		err = -ENOBUFS;
		goto errfree;
	}
#endif

	memcpy(&n->sel, s, sel_size);
	RCU_INIT_POINTER(n->ht_up, ht);
	n->handle = handle;
	/* fshift is the bit position of the lowest set bit of hmask. */
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
	n->flags = flags;
	n->tp = tp;

	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
	if (err < 0)
		goto errout;

#ifdef CONFIG_CLS_U32_MARK
	n->pcpu_success = alloc_percpu(u32);
	if (!n->pcpu_success) {
		err = -ENOMEM;
		goto errout;
	}

	if (tb[TCA_U32_MARK]) {
		struct tc_u32_mark *mark;

		mark = nla_data(tb[TCA_U32_MARK]);
		n->val = mark->val;
		n->mask = mark->mask;
	}
#endif

	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
	if (err == 0) {
		struct tc_u_knode __rcu **ins;
		struct tc_u_knode *pins;

		err = u32_replace_hw_knode(tp, n, flags);
		if (err)
			goto errhw;

		if (!tc_in_hw(n->flags))
			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		/* Keep the bucket chain sorted by node id. */
		ins = &ht->ht[TC_U32_HASH(handle)];
		for (pins = rtnl_dereference(*ins); pins;
		     ins = &pins->next, pins = rtnl_dereference(*ins))
			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
				break;

		RCU_INIT_POINTER(n->next, pins);
		rcu_assign_pointer(*ins, n);
		*arg = n;
		return 0;
	}

errhw:
#ifdef CONFIG_CLS_U32_MARK
	free_percpu(n->pcpu_success);
#endif

errout:
	tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
errfree:
	free_percpu(n->pf);
#endif
	kfree(n);
erridr:
	idr_remove_ext(&ht->handle_idr, handle);
	return err;
}

static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
1103
	unsigned int h;
L
Linus Torvalds 已提交
1104 1105 1106 1107

	if (arg->stop)
		return;

1108 1109 1110
	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next)) {
L
Linus Torvalds 已提交
1111 1112 1113
		if (ht->prio != tp->prio)
			continue;
		if (arg->count >= arg->skip) {
1114
			if (arg->fn(tp, ht, arg) < 0) {
L
Linus Torvalds 已提交
1115 1116 1117 1118 1119 1120
				arg->stop = 1;
				return;
			}
		}
		arg->count++;
		for (h = 0; h <= ht->divisor; h++) {
1121 1122 1123
			for (n = rtnl_dereference(ht->ht[h]);
			     n;
			     n = rtnl_dereference(n->next)) {
L
Linus Torvalds 已提交
1124 1125 1126 1127
				if (arg->count < arg->skip) {
					arg->count++;
					continue;
				}
1128
				if (arg->fn(tp, n, arg) < 0) {
L
Linus Torvalds 已提交
1129 1130 1131 1132 1133 1134 1135 1136 1137
					arg->stop = 1;
					return;
				}
				arg->count++;
			}
		}
	}
}

1138 1139 1140 1141 1142 1143 1144 1145
/*
 * u32_bind_class - attach a class reference to a key node.
 * @fh:      filter handle, interpreted as a struct tc_u_knode (may be NULL).
 * @classid: class id the caller is binding.
 * @cl:      class value to store.
 *
 * Stores @cl in the node's result only when the node exists and its
 * res.classid equals @classid; otherwise nothing is modified.
 */
static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
{
	struct tc_u_knode *knode = fh;

	if (!knode)
		return;

	if (knode->res.classid != classid)
		return;

	knode->res.class = cl;
}

1146
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
J
Jamal Hadi Salim 已提交
1147
		    struct sk_buff *skb, struct tcmsg *t)
L
Linus Torvalds 已提交
1148
{
1149
	struct tc_u_knode *n = fh;
1150
	struct tc_u_hnode *ht_up, *ht_down;
1151
	struct nlattr *nest;
L
Linus Torvalds 已提交
1152 1153 1154 1155 1156 1157

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

1158 1159 1160
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
L
Linus Torvalds 已提交
1161 1162

	if (TC_U32_KEY(n->handle) == 0) {
1163
		struct tc_u_hnode *ht = fh;
E
Eric Dumazet 已提交
1164 1165
		u32 divisor = ht->divisor + 1;

1166 1167
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
L
Linus Torvalds 已提交
1168
	} else {
1169 1170 1171
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *gpf;
		int cpu;
1172
#endif
1173

1174 1175 1176 1177
		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;
1178 1179 1180

		ht_up = rtnl_dereference(n->ht_up);
		if (ht_up) {
L
Linus Torvalds 已提交
1181
			u32 htid = n->handle & 0xFFFFF000;
1182 1183
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
L
Linus Torvalds 已提交
1184
		}
1185 1186 1187
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;
1188 1189 1190 1191

		ht_down = rtnl_dereference(n->ht_down);
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1192
			goto nla_put_failure;
L
Linus Torvalds 已提交
1193

1194 1195 1196
		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
			goto nla_put_failure;

L
Linus Torvalds 已提交
1197
#ifdef CONFIG_CLS_U32_MARK
1198 1199 1200 1201
		if ((n->val || n->mask)) {
			struct tc_u32_mark mark = {.val = n->val,
						   .mask = n->mask,
						   .success = 0};
1202
			int cpum;
1203

1204 1205
			for_each_possible_cpu(cpum) {
				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1206 1207 1208 1209 1210 1211 1212

				mark.success += cnt;
			}

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1213 1214
#endif

1215
		if (tcf_exts_dump(skb, &n->exts) < 0)
1216
			goto nla_put_failure;
L
Linus Torvalds 已提交
1217 1218

#ifdef CONFIG_NET_CLS_IND
1219 1220 1221 1222 1223 1224
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1225 1226
#endif
#ifdef CONFIG_CLS_U32_PERF
1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242
		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
			      n->sel.nkeys * sizeof(u64),
			      GFP_KERNEL);
		if (!gpf)
			goto nla_put_failure;

		for_each_possible_cpu(cpu) {
			int i;
			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

			gpf->rcnt += pf->rcnt;
			gpf->rhit += pf->rhit;
			for (i = 0; i < n->sel.nkeys; i++)
				gpf->kcnts[i] += pf->kcnts[i];
		}

1243 1244 1245 1246
		if (nla_put_64bit(skb, TCA_U32_PCNT,
				  sizeof(struct tc_u32_pcnt) +
				  n->sel.nkeys * sizeof(u64),
				  gpf, TCA_U32_PAD)) {
1247
			kfree(gpf);
1248
			goto nla_put_failure;
1249 1250
		}
		kfree(gpf);
L
Linus Torvalds 已提交
1251 1252 1253
#endif
	}

1254 1255
	nla_nest_end(skb, nest);

L
Linus Torvalds 已提交
1256
	if (TC_U32_KEY(n->handle))
1257
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1258
			goto nla_put_failure;
L
Linus Torvalds 已提交
1259 1260
	return skb->len;

1261
nla_put_failure:
1262
	nla_nest_cancel(skb, nest);
L
Linus Torvalds 已提交
1263 1264 1265
	return -1;
}

1266
/*
 * Operations table for the "u32" classifier.  It is registered in
 * init_u32() and unregistered in exit_u32().
 */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind		=	"u32",
	.classify	=	u32_classify,
	.init		=	u32_init,
	.destroy	=	u32_destroy,
	.get		=	u32_get,
	.change		=	u32_change,
	.delete		=	u32_delete,
	.walk		=	u32_walk,
	.dump		=	u32_dump,
	.bind_class	=	u32_bind_class,
	.owner		=	THIS_MODULE,
};

/*
 * init_u32 - module entry point.
 *
 * Logs which optional features were compiled in, allocates and initialises
 * the tc_u_common_hash array (U32_HASH_SIZE list heads), then registers
 * cls_u32_ops.
 *
 * Returns 0 on success, -ENOMEM if the hash array cannot be allocated, or
 * the error from register_tcf_proto_ops(); in the latter case the hash
 * array is freed again before returning.
 */
static int __init init_u32(void)
{
	int i, ret;

	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif
	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
					  sizeof(struct hlist_head),
					  GFP_KERNEL);
	if (!tc_u_common_hash)
		return -ENOMEM;

	for (i = 0; i < U32_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&tc_u_common_hash[i]);

	ret = register_tcf_proto_ops(&cls_u32_ops);
	if (ret)
		kvfree(tc_u_common_hash);	/* undo the allocation on failure */
	return ret;
}

1309
/*
 * exit_u32 - module exit point.
 *
 * Unregisters cls_u32_ops first, then frees the tc_u_common_hash array
 * that init_u32() allocated.
 */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);
	kvfree(tc_u_common_hash);
}

/* Hook init_u32()/exit_u32() up as the module's load and unload handlers. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");