/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
#include <linux/netdevice.h>
#include <linux/hash.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/netdevice.h>
#include <linux/idr.h>

E
Eric Dumazet 已提交
51
struct tc_u_knode {
52
	struct tc_u_knode __rcu	*next;
L
Linus Torvalds 已提交
53
	u32			handle;
54
	struct tc_u_hnode __rcu	*ht_up;
L
Linus Torvalds 已提交
55 56
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
57
	int			ifindex;
L
Linus Torvalds 已提交
58 59 60
#endif
	u8			fshift;
	struct tcf_result	res;
61
	struct tc_u_hnode __rcu	*ht_down;
L
Linus Torvalds 已提交
62
#ifdef CONFIG_CLS_U32_PERF
63
	struct tc_u32_pcnt __percpu *pf;
L
Linus Torvalds 已提交
64
#endif
65
	u32			flags;
L
Linus Torvalds 已提交
66
#ifdef CONFIG_CLS_U32_MARK
67 68 69
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
L
Linus Torvalds 已提交
70
#endif
71 72
	struct tcf_proto	*tp;
	struct rcu_head		rcu;
73 74 75 76
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
L
Linus Torvalds 已提交
77 78
};

E
Eric Dumazet 已提交
79
struct tc_u_hnode {
80
	struct tc_u_hnode __rcu	*next;
L
Linus Torvalds 已提交
81 82 83 84
	u32			handle;
	u32			prio;
	struct tc_u_common	*tp_c;
	int			refcnt;
E
Eric Dumazet 已提交
85
	unsigned int		divisor;
86
	struct idr		handle_idr;
87
	struct rcu_head		rcu;
88 89 90 91
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
L
Linus Torvalds 已提交
92 93
};

E
Eric Dumazet 已提交
94
struct tc_u_common {
95
	struct tc_u_hnode __rcu	*hlist;
L
Linus Torvalds 已提交
96 97
	struct Qdisc		*q;
	int			refcnt;
98
	struct idr		handle_idr;
99
	struct hlist_node	hnode;
100
	struct rcu_head		rcu;
L
Linus Torvalds 已提交
101 102
};

E
Eric Dumazet 已提交
103 104 105
/* Fold a packet word into a bucket selector: mask @key with sel->hmask,
 * convert it to host byte order and shift it right by @fshift.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	unsigned int h = ntohl(key & sel->hmask) >> fshift;

	return h;
}

J
Jamal Hadi Salim 已提交
112 113
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
L
Linus Torvalds 已提交
114 115 116
{
	struct {
		struct tc_u_knode *knode;
117
		unsigned int	  off;
L
Linus Torvalds 已提交
118 119
	} stack[TC_U32_MAXDEPTH];

120
	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
121
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
122 123 124 125 126 127 128 129 130 131
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
132
	n = rcu_dereference_bh(ht->ht[sel]);
L
Linus Torvalds 已提交
133 134 135 136 137 138

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
139
		__this_cpu_inc(n->pf->rcnt);
L
Linus Torvalds 已提交
140 141 142
		j = 0;
#endif

143 144 145 146 147
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

L
Linus Torvalds 已提交
148
#ifdef CONFIG_CLS_U32_MARK
149
		if ((skb->mark & n->mask) != n->val) {
150
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
151 152
			goto next_knode;
		} else {
153
			__this_cpu_inc(*n->pcpu_success);
L
Linus Torvalds 已提交
154 155 156
		}
#endif

E
Eric Dumazet 已提交
157
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
158
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
159
			__be32 *data, hdata;
160

D
Dan Carpenter 已提交
161
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
162 163
				goto out;

S
stephen hemminger 已提交
164
			data = skb_header_pointer(skb, toff, 4, &hdata);
165 166 167
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
168
				n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
169 170 171
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
172
			__this_cpu_inc(n->pf->kcnts[j]);
L
Linus Torvalds 已提交
173 174 175
			j++;
#endif
		}
176 177 178

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
L
Linus Torvalds 已提交
179
check_terminal:
E
Eric Dumazet 已提交
180
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
181 182 183

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
184
				if (!tcf_match_indev(skb, n->ifindex)) {
185
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
186 187 188 189
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
190
				__this_cpu_inc(n->pf->rhit);
L
Linus Torvalds 已提交
191 192 193
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
194
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
195 196 197 198 199
					goto next_knode;
				}

				return r;
			}
200
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
201 202 203 204 205 206 207
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
208
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
209 210
		sdepth++;

211
		ht = rcu_dereference_bh(n->ht_down);
L
Linus Torvalds 已提交
212
		sel = 0;
213
		if (ht->divisor) {
S
stephen hemminger 已提交
214
			__be32 *data, hdata;
215 216

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
217
						  &hdata);
218 219 220 221 222
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
223
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
224 225
			goto next_ht;

E
Eric Dumazet 已提交
226
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
227
			off2 = n->sel.off + 3;
228
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
229
				__be16 *data, hdata;
230 231 232

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
233
							  2, &hdata);
234 235 236 237 238
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
239 240
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
241
		if (n->sel.flags & TC_U32_EAT) {
242
			off += off2;
L
Linus Torvalds 已提交
243 244 245
			off2 = 0;
		}

246
		if (off < skb->len)
L
Linus Torvalds 已提交
247 248 249 250 251 252
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
253
		ht = rcu_dereference_bh(n->ht_up);
254
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
255 256
		goto check_terminal;
	}
257
out:
L
Linus Torvalds 已提交
258 259 260
	return -1;

deadloop:
261
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
262 263 264
	return -1;
}

J
Jamal Hadi Salim 已提交
265
/* Walk tp_c->hlist and return the hash table whose handle equals
 * @handle, or NULL if there is none.
 */
static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *ht;

	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next))
		if (ht->handle == handle)
			break;

	return ht;
}

J
Jamal Hadi Salim 已提交
278
/* Find the key node with the given @handle in @ht.
 *
 * The bucket is TC_U32_HASH(handle). Returns NULL when that bucket index
 * is beyond ht->divisor or when no node in the bucket has this handle.
 */
static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned int sel;
	struct tc_u_knode *n = NULL;

	sel = TC_U32_HASH(handle);
	if (sel > ht->divisor)
		goto out;

	for (n = rtnl_dereference(ht->ht[sel]);
	     n;
	     n = rtnl_dereference(n->next))
		if (n->handle == handle)
			break;
out:
	return n;
}


297
/* Resolve @handle to an object of this filter instance.
 *
 * Returns the hash table itself when the key part of @handle is zero,
 * the matching key node otherwise, or NULL when the table or the node
 * does not exist.
 */
static void *u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_hnode *ht;
	struct tc_u_common *tp_c = tp->data;

	if (TC_U32_HTID(handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));

	if (!ht)
		return NULL;

	if (TC_U32_KEY(handle) == 0)
		return ht;

	return u32_lookup_key(ht, handle);
}

316
static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
L
Linus Torvalds 已提交
317
{
318 319
	unsigned long idr_index;
	int err;
L
Linus Torvalds 已提交
320

321
	/* This is only used inside rtnl lock it is safe to increment
322 323
	 * without read _copy_ update semantics
	 */
324 325 326 327 328
	err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
			    1, 0x7FF, GFP_KERNEL);
	if (err)
		return 0;
	return (u32)(idr_index | 0x800) << 20;
L
Linus Torvalds 已提交
329 330
}

331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
/* Hash table of tc_u_common instances; buckets are indexed by tc_u_hash(). */
static struct hlist_head *tc_u_common_hash;

/* tc_u_hash() asks hash_64() for U32_HASH_SHIFT bits. */
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)

static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	u32 qhandle = tp->q->handle;
	int ifindex = dev->ifindex;

	return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
}

static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
{
	struct tc_u_common *tc;
	unsigned int h;

	h = tc_u_hash(tp);
	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
		if (tc->q == tp->q)
			return tc;
	}
	return NULL;
}

L
Linus Torvalds 已提交
358 359 360 361
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;
362
	unsigned int h;
L
Linus Torvalds 已提交
363

364
	tp_c = tc_u_common_find(tp);
L
Linus Torvalds 已提交
365

366
	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
L
Linus Torvalds 已提交
367 368 369 370
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->refcnt++;
371
	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
L
Linus Torvalds 已提交
372
	root_ht->prio = tp->prio;
373
	idr_init(&root_ht->handle_idr);
L
Linus Torvalds 已提交
374 375

	if (tp_c == NULL) {
376
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
L
Linus Torvalds 已提交
377 378 379 380 381
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
		tp_c->q = tp->q;
382
		INIT_HLIST_NODE(&tp_c->hnode);
383
		idr_init(&tp_c->handle_idr);
384 385 386

		h = tc_u_hash(tp);
		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
L
Linus Torvalds 已提交
387 388 389
	}

	tp_c->refcnt++;
390 391
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
L
Linus Torvalds 已提交
392 393
	root_ht->tp_c = tp_c;

394
	rcu_assign_pointer(tp->root, root_ht);
L
Linus Torvalds 已提交
395 396 397 398
	tp->data = tp_c;
	return 0;
}

J
Jamal Hadi Salim 已提交
399
static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
400
			   bool free_pf)
L
Linus Torvalds 已提交
401
{
402
	tcf_exts_destroy(&n->exts);
L
Linus Torvalds 已提交
403 404 405
	if (n->ht_down)
		n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
406 407
	if (free_pf)
		free_percpu(n->pf);
408 409
#endif
#ifdef CONFIG_CLS_U32_MARK
410 411
	if (free_pf)
		free_percpu(n->pcpu_success);
L
Linus Torvalds 已提交
412 413 414 415 416
#endif
	kfree(n);
	return 0;
}

417 418 419 420 421 422 423 424
/* u32_delete_key_rcu should be called when free'ing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_rcu variant does not free the percpu
 * statistics.
 */
425 426 427 428
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
	u32_destroy_key(key->tp, key, false);
}

/* u32_delete_key_freepf_rcu is the rcu callback variant
 * that frees the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_rcu()
 * for the variant that should be used with keys returned from
 * u32_init_knode()
 */
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

	u32_destroy_key(key->tp, key, true);
}

446
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
447
{
448 449
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
450
	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
L
Linus Torvalds 已提交
451 452

	if (ht) {
453 454 455 456 457
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				RCU_INIT_POINTER(*kp, key->next);
L
Linus Torvalds 已提交
458

459
				tcf_unbind_filter(tp, &key->res);
460
				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
461 462 463 464
				return 0;
			}
		}
	}
465
	WARN_ON(1);
L
Linus Torvalds 已提交
466 467 468
	return 0;
}

469 470 471
/* Ask the device to delete the offloaded key node identified by
 * @handle. Does nothing when tc_should_offload() says the device does
 * not offload; the driver's return value is ignored.
 */
static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload cls_u32 = {};

	if (!tc_should_offload(dev, 0))
		return;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_DELETE_KNODE;
	cls_u32.knode.handle = handle;

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
}

J
Jamal Hadi Salim 已提交
484 485
/* Offer hash table @h to the device for offload.
 *
 * When offload is not possible this fails with -EINVAL only if @flags
 * request skip_sw. The driver's result is likewise reported only for
 * skip_sw; otherwise 0 is returned.
 */
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
				u32 flags)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload cls_u32 = {};
	int err;

	if (!tc_should_offload(dev, flags))
		return tc_skip_sw(flags) ? -EINVAL : 0;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_NEW_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
	if (tc_skip_sw(flags))
		return err;

	return 0;
}

static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
{
	struct net_device *dev = tp->q->dev_queue->dev;
510
	struct tc_cls_u32_offload cls_u32 = {};
511

512
	if (!tc_should_offload(dev, 0))
513
		return;
514

515 516 517 518 519
	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_DELETE_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
520

521
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
522 523
}

J
Jamal Hadi Salim 已提交
524 525
/* Offer key node @n to the device for offload.
 *
 * On driver success TCA_CLS_FLAGS_IN_HW is set in n->flags. When
 * offload is not possible this fails with -EINVAL only if @flags
 * request skip_sw. The driver's result is likewise reported only for
 * skip_sw; otherwise 0 is returned.
 */
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
				u32 flags)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload cls_u32 = {};
	int err;

	if (!tc_should_offload(dev, flags))
		return tc_skip_sw(flags) ? -EINVAL : 0;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
	cls_u32.knode.handle = n->handle;
	cls_u32.knode.fshift = n->fshift;
#ifdef CONFIG_CLS_U32_MARK
	cls_u32.knode.val = n->val;
	cls_u32.knode.mask = n->mask;
#else
	cls_u32.knode.val = 0;
	cls_u32.knode.mask = 0;
#endif
	cls_u32.knode.sel = &n->sel;
	cls_u32.knode.exts = &n->exts;
	if (n->ht_down)
		cls_u32.knode.link_handle = n->ht_down->handle;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);

	if (!err)
		n->flags |= TCA_CLS_FLAGS_IN_HW;

	if (tc_skip_sw(flags))
		return err;

	return 0;
}

561
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
L
Linus Torvalds 已提交
562 563
{
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
564
	unsigned int h;
L
Linus Torvalds 已提交
565

E
Eric Dumazet 已提交
566
	for (h = 0; h <= ht->divisor; h++) {
567 568 569
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
570
			tcf_unbind_filter(tp, &n->res);
571
			u32_remove_hw_knode(tp, n->handle);
572
			idr_remove_ext(&ht->handle_idr, n->handle);
573
			call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
574 575 576 577 578 579 580
		}
	}
}

static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
	struct tc_u_common *tp_c = tp->data;
581 582
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;
L
Linus Torvalds 已提交
583

584
	WARN_ON(ht->refcnt);
L
Linus Torvalds 已提交
585

586
	u32_clear_hnode(tp, ht);
L
Linus Torvalds 已提交
587

588 589 590 591 592
	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
593
			u32_clear_hw_hnode(tp, ht);
594 595
			idr_destroy(&ht->handle_idr);
			idr_remove_ext(&tp_c->handle_idr, ht->handle);
596 597
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
L
Linus Torvalds 已提交
598 599 600 601 602 603 604
			return 0;
		}
	}

	return -ENOENT;
}

605 606 607 608 609 610 611 612 613 614 615
/* Tell whether every bucket (0..divisor) of @ht is empty. */
static bool ht_empty(struct tc_u_hnode *ht)
{
	unsigned int bucket;
	bool empty = true;

	/* stop at the first bucket that holds a node */
	for (bucket = 0; empty && bucket <= ht->divisor; bucket++)
		empty = !rcu_access_pointer(ht->ht[bucket]);

	return empty;
}

616
static void u32_destroy(struct tcf_proto *tp)
L
Linus Torvalds 已提交
617 618
{
	struct tc_u_common *tp_c = tp->data;
619
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
620

621
	WARN_ON(root_ht == NULL);
L
Linus Torvalds 已提交
622 623 624 625 626 627 628

	if (root_ht && --root_ht->refcnt == 0)
		u32_destroy_hnode(tp, root_ht);

	if (--tp_c->refcnt == 0) {
		struct tc_u_hnode *ht;

629
		hlist_del(&tp_c->hnode);
L
Linus Torvalds 已提交
630

631 632 633
		for (ht = rtnl_dereference(tp_c->hlist);
		     ht;
		     ht = rtnl_dereference(ht->next)) {
634
			ht->refcnt--;
635
			u32_clear_hnode(tp, ht);
636
		}
L
Linus Torvalds 已提交
637

638 639 640
		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
			RCU_INIT_POINTER(tp_c->hlist, ht->next);
			kfree_rcu(ht, rcu);
641
		}
L
Linus Torvalds 已提交
642

643
		idr_destroy(&tp_c->handle_idr);
L
Linus Torvalds 已提交
644 645 646 647 648 649
		kfree(tp_c);
	}

	tp->data = NULL;
}

650
/* Delete the object @arg, which is either a key node or a hash table.
 *
 * A key node is removed from hardware and from its bucket. A hash table
 * can only be deleted when it is not the root table (-EINVAL) and holds
 * exactly one reference (-EBUSY otherwise).
 *
 * On return through the 'out' path *last tells the caller whether the
 * filter instance is now unused: it is false while the root table or
 * the shared state is referenced more than once, or while the root
 * table (or, for the last user of the shared state, any table) still
 * holds nodes. *last is not written on the -EINVAL and -EBUSY returns.
 */
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
{
	struct tc_u_hnode *ht = arg;
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
	struct tc_u_common *tp_c = tp->data;
	int ret = 0;

	if (ht == NULL)
		goto out;

	/* @arg may really be a tc_u_knode: both structures start with a
	 * 'next' pointer followed by the u32 handle, so the handle can be
	 * read before the object type is known.
	 */
	if (TC_U32_KEY(ht->handle)) {
		u32_remove_hw_knode(tp, ht->handle);
		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
		goto out;
	}

	if (root_ht == ht)
		return -EINVAL;

	if (ht->refcnt == 1) {
		ht->refcnt--;
		u32_destroy_hnode(tp, ht);
	} else {
		return -EBUSY;
	}

out:
	*last = true;
	if (root_ht) {
		if (root_ht->refcnt > 1) {
			*last = false;
			goto ret;
		}
		if (root_ht->refcnt == 1) {
			if (!ht_empty(root_ht)) {
				*last = false;
				goto ret;
			}
		}
	}

	if (tp_c->refcnt > 1) {
		*last = false;
		goto ret;
	}

	if (tp_c->refcnt == 1) {
		struct tc_u_hnode *ht;

		for (ht = rtnl_dereference(tp_c->hlist);
		     ht;
		     ht = rtnl_dereference(ht->next))
			if (!ht_empty(ht)) {
				*last = false;
				break;
			}
	}

ret:
	return ret;
}

712
/* Allocate a handle for a new key node of hash table @ht.
 *
 * The allocation is first tried starting at htid | 0x800 and, if that
 * fails, again starting at htid + 1. When both attempts fail,
 * htid | 0xFFF is returned without having been allocated.
 */
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
	unsigned long idr_index;
	u32 start = htid | 0x800;
	u32 max = htid | 0xFFF;
	u32 min = htid;

	if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
			  start, max + 1, GFP_KERNEL)) {
		if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
				  min + 1, max + 1, GFP_KERNEL))
			return max;
	}

	return (u32)idr_index;
}

729 730 731 732 733 734 735 736
/* Netlink attribute policy used by u32_change() to parse TCA_OPTIONS. */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
	[TCA_U32_HASH]		= { .type = NLA_U32 },
	[TCA_U32_LINK]		= { .type = NLA_U32 },
	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
};

740 741
/* Apply the optional attributes in @tb to key node @n.
 *
 * Validates the extensions, then handles TCA_U32_LINK (link to another
 * hash table, adjusting the reference counts of the new and the old
 * table), TCA_U32_CLASSID (set and bind the class) and, with
 * CONFIG_NET_CLS_IND, TCA_U32_INDEV.
 *
 * Returns 0 on success, the error from tcf_exts_validate(), or -EINVAL
 * for a link handle that carries a key part or names an unknown table,
 * or for an input device that cannot be resolved.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr)
{
	int err;

	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
	if (err < 0)
		return err;

	if (tb[TCA_U32_LINK]) {
		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *ht_down = NULL, *ht_old;

		if (TC_U32_KEY(handle))
			return -EINVAL;

		/* a zero handle removes the link */
		if (handle) {
			ht_down = u32_lookup_ht(ht->tp_c, handle);

			if (ht_down == NULL)
				return -EINVAL;
			ht_down->refcnt++;
		}

		ht_old = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, ht_down);

		if (ht_old)
			ht_old->refcnt--;
	}
	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_U32_INDEV]) {
		int ret;
		ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
		if (ret < 0)
			return -EINVAL;
		n->ifindex = ret;
	}
#endif
	return 0;
}

J
Jamal Hadi Salim 已提交
789
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
			      struct tc_u_knode *n)
{
	struct tc_u_knode __rcu **ins;
	struct tc_u_knode *pins;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));

	ins = &ht->ht[TC_U32_HASH(n->handle)];

	/* The node must always exist for it to be replaced if this is not the
	 * case then something went very wrong elsewhere.
	 */
	for (pins = rtnl_dereference(*ins); ;
	     ins = &pins->next, pins = rtnl_dereference(*ins))
		if (pins->handle == n->handle)
			break;

811
	idr_replace_ext(&ht->handle_idr, n, n->handle);
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
	RCU_INIT_POINTER(n->next, pins->next);
	rcu_assign_pointer(*ins, n);
}

static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
					 struct tc_u_knode *n)
{
	struct tc_u_knode *new;
	struct tc_u32_sel *s = &n->sel;

	new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
		      GFP_KERNEL);

	if (!new)
		return NULL;

	RCU_INIT_POINTER(new->next, n->next);
	new->handle = n->handle;
	RCU_INIT_POINTER(new->ht_up, n->ht_up);

#ifdef CONFIG_NET_CLS_IND
	new->ifindex = n->ifindex;
#endif
	new->fshift = n->fshift;
	new->res = n->res;
837
	new->flags = n->flags;
838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860
	RCU_INIT_POINTER(new->ht_down, n->ht_down);

	/* bump reference count as long as we hold pointer to structure */
	if (new->ht_down)
		new->ht_down->refcnt++;

#ifdef CONFIG_CLS_U32_PERF
	/* Statistics may be incremented by readers during update
	 * so we must keep them in tact. When the node is later destroyed
	 * a special destroy call must be made to not free the pf memory.
	 */
	new->pf = n->pf;
#endif

#ifdef CONFIG_CLS_U32_MARK
	new->val = n->val;
	new->mask = n->mask;
	/* Similarly success statistics must be moved as pointers */
	new->pcpu_success = n->pcpu_success;
#endif
	new->tp = tp;
	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));

861 862 863 864
	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
		kfree(new);
		return NULL;
	}
865 866 867 868

	return new;
}

869
/* Create or update a u32 object from a netlink request.
 *
 * Three cases are handled:
 *  - *arg is an existing key node: a copy is made, the new parameters
 *    are applied to the copy and it replaces the old node, which is
 *    freed after an RCU grace period. The flags must not change.
 *  - TCA_U32_DIVISOR is present: a new hash table is created.
 *  - otherwise: a new key node is created from TCA_U32_SEL in the table
 *    selected by TCA_U32_HASH (the root table by default) and inserted
 *    into its bucket ordered by node id.
 *
 * On success 0 is returned and, when an object was created, *arg points
 * to it. On failure a negative errno is returned.
 */
static int u32_change(struct net *net, struct sk_buff *in_skb,
		      struct tcf_proto *tp, unsigned long base, u32 handle,
		      struct nlattr **tca, void **arg, bool ovr)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
	u32 htid, flags = 0;
	size_t sel_size;
	int err;
#ifdef CONFIG_CLS_U32_PERF
	size_t size;
#endif

	if (opt == NULL)
		return handle ? -EINVAL : 0;

	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_U32_FLAGS]) {
		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
		if (!tc_flags_valid(flags))
			return -EINVAL;
	}

	/* Case 1: update of an existing key node */
	n = *arg;
	if (n) {
		struct tc_u_knode *new;

		if (TC_U32_KEY(n->handle) == 0)
			return -EINVAL;

		if (n->flags != flags)
			return -EINVAL;

		new = u32_init_knode(tp, n);
		if (!new)
			return -ENOMEM;

		err = u32_set_parms(net, tp, base,
				    rtnl_dereference(n->ht_up), new, tb,
				    tca[TCA_RATE], ovr);

		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		err = u32_replace_hw_knode(tp, new, flags);
		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		u32_replace_knode(tp, tp_c, new);
		tcf_unbind_filter(tp, &n->res);
		call_rcu(&n->rcu, u32_delete_key_rcu);
		return 0;
	}

	/* Case 2: creation of a new hash table */
	if (tb[TCA_U32_DIVISOR]) {
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);

		/* 'divisor' is stored as the highest bucket index; a
		 * requested divisor of 0 wraps around and is rejected too.
		 */
		if (--divisor > 0x100)
			return -EINVAL;
		if (TC_U32_KEY(handle))
			return -EINVAL;
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
		if (ht == NULL)
			return -ENOBUFS;
		if (handle == 0) {
			handle = gen_new_htid(tp->data, ht);
			if (handle == 0) {
				kfree(ht);
				return -ENOMEM;
			}
		} else {
			err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
					    handle, handle + 1, GFP_KERNEL);
			if (err) {
				kfree(ht);
				return err;
			}
		}
		ht->tp_c = tp_c;
		ht->refcnt = 1;
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
		idr_init(&ht->handle_idr);

		err = u32_replace_hw_hnode(tp, ht, flags);
		if (err) {
			idr_remove_ext(&tp_c->handle_idr, handle);
			kfree(ht);
			return err;
		}

		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
		*arg = ht;

		return 0;
	}

	/* Case 3: creation of a new key node */
	if (tb[TCA_U32_HASH]) {
		htid = nla_get_u32(tb[TCA_U32_HASH]);
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
			ht = rtnl_dereference(tp->root);
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
			if (ht == NULL)
				return -EINVAL;
		}
	} else {
		ht = rtnl_dereference(tp->root);
		htid = ht->handle;
	}

	if (ht->divisor < TC_U32_HASH(htid))
		return -EINVAL;

	if (handle) {
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
			return -EINVAL;
		handle = htid | TC_U32_NODE(handle);
		err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
				    handle, handle + 1,
				    GFP_KERNEL);
		if (err)
			return err;
	} else
		handle = gen_new_kid(ht, htid);

	if (tb[TCA_U32_SEL] == NULL) {
		err = -EINVAL;
		goto erridr;
	}

	s = nla_data(tb[TCA_U32_SEL]);

	/* The policy only guarantees room for the fixed part of the
	 * selector; make sure the attribute really carries all nkeys keys
	 * before they are copied below.
	 */
	sel_size = sizeof(*s) + s->nkeys * sizeof(struct tc_u32_key);
	if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
		err = -EINVAL;
		goto erridr;
	}

	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
	if (n == NULL) {
		err = -ENOBUFS;
		goto erridr;
	}

#ifdef CONFIG_CLS_U32_PERF
	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
	if (!n->pf) {
		err = -ENOBUFS;
		goto errfree;
	}
#endif

	memcpy(&n->sel, s, sel_size);
	RCU_INIT_POINTER(n->ht_up, ht);
	n->handle = handle;
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
	n->flags = flags;
	n->tp = tp;

	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
	if (err < 0)
		goto errout;

#ifdef CONFIG_CLS_U32_MARK
	n->pcpu_success = alloc_percpu(u32);
	if (!n->pcpu_success) {
		err = -ENOMEM;
		goto errout;
	}

	if (tb[TCA_U32_MARK]) {
		struct tc_u32_mark *mark;

		mark = nla_data(tb[TCA_U32_MARK]);
		n->val = mark->val;
		n->mask = mark->mask;
	}
#endif

	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
	if (err == 0) {
		struct tc_u_knode __rcu **ins;
		struct tc_u_knode *pins;

		err = u32_replace_hw_knode(tp, n, flags);
		if (err)
			goto errhw;

		if (!tc_in_hw(n->flags))
			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		/* Insert before the first node with a larger node id */
		ins = &ht->ht[TC_U32_HASH(handle)];
		for (pins = rtnl_dereference(*ins); pins;
		     ins = &pins->next, pins = rtnl_dereference(*ins))
			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
				break;

		RCU_INIT_POINTER(n->next, pins);
		rcu_assign_pointer(*ins, n);
		*arg = n;
		return 0;
	}

errhw:
#ifdef CONFIG_CLS_U32_MARK
	free_percpu(n->pcpu_success);
#endif

errout:
	tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
errfree:
	free_percpu(n->pf);
#endif
	kfree(n);
erridr:
	idr_remove_ext(&ht->handle_idr, handle);
	return err;
}

static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
1106
	unsigned int h;
L
Linus Torvalds 已提交
1107 1108 1109 1110

	if (arg->stop)
		return;

1111 1112 1113
	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next)) {
L
Linus Torvalds 已提交
1114 1115 1116
		if (ht->prio != tp->prio)
			continue;
		if (arg->count >= arg->skip) {
1117
			if (arg->fn(tp, ht, arg) < 0) {
L
Linus Torvalds 已提交
1118 1119 1120 1121 1122 1123
				arg->stop = 1;
				return;
			}
		}
		arg->count++;
		for (h = 0; h <= ht->divisor; h++) {
1124 1125 1126
			for (n = rtnl_dereference(ht->ht[h]);
			     n;
			     n = rtnl_dereference(n->next)) {
L
Linus Torvalds 已提交
1127 1128 1129 1130
				if (arg->count < arg->skip) {
					arg->count++;
					continue;
				}
1131
				if (arg->fn(tp, n, arg) < 0) {
L
Linus Torvalds 已提交
1132 1133 1134 1135 1136 1137 1138 1139 1140
					arg->stop = 1;
					return;
				}
				arg->count++;
			}
		}
	}
}

1141 1142 1143 1144 1145 1146 1147 1148
/*
 * u32_bind_class - record the class pointer on a key node.
 *
 * @fh:      filter handle, interpreted as a struct tc_u_knode (may be NULL).
 * @classid: class id to match against the node's stored res.classid.
 * @cl:      value stored into res.class when the ids match.
 *
 * Does nothing when @fh is NULL or the node's classid differs from @classid.
 */
static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
{
	struct tc_u_knode *knode = fh;

	if (!knode)
		return;
	if (knode->res.classid != classid)
		return;

	knode->res.class = cl;
}

1149
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
J
Jamal Hadi Salim 已提交
1150
		    struct sk_buff *skb, struct tcmsg *t)
L
Linus Torvalds 已提交
1151
{
1152
	struct tc_u_knode *n = fh;
1153
	struct tc_u_hnode *ht_up, *ht_down;
1154
	struct nlattr *nest;
L
Linus Torvalds 已提交
1155 1156 1157 1158 1159 1160

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

1161 1162 1163
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
L
Linus Torvalds 已提交
1164 1165

	if (TC_U32_KEY(n->handle) == 0) {
1166
		struct tc_u_hnode *ht = fh;
E
Eric Dumazet 已提交
1167 1168
		u32 divisor = ht->divisor + 1;

1169 1170
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
L
Linus Torvalds 已提交
1171
	} else {
1172 1173 1174
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *gpf;
		int cpu;
1175
#endif
1176

1177 1178 1179 1180
		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;
1181 1182 1183

		ht_up = rtnl_dereference(n->ht_up);
		if (ht_up) {
L
Linus Torvalds 已提交
1184
			u32 htid = n->handle & 0xFFFFF000;
1185 1186
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
L
Linus Torvalds 已提交
1187
		}
1188 1189 1190
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;
1191 1192 1193 1194

		ht_down = rtnl_dereference(n->ht_down);
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1195
			goto nla_put_failure;
L
Linus Torvalds 已提交
1196

1197 1198 1199
		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
			goto nla_put_failure;

L
Linus Torvalds 已提交
1200
#ifdef CONFIG_CLS_U32_MARK
1201 1202 1203 1204
		if ((n->val || n->mask)) {
			struct tc_u32_mark mark = {.val = n->val,
						   .mask = n->mask,
						   .success = 0};
1205
			int cpum;
1206

1207 1208
			for_each_possible_cpu(cpum) {
				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1209 1210 1211 1212 1213 1214 1215

				mark.success += cnt;
			}

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1216 1217
#endif

1218
		if (tcf_exts_dump(skb, &n->exts) < 0)
1219
			goto nla_put_failure;
L
Linus Torvalds 已提交
1220 1221

#ifdef CONFIG_NET_CLS_IND
1222 1223 1224 1225 1226 1227
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1228 1229
#endif
#ifdef CONFIG_CLS_U32_PERF
1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245
		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
			      n->sel.nkeys * sizeof(u64),
			      GFP_KERNEL);
		if (!gpf)
			goto nla_put_failure;

		for_each_possible_cpu(cpu) {
			int i;
			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

			gpf->rcnt += pf->rcnt;
			gpf->rhit += pf->rhit;
			for (i = 0; i < n->sel.nkeys; i++)
				gpf->kcnts[i] += pf->kcnts[i];
		}

1246 1247 1248 1249
		if (nla_put_64bit(skb, TCA_U32_PCNT,
				  sizeof(struct tc_u32_pcnt) +
				  n->sel.nkeys * sizeof(u64),
				  gpf, TCA_U32_PAD)) {
1250
			kfree(gpf);
1251
			goto nla_put_failure;
1252 1253
		}
		kfree(gpf);
L
Linus Torvalds 已提交
1254 1255 1256
#endif
	}

1257 1258
	nla_nest_end(skb, nest);

L
Linus Torvalds 已提交
1259
	if (TC_U32_KEY(n->handle))
1260
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1261
			goto nla_put_failure;
L
Linus Torvalds 已提交
1262 1263
	return skb->len;

1264
nla_put_failure:
1265
	nla_nest_cancel(skb, nest);
L
Linus Torvalds 已提交
1266 1267 1268
	return -1;
}

1269
/*
 * Operations table for the "u32" classifier.  init_u32() registers it with
 * register_tcf_proto_ops() and exit_u32() unregisters it.
 */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind = "u32",
	.classify = u32_classify,
	.init = u32_init,
	.destroy = u32_destroy,
	.get = u32_get,
	.change = u32_change,
	.delete = u32_delete,
	.walk = u32_walk,
	.dump = u32_dump,
	.bind_class = u32_bind_class,
	.owner = THIS_MODULE,
};

/*
 * init_u32 - module entry point.
 *
 * Logs which optional features were compiled in, allocates and initialises
 * the tc_u_common_hash bucket array (U32_HASH_SIZE list heads) and registers
 * the classifier operations.
 *
 * Returns 0 on success, -ENOMEM if the bucket array cannot be allocated, or
 * the error from register_tcf_proto_ops(), in which case the array is freed
 * again.
 */
static int __init init_u32(void)
{
	int bucket, err;

	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif

	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
					  sizeof(struct hlist_head),
					  GFP_KERNEL);
	if (!tc_u_common_hash)
		return -ENOMEM;

	bucket = 0;
	while (bucket < U32_HASH_SIZE) {
		INIT_HLIST_HEAD(&tc_u_common_hash[bucket]);
		bucket++;
	}

	err = register_tcf_proto_ops(&cls_u32_ops);
	if (!err)
		return 0;

	/* Registration failed: release the bucket array allocated above. */
	kvfree(tc_u_common_hash);
	return err;
}

1312
/*
 * exit_u32 - module exit point.
 *
 * Unregisters the classifier operations first, then frees the
 * tc_u_common_hash bucket array that init_u32() allocated.
 */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);

	kvfree(tc_u_common_hash);
}

/* Module entry/exit hooks and license declaration. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");