cls_u32.c 27.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
34
#include <linux/slab.h>
L
Linus Torvalds 已提交
35 36 37 38
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
39
#include <linux/percpu.h>
L
Linus Torvalds 已提交
40 41
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
42
#include <linux/bitmap.h>
43
#include <net/netlink.h>
L
Linus Torvalds 已提交
44 45
#include <net/act_api.h>
#include <net/pkt_cls.h>
46
#include <linux/netdevice.h>
L
Linus Torvalds 已提交
47

E
Eric Dumazet 已提交
48
struct tc_u_knode {
49
	struct tc_u_knode __rcu	*next;
L
Linus Torvalds 已提交
50
	u32			handle;
51
	struct tc_u_hnode __rcu	*ht_up;
L
Linus Torvalds 已提交
52 53
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
54
	int			ifindex;
L
Linus Torvalds 已提交
55 56 57
#endif
	u8			fshift;
	struct tcf_result	res;
58
	struct tc_u_hnode __rcu	*ht_down;
L
Linus Torvalds 已提交
59
#ifdef CONFIG_CLS_U32_PERF
60
	struct tc_u32_pcnt __percpu *pf;
L
Linus Torvalds 已提交
61
#endif
62
	u32			flags;
L
Linus Torvalds 已提交
63
#ifdef CONFIG_CLS_U32_MARK
64 65 66
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
L
Linus Torvalds 已提交
67
#endif
68 69
	struct tcf_proto	*tp;
	struct rcu_head		rcu;
70 71 72 73
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
L
Linus Torvalds 已提交
74 75
};

E
Eric Dumazet 已提交
76
struct tc_u_hnode {
77
	struct tc_u_hnode __rcu	*next;
L
Linus Torvalds 已提交
78 79 80 81
	u32			handle;
	u32			prio;
	struct tc_u_common	*tp_c;
	int			refcnt;
E
Eric Dumazet 已提交
82
	unsigned int		divisor;
83
	struct rcu_head		rcu;
84 85 86 87
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
L
Linus Torvalds 已提交
88 89
};

E
Eric Dumazet 已提交
90
struct tc_u_common {
91
	struct tc_u_hnode __rcu	*hlist;
L
Linus Torvalds 已提交
92 93 94
	struct Qdisc		*q;
	int			refcnt;
	u32			hgenerator;
95
	struct rcu_head		rcu;
L
Linus Torvalds 已提交
96 97
};

E
Eric Dumazet 已提交
98 99 100
/* Fold a packet word into a hash value: mask it with the selector's hmask,
 * convert to host byte order and shift right by fshift.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	return ntohl(key & sel->hmask) >> fshift;
}

J
Jamal Hadi Salim 已提交
107 108
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
L
Linus Torvalds 已提交
109 110 111
{
	struct {
		struct tc_u_knode *knode;
112
		unsigned int	  off;
L
Linus Torvalds 已提交
113 114
	} stack[TC_U32_MAXDEPTH];

115
	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
116
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
117 118 119 120 121 122 123 124 125 126
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
127
	n = rcu_dereference_bh(ht->ht[sel]);
L
Linus Torvalds 已提交
128 129 130 131 132 133

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
134
		__this_cpu_inc(n->pf->rcnt);
L
Linus Torvalds 已提交
135 136 137
		j = 0;
#endif

138 139 140 141 142
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

L
Linus Torvalds 已提交
143
#ifdef CONFIG_CLS_U32_MARK
144
		if ((skb->mark & n->mask) != n->val) {
145
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
146 147
			goto next_knode;
		} else {
148
			__this_cpu_inc(*n->pcpu_success);
L
Linus Torvalds 已提交
149 150 151
		}
#endif

E
Eric Dumazet 已提交
152
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
153
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
154
			__be32 *data, hdata;
155

D
Dan Carpenter 已提交
156
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
157 158
				goto out;

S
stephen hemminger 已提交
159
			data = skb_header_pointer(skb, toff, 4, &hdata);
160 161 162
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
163
				n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
164 165 166
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
167
			__this_cpu_inc(n->pf->kcnts[j]);
L
Linus Torvalds 已提交
168 169 170
			j++;
#endif
		}
171 172 173

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
L
Linus Torvalds 已提交
174
check_terminal:
E
Eric Dumazet 已提交
175
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
176 177 178

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
179
				if (!tcf_match_indev(skb, n->ifindex)) {
180
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
181 182 183 184
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
185
				__this_cpu_inc(n->pf->rhit);
L
Linus Torvalds 已提交
186 187 188
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
189
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
190 191 192 193 194
					goto next_knode;
				}

				return r;
			}
195
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
196 197 198 199 200 201 202
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
203
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
204 205
		sdepth++;

206
		ht = rcu_dereference_bh(n->ht_down);
L
Linus Torvalds 已提交
207
		sel = 0;
208
		if (ht->divisor) {
S
stephen hemminger 已提交
209
			__be32 *data, hdata;
210 211

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
212
						  &hdata);
213 214 215 216 217
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
218
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
219 220
			goto next_ht;

E
Eric Dumazet 已提交
221
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
222
			off2 = n->sel.off + 3;
223
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
224
				__be16 *data, hdata;
225 226 227

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
228
							  2, &hdata);
229 230 231 232 233
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
234 235
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
236
		if (n->sel.flags & TC_U32_EAT) {
237
			off += off2;
L
Linus Torvalds 已提交
238 239 240
			off2 = 0;
		}

241
		if (off < skb->len)
L
Linus Torvalds 已提交
242 243 244 245 246 247
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
248
		ht = rcu_dereference_bh(n->ht_up);
249
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
250 251
		goto check_terminal;
	}
252
out:
L
Linus Torvalds 已提交
253 254 255
	return -1;

deadloop:
256
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
257 258 259
	return -1;
}

J
Jamal Hadi Salim 已提交
260
/* Find the hash table with the given handle on tp_c->hlist.
 * Returns NULL when no table carries that handle.
 */
static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *cur = rtnl_dereference(tp_c->hlist);

	while (cur && cur->handle != handle)
		cur = rtnl_dereference(cur->next);

	return cur;
}

J
Jamal Hadi Salim 已提交
273
/* Find the key node with the given handle inside hash table @ht.
 * Returns NULL when the bucket encoded in the handle lies beyond the
 * table's divisor or when no node in that bucket has the handle.
 */
static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned int bucket = TC_U32_HASH(handle);
	struct tc_u_knode *cur;

	if (bucket > ht->divisor)
		return NULL;

	cur = rtnl_dereference(ht->ht[bucket]);
	while (cur && cur->handle != handle)
		cur = rtnl_dereference(cur->next);

	return cur;
}


/* Resolve a handle to the object it names.
 *
 * Returns the hash table (cast to unsigned long) when the handle has no
 * key part, the key node when it has one, or 0 when the table does not
 * exist or the key is not found.
 */
static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(handle) != TC_U32_ROOT)
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
	else
		ht = rtnl_dereference(tp->root);

	if (ht == NULL)
		return 0;

	if (TC_U32_KEY(handle) != 0)
		return (unsigned long)u32_lookup_key(ht, handle);

	return (unsigned long)ht;
}

static u32 gen_new_htid(struct tc_u_common *tp_c)
{
	int i = 0x800;

315 316 317
	/* hgenerator only used inside rtnl lock it is safe to increment
	 * without read _copy_ update semantics
	 */
L
Linus Torvalds 已提交
318 319 320
	do {
		if (++tp_c->hgenerator == 0x7FF)
			tp_c->hgenerator = 1;
E
Eric Dumazet 已提交
321
	} while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
L
Linus Torvalds 已提交
322 323 324 325 326 327 328 329 330

	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
}

static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;

331
	tp_c = tp->q->u32_node;
L
Linus Torvalds 已提交
332

333
	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
L
Linus Torvalds 已提交
334 335 336 337 338 339 340 341
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->refcnt++;
	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
	root_ht->prio = tp->prio;

	if (tp_c == NULL) {
342
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
L
Linus Torvalds 已提交
343 344 345 346 347
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
		tp_c->q = tp->q;
348
		tp->q->u32_node = tp_c;
L
Linus Torvalds 已提交
349 350 351
	}

	tp_c->refcnt++;
352 353
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
L
Linus Torvalds 已提交
354 355
	root_ht->tp_c = tp_c;

356
	rcu_assign_pointer(tp->root, root_ht);
L
Linus Torvalds 已提交
357 358 359 360
	tp->data = tp_c;
	return 0;
}

J
Jamal Hadi Salim 已提交
361
static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
362
			   bool free_pf)
L
Linus Torvalds 已提交
363
{
364
	tcf_exts_destroy(&n->exts);
L
Linus Torvalds 已提交
365 366 367
	if (n->ht_down)
		n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
368 369
	if (free_pf)
		free_percpu(n->pf);
370 371
#endif
#ifdef CONFIG_CLS_U32_MARK
372 373
	if (free_pf)
		free_percpu(n->pcpu_success);
L
Linus Torvalds 已提交
374 375 376 377 378
#endif
	kfree(n);
	return 0;
}

379 380 381 382 383 384 385 386
/* u32_delete_key_rcu should be called when free'ing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_rcu variant does not free the percpu
 * statistics.
 */
387 388 389 390
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
	u32_destroy_key(key->tp, key, false);
}

/* RCU callback that frees a tc_u_knode together with its percpu
 * statistics. It must only be used for nodes that are not copies returned
 * by u32_init_knode(); for those copies use u32_delete_key_rcu(), which
 * keeps the shared statistics alive.
 */
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *n = container_of(rcu, struct tc_u_knode, rcu);

	u32_destroy_key(n->tp, n, true);
}

408
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
409
{
410 411
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
412
	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
L
Linus Torvalds 已提交
413 414

	if (ht) {
415 416 417 418 419
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				RCU_INIT_POINTER(*kp, key->next);
L
Linus Torvalds 已提交
420

421
				tcf_unbind_filter(tp, &key->res);
422
				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
423 424 425 426
				return 0;
			}
		}
	}
427
	WARN_ON(1);
L
Linus Torvalds 已提交
428 429 430
	return 0;
}

431 432 433 434 435 436 437 438 439
/* Ask the device to drop the offloaded key node with the given handle.
 * Nothing is sent when tc_should_offload() says the device is not
 * offloading.
 */
static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload u32_offload = {0};
	/* zeroed so that no member other than the ones set below reaches
	 * the driver with indeterminate stack contents
	 */
	struct tc_to_netdev offload = {0};

	offload.type = TC_SETUP_CLSU32;
	offload.cls_u32 = &u32_offload;

	if (tc_should_offload(dev, tp, 0)) {
		offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
		offload.cls_u32->knode.handle = handle;
		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
					      tp->protocol, &offload);
	}
}

J
Jamal Hadi Salim 已提交
448 449
/* Offer a new hash table to the device for offload.
 *
 * Returns -EINVAL when offload is not possible but @flags demand skip_sw.
 * When the driver was called, its error is only reported if @flags demand
 * skip_sw; otherwise 0 is returned and the table lives in software.
 */
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
				u32 flags)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload u32_offload = {0};
	/* zeroed so that no member other than the ones set below reaches
	 * the driver with indeterminate stack contents
	 */
	struct tc_to_netdev offload = {0};
	int err;

	if (!tc_should_offload(dev, tp, flags))
		return tc_skip_sw(flags) ? -EINVAL : 0;

	offload.type = TC_SETUP_CLSU32;
	offload.cls_u32 = &u32_offload;

	offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
	offload.cls_u32->hnode.divisor = h->divisor;
	offload.cls_u32->hnode.handle = h->handle;
	offload.cls_u32->hnode.prio = h->prio;

	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
					    tp->protocol, &offload);
	if (tc_skip_sw(flags))
		return err;

	return 0;
}

static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload u32_offload = {0};
	struct tc_to_netdev offload;

	offload.type = TC_SETUP_CLSU32;
	offload.cls_u32 = &u32_offload;

484
	if (tc_should_offload(dev, tp, 0)) {
485 486 487 488 489 490 491 492 493 494
		offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
		offload.cls_u32->hnode.divisor = h->divisor;
		offload.cls_u32->hnode.handle = h->handle;
		offload.cls_u32->hnode.prio = h->prio;

		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
					      tp->protocol, &offload);
	}
}

J
Jamal Hadi Salim 已提交
495 496
/* Offer key node @n to the device for offload.
 *
 * On driver success TCA_CLS_FLAGS_IN_HW is set in n->flags.
 *
 * Returns -EINVAL when offload is not possible but @flags demand skip_sw.
 * When the driver was called, its error is only reported if @flags demand
 * skip_sw; otherwise 0 is returned and the node is handled in software.
 */
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
				u32 flags)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload u32_offload = {0};
	/* zeroed so that no member other than the ones set below reaches
	 * the driver with indeterminate stack contents
	 */
	struct tc_to_netdev offload = {0};
	int err;

	offload.type = TC_SETUP_CLSU32;
	offload.cls_u32 = &u32_offload;

	if (!tc_should_offload(dev, tp, flags))
		return tc_skip_sw(flags) ? -EINVAL : 0;

	offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
	offload.cls_u32->knode.handle = n->handle;
	offload.cls_u32->knode.fshift = n->fshift;
#ifdef CONFIG_CLS_U32_MARK
	offload.cls_u32->knode.val = n->val;
	offload.cls_u32->knode.mask = n->mask;
#else
	offload.cls_u32->knode.val = 0;
	offload.cls_u32->knode.mask = 0;
#endif
	offload.cls_u32->knode.sel = &n->sel;
	offload.cls_u32->knode.exts = &n->exts;
	/* link_handle stays 0 (from the zeroed u32_offload) without a link */
	if (n->ht_down)
		offload.cls_u32->knode.link_handle = n->ht_down->handle;

	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
					    tp->protocol, &offload);

	if (!err)
		n->flags |= TCA_CLS_FLAGS_IN_HW;

	if (tc_skip_sw(flags))
		return err;

	return 0;
}

536
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
L
Linus Torvalds 已提交
537 538
{
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
539
	unsigned int h;
L
Linus Torvalds 已提交
540

E
Eric Dumazet 已提交
541
	for (h = 0; h <= ht->divisor; h++) {
542 543 544
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
545
			tcf_unbind_filter(tp, &n->res);
546
			u32_remove_hw_knode(tp, n->handle);
547
			call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
548 549 550 551 552 553 554
		}
	}
}

static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
	struct tc_u_common *tp_c = tp->data;
555 556
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;
L
Linus Torvalds 已提交
557

558
	WARN_ON(ht->refcnt);
L
Linus Torvalds 已提交
559

560
	u32_clear_hnode(tp, ht);
L
Linus Torvalds 已提交
561

562 563 564 565 566
	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
567
			u32_clear_hw_hnode(tp, ht);
568 569
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
L
Linus Torvalds 已提交
570 571 572 573 574 575 576
			return 0;
		}
	}

	return -ENOENT;
}

577 578 579 580 581 582 583 584 585 586 587 588
/* Return true when none of the buckets of @ht holds a key node. */
static bool ht_empty(struct tc_u_hnode *ht)
{
	unsigned int bucket = 0;

	while (bucket <= ht->divisor) {
		if (rcu_access_pointer(ht->ht[bucket]))
			return false;
		bucket++;
	}

	return true;
}

/* Tear down a u32 classifier instance.
 *
 * Without @force the instance is only destroyed when it is not in use:
 * the root table must not be referenced more than once and must be empty
 * if referenced once, and likewise the shared state must not have other
 * users and all of its tables must be empty if this is the only user.
 * In that case false is returned and nothing is changed.
 *
 * Otherwise the root table reference and the shared state reference are
 * dropped; whichever reaches zero is destroyed. Returns true.
 */
static bool u32_destroy(struct tcf_proto *tp, bool force)
{
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;

	WARN_ON(root_ht == NULL);

	if (!force) {
		if (root_ht) {
			if (root_ht->refcnt > 1)
				return false;
			if (root_ht->refcnt == 1 && !ht_empty(root_ht))
				return false;
		}

		if (tp_c->refcnt > 1)
			return false;

		if (tp_c->refcnt == 1) {
			ht = rtnl_dereference(tp_c->hlist);
			while (ht) {
				if (!ht_empty(ht))
					return false;
				ht = rtnl_dereference(ht->next);
			}
		}
	}

	if (root_ht) {
		root_ht->refcnt--;
		if (root_ht->refcnt == 0)
			u32_destroy_hnode(tp, root_ht);
	}

	tp_c->refcnt--;
	if (tp_c->refcnt == 0) {
		tp->q->u32_node = NULL;

		/* first pass: drop the keys of every remaining table */
		ht = rtnl_dereference(tp_c->hlist);
		while (ht) {
			ht->refcnt--;
			u32_clear_hnode(tp, ht);
			ht = rtnl_dereference(ht->next);
		}

		/* second pass: unlink and free the tables themselves */
		for (;;) {
			ht = rtnl_dereference(tp_c->hlist);
			if (ht == NULL)
				break;
			RCU_INIT_POINTER(tp_c->hlist, ht->next);
			kfree_rcu(ht, rcu);
		}

		kfree(tp_c);
	}

	tp->data = NULL;
	return true;
}

static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
E
Eric Dumazet 已提交
648
	struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
649
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
650 651 652 653

	if (ht == NULL)
		return 0;

654 655
	if (TC_U32_KEY(ht->handle)) {
		u32_remove_hw_knode(tp, ht->handle);
E
Eric Dumazet 已提交
656
		return u32_delete_key(tp, (struct tc_u_knode *)ht);
657
	}
L
Linus Torvalds 已提交
658

659
	if (root_ht == ht)
L
Linus Torvalds 已提交
660 661
		return -EINVAL;

662 663
	if (ht->refcnt == 1) {
		ht->refcnt--;
L
Linus Torvalds 已提交
664
		u32_destroy_hnode(tp, ht);
665 666 667
	} else {
		return -EBUSY;
	}
L
Linus Torvalds 已提交
668 669 670 671

	return 0;
}

672
#define NR_U32_NODE (1<<12)
L
Linus Torvalds 已提交
673 674 675
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
	struct tc_u_knode *n;
676 677 678 679 680
	unsigned long i;
	unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
					GFP_KERNEL);
	if (!bitmap)
		return handle | 0xFFF;
L
Linus Torvalds 已提交
681

682 683 684
	for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
	     n;
	     n = rtnl_dereference(n->next))
685
		set_bit(TC_U32_NODE(n->handle), bitmap);
L
Linus Torvalds 已提交
686

687 688 689 690 691 692
	i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
	if (i >= NR_U32_NODE)
		i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);

	kfree(bitmap);
	return handle | (i >= NR_U32_NODE ? 0xFFF : i);
L
Linus Torvalds 已提交
693 694
}

695 696 697 698 699 700 701 702
/* Netlink attribute policy used by u32_change() when parsing TCA_OPTIONS. */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
	[TCA_U32_HASH]		= { .type = NLA_U32 },
	[TCA_U32_LINK]		= { .type = NLA_U32 },
	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
	/* length covers the selector header only, not the trailing keys */
	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
};

706 707
/* Apply the LINK, CLASSID, INDEV and action/police attributes in @tb to
 * key node @n.
 *
 * Everything that can fail (extension validation, input device lookup,
 * link target lookup) is done before @n or any hash table reference count
 * is touched, so a failure leaves @n exactly as it was on entry.
 *
 * Returns 0 on success or a negative errno.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr)
{
	struct tcf_exts e;
	int err;
#ifdef CONFIG_NET_CLS_IND
	int ifindex = -1;
#endif

	err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
	if (err < 0)
		return err;
	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
	if (err < 0)
		goto errout;

	err = -EINVAL;
#ifdef CONFIG_NET_CLS_IND
	/* Resolve the input device first: failing here after the link
	 * below had been switched would leave the new ht_down reference
	 * and the class binding in place on the error path.
	 */
	if (tb[TCA_U32_INDEV]) {
		ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV]);
		if (ifindex < 0)
			goto errout;
	}
#endif
	if (tb[TCA_U32_LINK]) {
		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *ht_down = NULL, *ht_old;

		/* a link target must be a table, not a key */
		if (TC_U32_KEY(handle))
			goto errout;

		if (handle) {
			ht_down = u32_lookup_ht(ht->tp_c, handle);

			if (ht_down == NULL)
				goto errout;
			ht_down->refcnt++;
		}

		ht_old = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, ht_down);

		if (ht_old)
			ht_old->refcnt--;
	}
	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_U32_INDEV])
		n->ifindex = ifindex;
#endif
	tcf_exts_change(tp, &n->exts, &e);

	return 0;
errout:
	tcf_exts_destroy(&e);
	return err;
}

J
Jamal Hadi Salim 已提交
765
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
			      struct tc_u_knode *n)
{
	struct tc_u_knode __rcu **ins;
	struct tc_u_knode *pins;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));

	ins = &ht->ht[TC_U32_HASH(n->handle)];

	/* The node must always exist for it to be replaced if this is not the
	 * case then something went very wrong elsewhere.
	 */
	for (pins = rtnl_dereference(*ins); ;
	     ins = &pins->next, pins = rtnl_dereference(*ins))
		if (pins->handle == n->handle)
			break;

	RCU_INIT_POINTER(n->next, pins->next);
	rcu_assign_pointer(*ins, n);
}

static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
					 struct tc_u_knode *n)
{
	struct tc_u_knode *new;
	struct tc_u32_sel *s = &n->sel;

	new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
		      GFP_KERNEL);

	if (!new)
		return NULL;

	RCU_INIT_POINTER(new->next, n->next);
	new->handle = n->handle;
	RCU_INIT_POINTER(new->ht_up, n->ht_up);

#ifdef CONFIG_NET_CLS_IND
	new->ifindex = n->ifindex;
#endif
	new->fshift = n->fshift;
	new->res = n->res;
812
	new->flags = n->flags;
813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
	RCU_INIT_POINTER(new->ht_down, n->ht_down);

	/* bump reference count as long as we hold pointer to structure */
	if (new->ht_down)
		new->ht_down->refcnt++;

#ifdef CONFIG_CLS_U32_PERF
	/* Statistics may be incremented by readers during update
	 * so we must keep them in tact. When the node is later destroyed
	 * a special destroy call must be made to not free the pf memory.
	 */
	new->pf = n->pf;
#endif

#ifdef CONFIG_CLS_U32_MARK
	new->val = n->val;
	new->mask = n->mask;
	/* Similarly success statistics must be moved as pointers */
	new->pcpu_success = n->pcpu_success;
#endif
	new->tp = tp;
	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));

836 837 838 839
	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
		kfree(new);
		return NULL;
	}
840 841 842 843

	return new;
}

844
/* Create or update a u32 object from the netlink attributes in @tca.
 *
 * Three cases are handled:
 *  - *@arg names an existing key node: a copy of it is updated with the
 *    new parameters and swapped in; the old node is freed after a grace
 *    period.
 *  - TCA_U32_DIVISOR is present: a new hash table is created.
 *  - otherwise: a new key node is created from TCA_U32_SEL and inserted
 *    into the selected bucket, ordered by node id.
 *
 * On success of the two creating cases *@arg is set to the new object.
 * Returns 0 on success or a negative errno.
 */
static int u32_change(struct net *net, struct sk_buff *in_skb,
		      struct tcf_proto *tp, unsigned long base, u32 handle,
		      struct nlattr **tca, unsigned long *arg, bool ovr)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
	u32 htid, flags = 0;
	size_t sel_size;
	int err;
#ifdef CONFIG_CLS_U32_PERF
	size_t size;
#endif

	if (opt == NULL)
		return handle ? -EINVAL : 0;

	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_U32_FLAGS]) {
		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
		if (!tc_flags_valid(flags))
			return -EINVAL;
	}

	n = (struct tc_u_knode *)*arg;
	if (n) {
		struct tc_u_knode *new;

		if (TC_U32_KEY(n->handle) == 0)
			return -EINVAL;

		/* IN_HW/NOT_IN_HW are set by this file on the stored node
		 * and are not part of what the caller passed at creation
		 * time, so they must not take part in the comparison.
		 */
		if ((n->flags ^ flags) &
		    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW))
			return -EINVAL;

		new = u32_init_knode(tp, n);
		if (!new)
			return -ENOMEM;

		err = u32_set_parms(net, tp, base,
				    rtnl_dereference(n->ht_up), new, tb,
				    tca[TCA_RATE], ovr);

		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		err = u32_replace_hw_knode(tp, new, flags);
		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		u32_replace_knode(tp, tp_c, new);
		tcf_unbind_filter(tp, &n->res);
		call_rcu(&n->rcu, u32_delete_key_rcu);
		return 0;
	}

	if (tb[TCA_U32_DIVISOR]) {
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);

		/* stored as bucket count minus one; 0 wraps and is rejected */
		if (--divisor > 0x100)
			return -EINVAL;
		if (TC_U32_KEY(handle))
			return -EINVAL;
		if (handle == 0) {
			handle = gen_new_htid(tp->data);
			if (handle == 0)
				return -ENOMEM;
		}
		/* sizeof(*ht) already contains one bucket */
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
		if (ht == NULL)
			return -ENOBUFS;
		ht->tp_c = tp_c;
		ht->refcnt = 1;
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;

		err = u32_replace_hw_hnode(tp, ht, flags);
		if (err) {
			kfree(ht);
			return err;
		}

		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
		*arg = (unsigned long)ht;

		return 0;
	}

	if (tb[TCA_U32_HASH]) {
		htid = nla_get_u32(tb[TCA_U32_HASH]);
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
			ht = rtnl_dereference(tp->root);
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
			if (ht == NULL)
				return -EINVAL;
		}
	} else {
		ht = rtnl_dereference(tp->root);
		htid = ht->handle;
	}

	if (ht->divisor < TC_U32_HASH(htid))
		return -EINVAL;

	if (handle) {
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
			return -EINVAL;
		handle = htid | TC_U32_NODE(handle);
	} else
		handle = gen_new_kid(ht, htid);

	if (tb[TCA_U32_SEL] == NULL)
		return -EINVAL;

	s = nla_data(tb[TCA_U32_SEL]);

	/* The policy only guarantees room for the selector header; make
	 * sure the attribute really carries the nkeys keys it announces
	 * before they are copied below.
	 */
	sel_size = sizeof(*s) + s->nkeys * sizeof(struct tc_u32_key);
	if (nla_len(tb[TCA_U32_SEL]) < sel_size)
		return -EINVAL;

	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
	if (n == NULL)
		return -ENOBUFS;

#ifdef CONFIG_CLS_U32_PERF
	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
	if (!n->pf) {
		kfree(n);
		return -ENOBUFS;
	}
#endif

	memcpy(&n->sel, s, sel_size);
	RCU_INIT_POINTER(n->ht_up, ht);
	n->handle = handle;
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
	n->flags = flags;
	n->tp = tp;

	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
	if (err < 0)
		goto errout;

#ifdef CONFIG_CLS_U32_MARK
	n->pcpu_success = alloc_percpu(u32);
	if (!n->pcpu_success) {
		err = -ENOMEM;
		goto errout;
	}

	if (tb[TCA_U32_MARK]) {
		struct tc_u32_mark *mark;

		mark = nla_data(tb[TCA_U32_MARK]);
		n->val = mark->val;
		n->mask = mark->mask;
	}
#endif

	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
	if (err == 0) {
		struct tc_u_knode __rcu **ins;
		struct tc_u_knode *pins;

		err = u32_replace_hw_knode(tp, n, flags);
		if (err)
			goto errhw;

		if (!tc_in_hw(n->flags))
			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		/* keep the bucket chain sorted by node id */
		ins = &ht->ht[TC_U32_HASH(handle)];
		for (pins = rtnl_dereference(*ins); pins;
		     ins = &pins->next, pins = rtnl_dereference(*ins))
			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
				break;

		RCU_INIT_POINTER(n->next, pins);
		rcu_assign_pointer(*ins, n);
		*arg = (unsigned long)n;
		return 0;
	}

errhw:
	/* Undo what u32_set_parms() may have done to the node that is
	 * about to be freed: release the class binding and the reference
	 * on a linked hash table. Both are no-ops if nothing was set.
	 */
	tcf_unbind_filter(tp, &n->res);
	if (n->ht_down)
		n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_MARK
	free_percpu(n->pcpu_success);
#endif

errout:
	tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
	free_percpu(n->pf);
#endif
	kfree(n);
	return err;
}

static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
1058
	unsigned int h;
L
Linus Torvalds 已提交
1059 1060 1061 1062

	if (arg->stop)
		return;

1063 1064 1065
	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next)) {
L
Linus Torvalds 已提交
1066 1067 1068 1069 1070 1071 1072 1073 1074 1075
		if (ht->prio != tp->prio)
			continue;
		if (arg->count >= arg->skip) {
			if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
				arg->stop = 1;
				return;
			}
		}
		arg->count++;
		for (h = 0; h <= ht->divisor; h++) {
1076 1077 1078
			for (n = rtnl_dereference(ht->ht[h]);
			     n;
			     n = rtnl_dereference(n->next)) {
L
Linus Torvalds 已提交
1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092
				if (arg->count < arg->skip) {
					arg->count++;
					continue;
				}
				if (arg->fn(tp, (unsigned long)n, arg) < 0) {
					arg->stop = 1;
					return;
				}
				arg->count++;
			}
		}
	}
}

1093
static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
J
Jamal Hadi Salim 已提交
1094
		    struct sk_buff *skb, struct tcmsg *t)
L
Linus Torvalds 已提交
1095
{
E
Eric Dumazet 已提交
1096
	struct tc_u_knode *n = (struct tc_u_knode *)fh;
1097
	struct tc_u_hnode *ht_up, *ht_down;
1098
	struct nlattr *nest;
L
Linus Torvalds 已提交
1099 1100 1101 1102 1103 1104

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

1105 1106 1107
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
L
Linus Torvalds 已提交
1108 1109

	if (TC_U32_KEY(n->handle) == 0) {
E
Eric Dumazet 已提交
1110 1111 1112
		struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
		u32 divisor = ht->divisor + 1;

1113 1114
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
L
Linus Torvalds 已提交
1115
	} else {
1116 1117 1118
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *gpf;
		int cpu;
1119
#endif
1120

1121 1122 1123 1124
		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;
1125 1126 1127

		ht_up = rtnl_dereference(n->ht_up);
		if (ht_up) {
L
Linus Torvalds 已提交
1128
			u32 htid = n->handle & 0xFFFFF000;
1129 1130
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
L
Linus Torvalds 已提交
1131
		}
1132 1133 1134
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;
1135 1136 1137 1138

		ht_down = rtnl_dereference(n->ht_down);
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1139
			goto nla_put_failure;
L
Linus Torvalds 已提交
1140

1141 1142 1143
		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
			goto nla_put_failure;

L
Linus Torvalds 已提交
1144
#ifdef CONFIG_CLS_U32_MARK
1145 1146 1147 1148
		if ((n->val || n->mask)) {
			struct tc_u32_mark mark = {.val = n->val,
						   .mask = n->mask,
						   .success = 0};
1149
			int cpum;
1150

1151 1152
			for_each_possible_cpu(cpum) {
				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1153 1154 1155 1156 1157 1158 1159

				mark.success += cnt;
			}

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1160 1161
#endif

1162
		if (tcf_exts_dump(skb, &n->exts) < 0)
1163
			goto nla_put_failure;
L
Linus Torvalds 已提交
1164 1165

#ifdef CONFIG_NET_CLS_IND
1166 1167 1168 1169 1170 1171
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1172 1173
#endif
#ifdef CONFIG_CLS_U32_PERF
1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189
		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
			      n->sel.nkeys * sizeof(u64),
			      GFP_KERNEL);
		if (!gpf)
			goto nla_put_failure;

		for_each_possible_cpu(cpu) {
			int i;
			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

			gpf->rcnt += pf->rcnt;
			gpf->rhit += pf->rhit;
			for (i = 0; i < n->sel.nkeys; i++)
				gpf->kcnts[i] += pf->kcnts[i];
		}

1190 1191 1192 1193
		if (nla_put_64bit(skb, TCA_U32_PCNT,
				  sizeof(struct tc_u32_pcnt) +
				  n->sel.nkeys * sizeof(u64),
				  gpf, TCA_U32_PAD)) {
1194
			kfree(gpf);
1195
			goto nla_put_failure;
1196 1197
		}
		kfree(gpf);
L
Linus Torvalds 已提交
1198 1199 1200
#endif
	}

1201 1202
	nla_nest_end(skb, nest);

L
Linus Torvalds 已提交
1203
	if (TC_U32_KEY(n->handle))
1204
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1205
			goto nla_put_failure;
L
Linus Torvalds 已提交
1206 1207
	return skb->len;

1208
nla_put_failure:
1209
	nla_nest_cancel(skb, nest);
L
Linus Torvalds 已提交
1210 1211 1212
	return -1;
}

1213
/*
 * Operations table for the "u32" classifier kind. It is handed to
 * register_tcf_proto_ops() in init_u32() and to
 * unregister_tcf_proto_ops() in exit_u32().
 */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind		=	"u32",
	.classify	=	u32_classify,
	.init		=	u32_init,
	.destroy	=	u32_destroy,
	.get		=	u32_get,
	.change		=	u32_change,
	.delete		=	u32_delete,
	.walk		=	u32_walk,
	.dump		=	u32_dump,
	.owner		=	THIS_MODULE,
};

/*
 * init_u32 - module entry point.
 *
 * Logs a banner plus one line per compiled-in optional feature
 * (performance counters, input device check, actions), then registers
 * the u32 operations table.
 *
 * Returns whatever register_tcf_proto_ops() returns.
 */
static int __init init_u32(void)
{
	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif
	return register_tcf_proto_ops(&cls_u32_ops);
}

1241
/* exit_u32 - module exit point; unregisters the u32 operations table. */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);
}

/* Hook init_u32()/exit_u32() up as the module's load and unload handlers. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");