cls_u32.c 28.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
34
#include <linux/slab.h>
L
Linus Torvalds 已提交
35 36 37 38
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
39
#include <linux/percpu.h>
L
Linus Torvalds 已提交
40 41
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
42
#include <linux/bitmap.h>
43 44
#include <linux/netdevice.h>
#include <linux/hash.h>
45
#include <net/netlink.h>
L
Linus Torvalds 已提交
46 47
#include <net/act_api.h>
#include <net/pkt_cls.h>
48
#include <linux/netdevice.h>
L
Linus Torvalds 已提交
49

E
Eric Dumazet 已提交
50
struct tc_u_knode {
51
	struct tc_u_knode __rcu	*next;
L
Linus Torvalds 已提交
52
	u32			handle;
53
	struct tc_u_hnode __rcu	*ht_up;
L
Linus Torvalds 已提交
54 55
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
56
	int			ifindex;
L
Linus Torvalds 已提交
57 58 59
#endif
	u8			fshift;
	struct tcf_result	res;
60
	struct tc_u_hnode __rcu	*ht_down;
L
Linus Torvalds 已提交
61
#ifdef CONFIG_CLS_U32_PERF
62
	struct tc_u32_pcnt __percpu *pf;
L
Linus Torvalds 已提交
63
#endif
64
	u32			flags;
L
Linus Torvalds 已提交
65
#ifdef CONFIG_CLS_U32_MARK
66 67 68
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
L
Linus Torvalds 已提交
69
#endif
70 71
	struct tcf_proto	*tp;
	struct rcu_head		rcu;
72 73 74 75
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
L
Linus Torvalds 已提交
76 77
};

E
Eric Dumazet 已提交
78
struct tc_u_hnode {
79
	struct tc_u_hnode __rcu	*next;
L
Linus Torvalds 已提交
80 81 82 83
	u32			handle;
	u32			prio;
	struct tc_u_common	*tp_c;
	int			refcnt;
E
Eric Dumazet 已提交
84
	unsigned int		divisor;
85
	struct rcu_head		rcu;
86 87 88 89
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
L
Linus Torvalds 已提交
90 91
};

E
Eric Dumazet 已提交
92
struct tc_u_common {
93
	struct tc_u_hnode __rcu	*hlist;
L
Linus Torvalds 已提交
94 95 96
	struct Qdisc		*q;
	int			refcnt;
	u32			hgenerator;
97
	struct hlist_node	hnode;
98
	struct rcu_head		rcu;
L
Linus Torvalds 已提交
99 100
};

E
Eric Dumazet 已提交
101 102 103
/* Fold a packet word into a hash value: mask @key with the selector's hmask,
 * convert the result to host byte order and shift it right by @fshift.
 * The caller is responsible for limiting the result to the table size.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	unsigned int h = ntohl(key & sel->hmask) >> fshift;

	return h;
}

J
Jamal Hadi Salim 已提交
110 111
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
L
Linus Torvalds 已提交
112 113 114
{
	struct {
		struct tc_u_knode *knode;
115
		unsigned int	  off;
L
Linus Torvalds 已提交
116 117
	} stack[TC_U32_MAXDEPTH];

118
	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
119
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
120 121 122 123 124 125 126 127 128 129
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
130
	n = rcu_dereference_bh(ht->ht[sel]);
L
Linus Torvalds 已提交
131 132 133 134 135 136

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
137
		__this_cpu_inc(n->pf->rcnt);
L
Linus Torvalds 已提交
138 139 140
		j = 0;
#endif

141 142 143 144 145
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

L
Linus Torvalds 已提交
146
#ifdef CONFIG_CLS_U32_MARK
147
		if ((skb->mark & n->mask) != n->val) {
148
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
149 150
			goto next_knode;
		} else {
151
			__this_cpu_inc(*n->pcpu_success);
L
Linus Torvalds 已提交
152 153 154
		}
#endif

E
Eric Dumazet 已提交
155
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
156
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
157
			__be32 *data, hdata;
158

D
Dan Carpenter 已提交
159
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
160 161
				goto out;

S
stephen hemminger 已提交
162
			data = skb_header_pointer(skb, toff, 4, &hdata);
163 164 165
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
166
				n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
167 168 169
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
170
			__this_cpu_inc(n->pf->kcnts[j]);
L
Linus Torvalds 已提交
171 172 173
			j++;
#endif
		}
174 175 176

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
L
Linus Torvalds 已提交
177
check_terminal:
E
Eric Dumazet 已提交
178
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
179 180 181

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
182
				if (!tcf_match_indev(skb, n->ifindex)) {
183
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
184 185 186 187
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
188
				__this_cpu_inc(n->pf->rhit);
L
Linus Torvalds 已提交
189 190 191
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
192
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
193 194 195 196 197
					goto next_knode;
				}

				return r;
			}
198
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
199 200 201 202 203 204 205
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
206
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
207 208
		sdepth++;

209
		ht = rcu_dereference_bh(n->ht_down);
L
Linus Torvalds 已提交
210
		sel = 0;
211
		if (ht->divisor) {
S
stephen hemminger 已提交
212
			__be32 *data, hdata;
213 214

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
215
						  &hdata);
216 217 218 219 220
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
221
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
222 223
			goto next_ht;

E
Eric Dumazet 已提交
224
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
225
			off2 = n->sel.off + 3;
226
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
227
				__be16 *data, hdata;
228 229 230

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
231
							  2, &hdata);
232 233 234 235 236
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
237 238
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
239
		if (n->sel.flags & TC_U32_EAT) {
240
			off += off2;
L
Linus Torvalds 已提交
241 242 243
			off2 = 0;
		}

244
		if (off < skb->len)
L
Linus Torvalds 已提交
245 246 247 248 249 250
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
251
		ht = rcu_dereference_bh(n->ht_up);
252
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
253 254
		goto check_terminal;
	}
255
out:
L
Linus Torvalds 已提交
256 257 258
	return -1;

deadloop:
259
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
260 261 262
	return -1;
}

J
Jamal Hadi Salim 已提交
263
/* Find the hash table with the given @handle in tp_c's table list.
 * Returns NULL when no table has that handle. Uses rtnl_dereference(), so
 * the caller is expected to hold the rtnl lock.
 */
static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *ht;

	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next))
		if (ht->handle == handle)
			break;

	return ht;
}

J
Jamal Hadi Salim 已提交
276
/* Find the knode with the given @handle in hash table @ht.
 * The bucket is taken from TC_U32_HASH(handle); a bucket index beyond
 * ht->divisor yields NULL, as does a handle that is not present.
 */
static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned int sel;
	struct tc_u_knode *n = NULL;

	sel = TC_U32_HASH(handle);
	if (sel > ht->divisor)
		goto out;

	for (n = rtnl_dereference(ht->ht[sel]);
	     n;
	     n = rtnl_dereference(n->next))
		if (n->handle == handle)
			break;
out:
	return n;
}


295
/* Resolve @handle to a filter object.
 *
 * The table part of the handle selects either the root table or a table
 * found by u32_lookup_ht(). If the key part of the handle is zero the table
 * itself (struct tc_u_hnode *) is returned, otherwise the matching knode
 * (struct tc_u_knode *). Returns NULL when nothing is found.
 */
static void *u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_hnode *ht;
	struct tc_u_common *tp_c = tp->data;

	if (TC_U32_HTID(handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));

	if (!ht)
		return NULL;

	if (TC_U32_KEY(handle) == 0)
		return ht;

	return u32_lookup_key(ht, handle);
}

static u32 gen_new_htid(struct tc_u_common *tp_c)
{
	int i = 0x800;

318 319 320
	/* hgenerator only used inside rtnl lock it is safe to increment
	 * without read _copy_ update semantics
	 */
L
Linus Torvalds 已提交
321 322 323
	do {
		if (++tp_c->hgenerator == 0x7FF)
			tp_c->hgenerator = 1;
E
Eric Dumazet 已提交
324
	} while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
L
Linus Torvalds 已提交
325 326 327 328

	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
}

329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
/* Array of hash buckets holding every tc_u_common, indexed by tc_u_hash().
 * NOTE(review): the allocation of this array is not visible in this part of
 * the file - presumably U32_HASH_SIZE entries; confirm at the init site.
 */
static struct hlist_head *tc_u_common_hash;

/* tc_u_hash() requests U32_HASH_SHIFT bits from hash_64(). */
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)

static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	u32 qhandle = tp->q->handle;
	int ifindex = dev->ifindex;

	return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
}

static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
{
	struct tc_u_common *tc;
	unsigned int h;

	h = tc_u_hash(tp);
	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
		if (tc->q == tp->q)
			return tc;
	}
	return NULL;
}

L
Linus Torvalds 已提交
356 357 358 359
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;
360
	unsigned int h;
L
Linus Torvalds 已提交
361

362
	tp_c = tc_u_common_find(tp);
L
Linus Torvalds 已提交
363

364
	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
L
Linus Torvalds 已提交
365 366 367 368 369 370 371 372
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->refcnt++;
	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
	root_ht->prio = tp->prio;

	if (tp_c == NULL) {
373
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
L
Linus Torvalds 已提交
374 375 376 377 378
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
		tp_c->q = tp->q;
379 380 381 382
		INIT_HLIST_NODE(&tp_c->hnode);

		h = tc_u_hash(tp);
		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
L
Linus Torvalds 已提交
383 384 385
	}

	tp_c->refcnt++;
386 387
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
L
Linus Torvalds 已提交
388 389
	root_ht->tp_c = tp_c;

390
	rcu_assign_pointer(tp->root, root_ht);
L
Linus Torvalds 已提交
391 392 393 394
	tp->data = tp_c;
	return 0;
}

J
Jamal Hadi Salim 已提交
395
static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
396
			   bool free_pf)
L
Linus Torvalds 已提交
397
{
398
	tcf_exts_destroy(&n->exts);
L
Linus Torvalds 已提交
399 400 401
	if (n->ht_down)
		n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
402 403
	if (free_pf)
		free_percpu(n->pf);
404 405
#endif
#ifdef CONFIG_CLS_U32_MARK
406 407
	if (free_pf)
		free_percpu(n->pcpu_success);
L
Linus Torvalds 已提交
408 409 410 411 412
#endif
	kfree(n);
	return 0;
}

413 414 415 416 417 418 419 420
/* u32_delete_key_rcu should be called when free'ing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_rcu variant does not free the percpu
 * statistics.
 */
421 422 423 424
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
	u32_destroy_key(key->tp, key, false);
}

/* u32_delete_key_freepf_rcu is the rcu callback variant
 * that frees the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_rcu()
 * for the variant that should be used with keys returned from
 * u32_init_knode().
 */
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

	u32_destroy_key(key->tp, key, true);
}

442
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
443
{
444 445
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
446
	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
L
Linus Torvalds 已提交
447 448

	if (ht) {
449 450 451 452 453
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				RCU_INIT_POINTER(*kp, key->next);
L
Linus Torvalds 已提交
454

455
				tcf_unbind_filter(tp, &key->res);
456
				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
457 458 459 460
				return 0;
			}
		}
	}
461
	WARN_ON(1);
L
Linus Torvalds 已提交
462 463 464
	return 0;
}

465 466 467
/* Ask the device driver to delete the offloaded knode with @handle.
 * Does nothing when tc_should_offload() says the device does not offload.
 * The return value of ndo_setup_tc() is ignored.
 */
static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload cls_u32 = {};

	if (!tc_should_offload(dev, 0))
		return;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_DELETE_KNODE;
	cls_u32.knode.handle = handle;

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
}

J
Jamal Hadi Salim 已提交
480 481
/* Offload a new hash table @h to the device driver.
 *
 * If the device cannot offload, returns -EINVAL when @flags request skip_sw
 * and 0 otherwise. If the driver is called, its error is reported only when
 * skip_sw is set; without skip_sw a driver failure is ignored and 0 is
 * returned.
 */
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
				u32 flags)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload cls_u32 = {};
	int err;

	if (!tc_should_offload(dev, flags))
		return tc_skip_sw(flags) ? -EINVAL : 0;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_NEW_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
	if (tc_skip_sw(flags))
		return err;

	return 0;
}

static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
{
	struct net_device *dev = tp->q->dev_queue->dev;
506
	struct tc_cls_u32_offload cls_u32 = {};
507

508
	if (!tc_should_offload(dev, 0))
509
		return;
510

511 512 513 514 515
	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_DELETE_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
516

517
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
518 519
}

J
Jamal Hadi Salim 已提交
520 521
/* Offload knode @n to the device driver (add or replace).
 *
 * If the device cannot offload, returns -EINVAL when @flags request skip_sw
 * and 0 otherwise. On driver success TCA_CLS_FLAGS_IN_HW is set in n->flags.
 * A driver error is returned only when skip_sw is set; otherwise the filter
 * stays software-only and 0 is returned.
 */
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
				u32 flags)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_u32_offload cls_u32 = {};
	int err;

	if (!tc_should_offload(dev, flags))
		return tc_skip_sw(flags) ? -EINVAL : 0;

	tc_cls_common_offload_init(&cls_u32.common, tp);
	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
	cls_u32.knode.handle = n->handle;
	cls_u32.knode.fshift = n->fshift;
#ifdef CONFIG_CLS_U32_MARK
	cls_u32.knode.val = n->val;
	cls_u32.knode.mask = n->mask;
#else
	cls_u32.knode.val = 0;
	cls_u32.knode.mask = 0;
#endif
	cls_u32.knode.sel = &n->sel;
	cls_u32.knode.exts = &n->exts;
	/* link_handle stays zero (from the = {} initializer) without a link. */
	if (n->ht_down)
		cls_u32.knode.link_handle = n->ht_down->handle;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);

	if (!err)
		n->flags |= TCA_CLS_FLAGS_IN_HW;

	if (tc_skip_sw(flags))
		return err;

	return 0;
}

557
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
L
Linus Torvalds 已提交
558 559
{
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
560
	unsigned int h;
L
Linus Torvalds 已提交
561

E
Eric Dumazet 已提交
562
	for (h = 0; h <= ht->divisor; h++) {
563 564 565
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
566
			tcf_unbind_filter(tp, &n->res);
567
			u32_remove_hw_knode(tp, n->handle);
568
			call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
569 570 571 572 573 574 575
		}
	}
}

static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
	struct tc_u_common *tp_c = tp->data;
576 577
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;
L
Linus Torvalds 已提交
578

579
	WARN_ON(ht->refcnt);
L
Linus Torvalds 已提交
580

581
	u32_clear_hnode(tp, ht);
L
Linus Torvalds 已提交
582

583 584 585 586 587
	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
588
			u32_clear_hw_hnode(tp, ht);
589 590
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
L
Linus Torvalds 已提交
591 592 593 594 595 596 597
			return 0;
		}
	}

	return -ENOENT;
}

598 599 600 601 602 603 604 605 606 607 608
/* Report whether hash table @ht holds no knodes, i.e. every bucket from
 * index 0 through ht->divisor is NULL.
 */
static bool ht_empty(struct tc_u_hnode *ht)
{
	unsigned int idx = 0;

	while (idx <= ht->divisor) {
		if (rcu_access_pointer(ht->ht[idx]))
			return false;
		idx++;
	}

	return true;
}

609
static void u32_destroy(struct tcf_proto *tp)
L
Linus Torvalds 已提交
610 611
{
	struct tc_u_common *tp_c = tp->data;
612
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
613

614
	WARN_ON(root_ht == NULL);
L
Linus Torvalds 已提交
615 616 617 618 619 620 621

	if (root_ht && --root_ht->refcnt == 0)
		u32_destroy_hnode(tp, root_ht);

	if (--tp_c->refcnt == 0) {
		struct tc_u_hnode *ht;

622
		hlist_del(&tp_c->hnode);
L
Linus Torvalds 已提交
623

624 625 626
		for (ht = rtnl_dereference(tp_c->hlist);
		     ht;
		     ht = rtnl_dereference(ht->next)) {
627
			ht->refcnt--;
628
			u32_clear_hnode(tp, ht);
629
		}
L
Linus Torvalds 已提交
630

631 632 633
		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
			RCU_INIT_POINTER(tp_c->hlist, ht->next);
			kfree_rcu(ht, rcu);
634
		}
L
Linus Torvalds 已提交
635 636 637 638 639 640 641

		kfree(tp_c);
	}

	tp->data = NULL;
}

642
/* Delete the filter object @arg (a knode or a hash table) from @tp.
 *
 * @arg is first treated as a table pointer; if the key part of its handle is
 * non-zero it is really a knode and is deleted as such. This works because
 * both structures start with a 'next' pointer followed by the u32 handle.
 * The root table cannot be deleted (-EINVAL) and a table that is still
 * referenced elsewhere is refused (-EBUSY).
 *
 * On the paths that reach the 'out' label, *last is set to true only if no
 * other instance shares the root table or tp_c and no table contains any
 * knode. *last is not written on the -EINVAL/-EBUSY returns.
 */
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
{
	struct tc_u_hnode *ht = arg;
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
	struct tc_u_common *tp_c = tp->data;
	int ret = 0;

	if (ht == NULL)
		goto out;

	if (TC_U32_KEY(ht->handle)) {
		u32_remove_hw_knode(tp, ht->handle);
		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
		goto out;
	}

	if (root_ht == ht)
		return -EINVAL;

	if (ht->refcnt == 1) {
		ht->refcnt--;
		u32_destroy_hnode(tp, ht);
	} else {
		return -EBUSY;
	}

out:
	*last = true;
	if (root_ht) {
		if (root_ht->refcnt > 1) {
			*last = false;
			goto ret;
		}
		if (root_ht->refcnt == 1) {
			if (!ht_empty(root_ht)) {
				*last = false;
				goto ret;
			}
		}
	}

	if (tp_c->refcnt > 1) {
		*last = false;
		goto ret;
	}

	if (tp_c->refcnt == 1) {
		struct tc_u_hnode *ht;

		for (ht = rtnl_dereference(tp_c->hlist);
		     ht;
		     ht = rtnl_dereference(ht->next))
			if (!ht_empty(ht)) {
				*last = false;
				break;
			}
	}

ret:
	return ret;
}

704
#define NR_U32_NODE (1<<12)
L
Linus Torvalds 已提交
705 706 707
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
	struct tc_u_knode *n;
708 709 710 711 712
	unsigned long i;
	unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
					GFP_KERNEL);
	if (!bitmap)
		return handle | 0xFFF;
L
Linus Torvalds 已提交
713

714 715 716
	for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
	     n;
	     n = rtnl_dereference(n->next))
717
		set_bit(TC_U32_NODE(n->handle), bitmap);
L
Linus Torvalds 已提交
718

719 720 721 722 723 724
	i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
	if (i >= NR_U32_NODE)
		i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);

	kfree(bitmap);
	return handle | (i >= NR_U32_NODE ? 0xFFF : i);
L
Linus Torvalds 已提交
725 726
}

727 728 729 730 731 732 733 734
/* Netlink attribute policy used by u32_change() to validate TCA_U32_*
 * attributes nested in TCA_OPTIONS.
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
	[TCA_U32_HASH]		= { .type = NLA_U32 },
	[TCA_U32_LINK]		= { .type = NLA_U32 },
	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
};

738 739
/* Apply the optional netlink attributes in @tb to knode @n.
 *
 * Validates the extensions, then handles TCA_U32_INDEV, TCA_U32_LINK and
 * TCA_U32_CLASSID. The input device is resolved before the link is taken so
 * that no failure can occur once a reference on the linked table has been
 * acquired; otherwise a caller that frees @n by hand on error would leak
 * that reference.
 *
 * Returns 0 on success, the error from tcf_exts_validate(), or -EINVAL for
 * an unknown device, a link handle with a key part, or an unknown link
 * table.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr)
{
#ifdef CONFIG_NET_CLS_IND
	int ifindex = -1;
#endif
	int err;

	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
	if (err < 0)
		return err;

#ifdef CONFIG_NET_CLS_IND
	/* Resolve the device first: this is the last step that can fail
	 * without having touched any reference count.
	 */
	if (tb[TCA_U32_INDEV]) {
		ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV]);
		if (ifindex < 0)
			return -EINVAL;
	}
#endif

	if (tb[TCA_U32_LINK]) {
		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *ht_down = NULL, *ht_old;

		if (TC_U32_KEY(handle))
			return -EINVAL;

		if (handle) {
			ht_down = u32_lookup_ht(ht->tp_c, handle);

			if (ht_down == NULL)
				return -EINVAL;
			ht_down->refcnt++;
		}

		/* Swap in the new link and drop the reference on the old one. */
		ht_old = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, ht_down);

		if (ht_old)
			ht_old->refcnt--;
	}
	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (ifindex >= 0)
		n->ifindex = ifindex;
#endif
	return 0;
}

J
Jamal Hadi Salim 已提交
787
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833
			      struct tc_u_knode *n)
{
	struct tc_u_knode __rcu **ins;
	struct tc_u_knode *pins;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));

	ins = &ht->ht[TC_U32_HASH(n->handle)];

	/* The node must always exist for it to be replaced if this is not the
	 * case then something went very wrong elsewhere.
	 */
	for (pins = rtnl_dereference(*ins); ;
	     ins = &pins->next, pins = rtnl_dereference(*ins))
		if (pins->handle == n->handle)
			break;

	RCU_INIT_POINTER(n->next, pins->next);
	rcu_assign_pointer(*ins, n);
}

static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
					 struct tc_u_knode *n)
{
	struct tc_u_knode *new;
	struct tc_u32_sel *s = &n->sel;

	new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
		      GFP_KERNEL);

	if (!new)
		return NULL;

	RCU_INIT_POINTER(new->next, n->next);
	new->handle = n->handle;
	RCU_INIT_POINTER(new->ht_up, n->ht_up);

#ifdef CONFIG_NET_CLS_IND
	new->ifindex = n->ifindex;
#endif
	new->fshift = n->fshift;
	new->res = n->res;
834
	new->flags = n->flags;
835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857
	RCU_INIT_POINTER(new->ht_down, n->ht_down);

	/* bump reference count as long as we hold pointer to structure */
	if (new->ht_down)
		new->ht_down->refcnt++;

#ifdef CONFIG_CLS_U32_PERF
	/* Statistics may be incremented by readers during update
	 * so we must keep them in tact. When the node is later destroyed
	 * a special destroy call must be made to not free the pf memory.
	 */
	new->pf = n->pf;
#endif

#ifdef CONFIG_CLS_U32_MARK
	new->val = n->val;
	new->mask = n->mask;
	/* Similarly success statistics must be moved as pointers */
	new->pcpu_success = n->pcpu_success;
#endif
	new->tp = tp;
	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));

858 859 860 861
	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
		kfree(new);
		return NULL;
	}
862 863 864 865

	return new;
}

866
/* Create or update a u32 filter object from a netlink request.
 *
 * Three cases, in order:
 *  - *arg is an existing knode: a copy is made, updated from the attributes,
 *    offloaded, swapped in for the old knode, and the old one is freed after
 *    an RCU grace period. The flags must not change.
 *  - TCA_U32_DIVISOR is present: a new hash table is created and linked into
 *    the shared table list.
 *  - otherwise: a new knode is created from TCA_U32_SEL in the table chosen
 *    by TCA_U32_HASH (or the root table) and inserted into its bucket in
 *    node-id order.
 *
 * On success *arg points at the created object (table or knode) for the
 * latter two cases. Returns 0 or a negative errno.
 */
static int u32_change(struct net *net, struct sk_buff *in_skb,
		      struct tcf_proto *tp, unsigned long base, u32 handle,
		      struct nlattr **tca, void **arg, bool ovr)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
	u32 htid, flags = 0;
	size_t sel_size;
	int err;
#ifdef CONFIG_CLS_U32_PERF
	size_t size;
#endif

	if (opt == NULL)
		return handle ? -EINVAL : 0;

	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_U32_FLAGS]) {
		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
		if (!tc_flags_valid(flags))
			return -EINVAL;
	}

	n = *arg;
	if (n) {
		struct tc_u_knode *new;

		if (TC_U32_KEY(n->handle) == 0)
			return -EINVAL;

		if (n->flags != flags)
			return -EINVAL;

		new = u32_init_knode(tp, n);
		if (!new)
			return -ENOMEM;

		err = u32_set_parms(net, tp, base,
				    rtnl_dereference(n->ht_up), new, tb,
				    tca[TCA_RATE], ovr);

		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		err = u32_replace_hw_knode(tp, new, flags);
		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		u32_replace_knode(tp, tp_c, new);
		tcf_unbind_filter(tp, &n->res);
		/* The old knode shares its statistics with the copy. */
		call_rcu(&n->rcu, u32_delete_key_rcu);
		return 0;
	}

	if (tb[TCA_U32_DIVISOR]) {
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);

		/* Stored as "buckets - 1"; a request of 0 wraps and is rejected. */
		if (--divisor > 0x100)
			return -EINVAL;
		if (TC_U32_KEY(handle))
			return -EINVAL;
		if (handle == 0) {
			handle = gen_new_htid(tp->data);
			if (handle == 0)
				return -ENOMEM;
		}
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
		if (ht == NULL)
			return -ENOBUFS;
		ht->tp_c = tp_c;
		ht->refcnt = 1;
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;

		err = u32_replace_hw_hnode(tp, ht, flags);
		if (err) {
			kfree(ht);
			return err;
		}

		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
		*arg = ht;

		return 0;
	}

	if (tb[TCA_U32_HASH]) {
		htid = nla_get_u32(tb[TCA_U32_HASH]);
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
			ht = rtnl_dereference(tp->root);
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
			if (ht == NULL)
				return -EINVAL;
		}
	} else {
		ht = rtnl_dereference(tp->root);
		htid = ht->handle;
	}

	if (ht->divisor < TC_U32_HASH(htid))
		return -EINVAL;

	if (handle) {
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
			return -EINVAL;
		handle = htid | TC_U32_NODE(handle);
	} else
		handle = gen_new_kid(ht, htid);

	if (tb[TCA_U32_SEL] == NULL)
		return -EINVAL;

	s = nla_data(tb[TCA_U32_SEL]);

	/* The policy only guarantees room for the fixed selector header.
	 * Make sure the attribute really carries the nkeys keys it announces
	 * before they are copied below.
	 */
	sel_size = sizeof(*s) + s->nkeys * sizeof(struct tc_u32_key);
	if (nla_len(tb[TCA_U32_SEL]) < sel_size)
		return -EINVAL;

	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
	if (n == NULL)
		return -ENOBUFS;

#ifdef CONFIG_CLS_U32_PERF
	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
	if (!n->pf) {
		kfree(n);
		return -ENOBUFS;
	}
#endif

	memcpy(&n->sel, s, sel_size);
	RCU_INIT_POINTER(n->ht_up, ht);
	n->handle = handle;
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
	n->flags = flags;
	n->tp = tp;

	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
	if (err < 0)
		goto errout;

#ifdef CONFIG_CLS_U32_MARK
	n->pcpu_success = alloc_percpu(u32);
	if (!n->pcpu_success) {
		err = -ENOMEM;
		goto errout;
	}

	if (tb[TCA_U32_MARK]) {
		struct tc_u32_mark *mark;

		mark = nla_data(tb[TCA_U32_MARK]);
		n->val = mark->val;
		n->mask = mark->mask;
	}
#endif

	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
	if (err == 0) {
		struct tc_u_knode __rcu **ins;
		struct tc_u_knode *pins;

		err = u32_replace_hw_knode(tp, n, flags);
		if (err)
			goto errhw;

		if (!tc_in_hw(n->flags))
			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

		/* Keep the bucket sorted by node id. */
		ins = &ht->ht[TC_U32_HASH(handle)];
		for (pins = rtnl_dereference(*ins); pins;
		     ins = &pins->next, pins = rtnl_dereference(*ins))
			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
				break;

		RCU_INIT_POINTER(n->next, pins);
		rcu_assign_pointer(*ins, n);
		*arg = n;
		return 0;
	}

errhw:
	/* u32_set_parms() may have linked the knode to a table and taken a
	 * reference on it; the knode is freed by hand below, so drop that
	 * reference here. n->ht_down is NULL if no link was established.
	 */
	if (n->ht_down)
		n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_MARK
	free_percpu(n->pcpu_success);
#endif

errout:
	tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
	free_percpu(n->pf);
#endif
	kfree(n);
	return err;
}

static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
1080
	unsigned int h;
L
Linus Torvalds 已提交
1081 1082 1083 1084

	if (arg->stop)
		return;

1085 1086 1087
	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next)) {
L
Linus Torvalds 已提交
1088 1089 1090
		if (ht->prio != tp->prio)
			continue;
		if (arg->count >= arg->skip) {
1091
			if (arg->fn(tp, ht, arg) < 0) {
L
Linus Torvalds 已提交
1092 1093 1094 1095 1096 1097
				arg->stop = 1;
				return;
			}
		}
		arg->count++;
		for (h = 0; h <= ht->divisor; h++) {
1098 1099 1100
			for (n = rtnl_dereference(ht->ht[h]);
			     n;
			     n = rtnl_dereference(n->next)) {
L
Linus Torvalds 已提交
1101 1102 1103 1104
				if (arg->count < arg->skip) {
					arg->count++;
					continue;
				}
1105
				if (arg->fn(tp, n, arg) < 0) {
L
Linus Torvalds 已提交
1106 1107 1108 1109 1110 1111 1112 1113 1114
					arg->stop = 1;
					return;
				}
				arg->count++;
			}
		}
	}
}

1115 1116 1117 1118 1119 1120 1121 1122
/*
 * u32_bind_class - record the class value for a key node.
 *
 * @fh is treated as a struct tc_u_knode.  When it is non-NULL and its result
 * already carries @classid, the result's class field is set to @cl; otherwise
 * nothing is changed.
 */
static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
{
	struct tc_u_knode *knode = fh;

	if (!knode)
		return;
	if (knode->res.classid != classid)
		return;

	knode->res.class = cl;
}

1123
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
J
Jamal Hadi Salim 已提交
1124
		    struct sk_buff *skb, struct tcmsg *t)
L
Linus Torvalds 已提交
1125
{
1126
	struct tc_u_knode *n = fh;
1127
	struct tc_u_hnode *ht_up, *ht_down;
1128
	struct nlattr *nest;
L
Linus Torvalds 已提交
1129 1130 1131 1132 1133 1134

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

1135 1136 1137
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
L
Linus Torvalds 已提交
1138 1139

	if (TC_U32_KEY(n->handle) == 0) {
1140
		struct tc_u_hnode *ht = fh;
E
Eric Dumazet 已提交
1141 1142
		u32 divisor = ht->divisor + 1;

1143 1144
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
L
Linus Torvalds 已提交
1145
	} else {
1146 1147 1148
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *gpf;
		int cpu;
1149
#endif
1150

1151 1152 1153 1154
		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;
1155 1156 1157

		ht_up = rtnl_dereference(n->ht_up);
		if (ht_up) {
L
Linus Torvalds 已提交
1158
			u32 htid = n->handle & 0xFFFFF000;
1159 1160
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
L
Linus Torvalds 已提交
1161
		}
1162 1163 1164
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;
1165 1166 1167 1168

		ht_down = rtnl_dereference(n->ht_down);
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1169
			goto nla_put_failure;
L
Linus Torvalds 已提交
1170

1171 1172 1173
		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
			goto nla_put_failure;

L
Linus Torvalds 已提交
1174
#ifdef CONFIG_CLS_U32_MARK
1175 1176 1177 1178
		if ((n->val || n->mask)) {
			struct tc_u32_mark mark = {.val = n->val,
						   .mask = n->mask,
						   .success = 0};
1179
			int cpum;
1180

1181 1182
			for_each_possible_cpu(cpum) {
				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1183 1184 1185 1186 1187 1188 1189

				mark.success += cnt;
			}

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1190 1191
#endif

1192
		if (tcf_exts_dump(skb, &n->exts) < 0)
1193
			goto nla_put_failure;
L
Linus Torvalds 已提交
1194 1195

#ifdef CONFIG_NET_CLS_IND
1196 1197 1198 1199 1200 1201
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1202 1203
#endif
#ifdef CONFIG_CLS_U32_PERF
1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219
		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
			      n->sel.nkeys * sizeof(u64),
			      GFP_KERNEL);
		if (!gpf)
			goto nla_put_failure;

		for_each_possible_cpu(cpu) {
			int i;
			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

			gpf->rcnt += pf->rcnt;
			gpf->rhit += pf->rhit;
			for (i = 0; i < n->sel.nkeys; i++)
				gpf->kcnts[i] += pf->kcnts[i];
		}

1220 1221 1222 1223
		if (nla_put_64bit(skb, TCA_U32_PCNT,
				  sizeof(struct tc_u32_pcnt) +
				  n->sel.nkeys * sizeof(u64),
				  gpf, TCA_U32_PAD)) {
1224
			kfree(gpf);
1225
			goto nla_put_failure;
1226 1227
		}
		kfree(gpf);
L
Linus Torvalds 已提交
1228 1229 1230
#endif
	}

1231 1232
	nla_nest_end(skb, nest);

L
Linus Torvalds 已提交
1233
	if (TC_U32_KEY(n->handle))
1234
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1235
			goto nla_put_failure;
L
Linus Torvalds 已提交
1236 1237
	return skb->len;

1238
nla_put_failure:
1239
	nla_nest_cancel(skb, nest);
L
Linus Torvalds 已提交
1240 1241 1242
	return -1;
}

1243
/*
 * Operations table for the "u32" classifier kind.  init_u32() hands it to
 * register_tcf_proto_ops() and exit_u32() to unregister_tcf_proto_ops().
 */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind = "u32",
	.classify = u32_classify,
	.init = u32_init,
	.destroy = u32_destroy,
	.get = u32_get,
	.change = u32_change,
	.delete = u32_delete,
	.walk = u32_walk,
	.dump = u32_dump,
	.bind_class = u32_bind_class,
	.owner = THIS_MODULE,
};

/*
 * init_u32 - module load hook.
 *
 * Logs which optional features were compiled in, allocates the
 * tc_u_common_hash array of U32_HASH_SIZE list heads and initialises each
 * head, then registers cls_u32_ops.
 *
 * Returns 0 on success, -ENOMEM when the array cannot be allocated, or the
 * error from register_tcf_proto_ops() (after freeing the array again).
 */
static int __init init_u32(void)
{
	int bucket;
	int err;

	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif

	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
					  sizeof(struct hlist_head),
					  GFP_KERNEL);
	if (!tc_u_common_hash)
		return -ENOMEM;

	for (bucket = 0; bucket < U32_HASH_SIZE; bucket++)
		INIT_HLIST_HEAD(&tc_u_common_hash[bucket]);

	err = register_tcf_proto_ops(&cls_u32_ops);
	if (!err)
		return 0;

	/* Registration failed: release the table before reporting the error. */
	kvfree(tc_u_common_hash);
	return err;
}

1286
/*
 * exit_u32 - module unload hook.
 *
 * Undoes init_u32() in reverse order: the classifier ops are unregistered
 * first, then the tc_u_common_hash array is freed.
 */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);
	kvfree(tc_u_common_hash);
}

/* Use init_u32()/exit_u32() as the module's init and exit functions. */
module_init(init_u32)
module_exit(exit_u32)
/* Declare the module license as GPL. */
MODULE_LICENSE("GPL");