cls_u32.c 31.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
34
#include <linux/slab.h>
L
Linus Torvalds 已提交
35 36 37 38
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
39
#include <linux/percpu.h>
L
Linus Torvalds 已提交
40 41
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
42
#include <linux/bitmap.h>
43 44
#include <linux/netdevice.h>
#include <linux/hash.h>
45
#include <net/netlink.h>
L
Linus Torvalds 已提交
46 47
#include <net/act_api.h>
#include <net/pkt_cls.h>
48
#include <linux/idr.h>
L
Linus Torvalds 已提交
49

E
Eric Dumazet 已提交
50
struct tc_u_knode {
51
	struct tc_u_knode __rcu	*next;
L
Linus Torvalds 已提交
52
	u32			handle;
53
	struct tc_u_hnode __rcu	*ht_up;
L
Linus Torvalds 已提交
54 55
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
56
	int			ifindex;
L
Linus Torvalds 已提交
57 58 59
#endif
	u8			fshift;
	struct tcf_result	res;
60
	struct tc_u_hnode __rcu	*ht_down;
L
Linus Torvalds 已提交
61
#ifdef CONFIG_CLS_U32_PERF
62
	struct tc_u32_pcnt __percpu *pf;
L
Linus Torvalds 已提交
63
#endif
64
	u32			flags;
L
Linus Torvalds 已提交
65
#ifdef CONFIG_CLS_U32_MARK
66 67 68
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
L
Linus Torvalds 已提交
69
#endif
70
	struct tcf_proto	*tp;
71 72 73 74
	union {
		struct work_struct	work;
		struct rcu_head		rcu;
	};
75 76 77 78
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
L
Linus Torvalds 已提交
79 80
};

E
Eric Dumazet 已提交
81
struct tc_u_hnode {
82
	struct tc_u_hnode __rcu	*next;
L
Linus Torvalds 已提交
83 84 85 86
	u32			handle;
	u32			prio;
	struct tc_u_common	*tp_c;
	int			refcnt;
E
Eric Dumazet 已提交
87
	unsigned int		divisor;
88
	struct idr		handle_idr;
89
	struct rcu_head		rcu;
90
	u32			flags;
91 92 93 94
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
L
Linus Torvalds 已提交
95 96
};

E
Eric Dumazet 已提交
97
struct tc_u_common {
98
	struct tc_u_hnode __rcu	*hlist;
99
	struct tcf_block	*block;
L
Linus Torvalds 已提交
100
	int			refcnt;
101
	struct idr		handle_idr;
102
	struct hlist_node	hnode;
103
	struct rcu_head		rcu;
L
Linus Torvalds 已提交
104 105
};

E
Eric Dumazet 已提交
106 107 108
/* Fold a packet word into a hash value: mask @key with sel->hmask, convert
 * it to host byte order and shift it right by @fshift.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	return ntohl(key & sel->hmask) >> fshift;
}

J
Jamal Hadi Salim 已提交
115 116
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
L
Linus Torvalds 已提交
117 118 119
{
	struct {
		struct tc_u_knode *knode;
120
		unsigned int	  off;
L
Linus Torvalds 已提交
121 122
	} stack[TC_U32_MAXDEPTH];

123
	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
124
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
125 126 127 128 129 130 131 132 133 134
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
135
	n = rcu_dereference_bh(ht->ht[sel]);
L
Linus Torvalds 已提交
136 137 138 139 140 141

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
142
		__this_cpu_inc(n->pf->rcnt);
L
Linus Torvalds 已提交
143 144 145
		j = 0;
#endif

146 147 148 149 150
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

L
Linus Torvalds 已提交
151
#ifdef CONFIG_CLS_U32_MARK
152
		if ((skb->mark & n->mask) != n->val) {
153
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
154 155
			goto next_knode;
		} else {
156
			__this_cpu_inc(*n->pcpu_success);
L
Linus Torvalds 已提交
157 158 159
		}
#endif

E
Eric Dumazet 已提交
160
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
161
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
162
			__be32 *data, hdata;
163

D
Dan Carpenter 已提交
164
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
165 166
				goto out;

S
stephen hemminger 已提交
167
			data = skb_header_pointer(skb, toff, 4, &hdata);
168 169 170
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
171
				n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
172 173 174
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
175
			__this_cpu_inc(n->pf->kcnts[j]);
L
Linus Torvalds 已提交
176 177 178
			j++;
#endif
		}
179 180 181

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
L
Linus Torvalds 已提交
182
check_terminal:
E
Eric Dumazet 已提交
183
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
184 185 186

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
187
				if (!tcf_match_indev(skb, n->ifindex)) {
188
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
189 190 191 192
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
193
				__this_cpu_inc(n->pf->rhit);
L
Linus Torvalds 已提交
194 195 196
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
197
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
198 199 200 201 202
					goto next_knode;
				}

				return r;
			}
203
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
204 205 206 207 208 209 210
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
211
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
212 213
		sdepth++;

214
		ht = rcu_dereference_bh(n->ht_down);
L
Linus Torvalds 已提交
215
		sel = 0;
216
		if (ht->divisor) {
S
stephen hemminger 已提交
217
			__be32 *data, hdata;
218 219

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
220
						  &hdata);
221 222 223 224 225
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
226
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
227 228
			goto next_ht;

E
Eric Dumazet 已提交
229
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
230
			off2 = n->sel.off + 3;
231
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
232
				__be16 *data, hdata;
233 234 235

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
236
							  2, &hdata);
237 238 239 240 241
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
242 243
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
244
		if (n->sel.flags & TC_U32_EAT) {
245
			off += off2;
L
Linus Torvalds 已提交
246 247 248
			off2 = 0;
		}

249
		if (off < skb->len)
L
Linus Torvalds 已提交
250 251 252 253 254 255
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
256
		ht = rcu_dereference_bh(n->ht_up);
257
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
258 259
		goto check_terminal;
	}
260
out:
L
Linus Torvalds 已提交
261 262 263
	return -1;

deadloop:
264
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
265 266 267
	return -1;
}

J
Jamal Hadi Salim 已提交
268
/* Find the hash table with the given @handle on tp_c->hlist.
 * Returns NULL when no table carries that handle.
 */
static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *cur = rtnl_dereference(tp_c->hlist);

	while (cur && cur->handle != handle)
		cur = rtnl_dereference(cur->next);

	return cur;
}

J
Jamal Hadi Salim 已提交
281
/* Find the key node with the given @handle inside hash table @ht.
 * The bucket is TC_U32_HASH(handle); a bucket index beyond ht->divisor
 * yields NULL, as does a handle that is not present in the chain.
 */
static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned int bucket = TC_U32_HASH(handle);
	struct tc_u_knode *kn;

	if (bucket > ht->divisor)
		return NULL;

	kn = rtnl_dereference(ht->ht[bucket]);
	while (kn && kn->handle != handle)
		kn = rtnl_dereference(kn->next);

	return kn;
}


300
/* Resolve @handle to an object of this classifier.
 *
 * The hash table part of the handle selects either the root table or a
 * table looked up on the shared list. If the key part of the handle is
 * zero the hash table itself is returned, otherwise the matching key node.
 * Returns NULL when the table or the key does not exist.
 */
static void *u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_common *tp_c = tp->data;
	u32 htid = TC_U32_HTID(handle);
	struct tc_u_hnode *ht;

	if (htid == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, htid);

	if (!ht)
		return NULL;

	if (!TC_U32_KEY(handle))
		return ht;

	return u32_lookup_key(ht, handle);
}

319
static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
L
Linus Torvalds 已提交
320
{
321 322
	unsigned long idr_index;
	int err;
L
Linus Torvalds 已提交
323

324
	/* This is only used inside rtnl lock it is safe to increment
325 326
	 * without read _copy_ update semantics
	 */
327 328 329 330 331
	err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
			    1, 0x7FF, GFP_KERNEL);
	if (err)
		return 0;
	return (u32)(idr_index | 0x800) << 20;
L
Linus Torvalds 已提交
332 333
}

334 335 336 337 338 339 340
/* Buckets of struct tc_u_common instances, indexed by tc_u_hash(). */
static struct hlist_head *tc_u_common_hash;

/* Number of hash bits passed to hash_ptr(), and the bucket count derived
 * from it.
 */
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)

static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
341
	return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
342 343 344 345 346 347 348 349 350
}

static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
{
	struct tc_u_common *tc;
	unsigned int h;

	h = tc_u_hash(tp);
	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
351
		if (tc->block == tp->chain->block)
352 353 354 355 356
			return tc;
	}
	return NULL;
}

L
Linus Torvalds 已提交
357 358 359 360
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;
361
	unsigned int h;
L
Linus Torvalds 已提交
362

363
	tp_c = tc_u_common_find(tp);
L
Linus Torvalds 已提交
364

365
	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
L
Linus Torvalds 已提交
366 367 368 369
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->refcnt++;
370
	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
L
Linus Torvalds 已提交
371
	root_ht->prio = tp->prio;
372
	idr_init(&root_ht->handle_idr);
L
Linus Torvalds 已提交
373 374

	if (tp_c == NULL) {
375
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
L
Linus Torvalds 已提交
376 377 378 379
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
380
		tp_c->block = tp->chain->block;
381
		INIT_HLIST_NODE(&tp_c->hnode);
382
		idr_init(&tp_c->handle_idr);
383 384 385

		h = tc_u_hash(tp);
		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
L
Linus Torvalds 已提交
386 387 388
	}

	tp_c->refcnt++;
389 390
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
L
Linus Torvalds 已提交
391 392
	root_ht->tp_c = tp_c;

393
	rcu_assign_pointer(tp->root, root_ht);
L
Linus Torvalds 已提交
394 395 396 397
	tp->data = tp_c;
	return 0;
}

J
Jamal Hadi Salim 已提交
398
static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
399
			   bool free_pf)
L
Linus Torvalds 已提交
400
{
401 402
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);

403
	tcf_exts_destroy(&n->exts);
404
	tcf_exts_put_net(&n->exts);
405 406
	if (ht && --ht->refcnt == 0)
		kfree(ht);
L
Linus Torvalds 已提交
407
#ifdef CONFIG_CLS_U32_PERF
408 409
	if (free_pf)
		free_percpu(n->pf);
410 411
#endif
#ifdef CONFIG_CLS_U32_MARK
412 413
	if (free_pf)
		free_percpu(n->pcpu_success);
L
Linus Torvalds 已提交
414 415 416 417 418
#endif
	kfree(n);
	return 0;
}

419 420 421 422 423 424 425 426
/* u32_delete_key_rcu should be called when free'ing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_rcu variant does not free the percpu
 * statistics.
 */
427 428 429 430 431 432 433 434 435
static void u32_delete_key_work(struct work_struct *work)
{
	struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);

	rtnl_lock();
	u32_destroy_key(key->tp, key, false);
	rtnl_unlock();
}

436 437 438 439
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

440 441
	INIT_WORK(&key->work, u32_delete_key_work);
	tcf_queue_work(&key->work);
442 443 444 445 446 447 448 449 450
}

/* u32_delete_key_freepf_rcu is the rcu callback variant
 * that free's the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_rcu()
 * for the variant that should be used with keys return from
 * u32_init_knode()
 */
451 452 453 454 455 456 457 458 459
static void u32_delete_key_freepf_work(struct work_struct *work)
{
	struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);

	rtnl_lock();
	u32_destroy_key(key->tp, key, true);
	rtnl_unlock();
}

460 461 462 463
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

464 465
	INIT_WORK(&key->work, u32_delete_key_freepf_work);
	tcf_queue_work(&key->work);
466 467
}

468
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
469
{
470 471
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
472
	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
L
Linus Torvalds 已提交
473 474

	if (ht) {
475 476 477 478 479
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				RCU_INIT_POINTER(*kp, key->next);
L
Linus Torvalds 已提交
480

481
				tcf_unbind_filter(tp, &key->res);
482
				tcf_exts_get_net(&key->exts);
483
				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
484 485 486 487
				return 0;
			}
		}
	}
488
	WARN_ON(1);
L
Linus Torvalds 已提交
489 490 491
	return 0;
}

492 493
static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
			       struct netlink_ext_ack *extack)
494
{
495
	struct tcf_block *block = tp->chain->block;
496
	struct tc_cls_u32_offload cls_u32 = {};
497

498
	tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
499 500 501 502
	cls_u32.command = TC_CLSU32_DELETE_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
503

504
	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
505 506
}

J
Jamal Hadi Salim 已提交
507
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
508
				u32 flags, struct netlink_ext_ack *extack)
509
{
510
	struct tcf_block *block = tp->chain->block;
511
	struct tc_cls_u32_offload cls_u32 = {};
512 513
	bool skip_sw = tc_skip_sw(flags);
	bool offloaded = false;
514
	int err;
515

516
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
517 518 519 520
	cls_u32.command = TC_CLSU32_NEW_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
521

522 523
	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
	if (err < 0) {
524
		u32_clear_hw_hnode(tp, h, NULL);
525
		return err;
526 527 528 529 530 531
	} else if (err > 0) {
		offloaded = true;
	}

	if (skip_sw && !offloaded)
		return -EINVAL;
532 533

	return 0;
534 535
}

536 537
static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
				struct netlink_ext_ack *extack)
538
{
539
	struct tcf_block *block = tp->chain->block;
540
	struct tc_cls_u32_offload cls_u32 = {};
541

542
	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
543
	cls_u32.command = TC_CLSU32_DELETE_KNODE;
544
	cls_u32.knode.handle = n->handle;
545

546
	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
547
	tcf_block_offload_dec(block, &n->flags);
548 549
}

J
Jamal Hadi Salim 已提交
550
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
551
				u32 flags, struct netlink_ext_ack *extack)
552
{
553
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
554
	struct tcf_block *block = tp->chain->block;
555
	struct tc_cls_u32_offload cls_u32 = {};
556
	bool skip_sw = tc_skip_sw(flags);
557
	int err;
558

559
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
560 561 562
	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
	cls_u32.knode.handle = n->handle;
	cls_u32.knode.fshift = n->fshift;
563
#ifdef CONFIG_CLS_U32_MARK
564 565
	cls_u32.knode.val = n->val;
	cls_u32.knode.mask = n->mask;
566
#else
567 568
	cls_u32.knode.val = 0;
	cls_u32.knode.mask = 0;
569
#endif
570 571
	cls_u32.knode.sel = &n->sel;
	cls_u32.knode.exts = &n->exts;
572
	if (n->ht_down)
573
		cls_u32.knode.link_handle = ht->handle;
574

575 576
	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
	if (err < 0) {
577
		u32_remove_hw_knode(tp, n, NULL);
578
		return err;
579
	} else if (err > 0) {
580
		tcf_block_offload_inc(block, &n->flags);
581 582
	}

583
	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
584
		return -EINVAL;
585 586

	return 0;
587 588
}

589 590
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
			    struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
591 592
{
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
593
	unsigned int h;
L
Linus Torvalds 已提交
594

E
Eric Dumazet 已提交
595
	for (h = 0; h <= ht->divisor; h++) {
596 597 598
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
599
			tcf_unbind_filter(tp, &n->res);
600
			u32_remove_hw_knode(tp, n, extack);
601
			idr_remove_ext(&ht->handle_idr, n->handle);
602 603 604 605
			if (tcf_exts_get_net(&n->exts))
				call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
			else
				u32_destroy_key(n->tp, n, true);
L
Linus Torvalds 已提交
606 607 608 609
		}
	}
}

610 611
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
			     struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
612 613
{
	struct tc_u_common *tp_c = tp->data;
614 615
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;
L
Linus Torvalds 已提交
616

617
	WARN_ON(ht->refcnt);
L
Linus Torvalds 已提交
618

619
	u32_clear_hnode(tp, ht, extack);
L
Linus Torvalds 已提交
620

621 622 623 624 625
	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
626
			u32_clear_hw_hnode(tp, ht, extack);
627 628
			idr_destroy(&ht->handle_idr);
			idr_remove_ext(&tp_c->handle_idr, ht->handle);
629 630
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
L
Linus Torvalds 已提交
631 632 633 634 635 636 637
			return 0;
		}
	}

	return -ENOENT;
}

638 639 640 641 642 643 644 645 646 647 648
/* Return true when none of the buckets 0..ht->divisor holds a key node. */
static bool ht_empty(struct tc_u_hnode *ht)
{
	unsigned int bucket = 0;

	while (bucket <= ht->divisor) {
		if (rcu_access_pointer(ht->ht[bucket]))
			return false;
		bucket++;
	}

	return true;
}

649
static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
650 651
{
	struct tc_u_common *tp_c = tp->data;
652
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
653

654
	WARN_ON(root_ht == NULL);
L
Linus Torvalds 已提交
655 656

	if (root_ht && --root_ht->refcnt == 0)
657
		u32_destroy_hnode(tp, root_ht, extack);
L
Linus Torvalds 已提交
658 659 660 661

	if (--tp_c->refcnt == 0) {
		struct tc_u_hnode *ht;

662
		hlist_del(&tp_c->hnode);
L
Linus Torvalds 已提交
663

664
		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
665
			u32_clear_hnode(tp, ht, extack);
666
			RCU_INIT_POINTER(tp_c->hlist, ht->next);
667 668 669 670 671 672

			/* u32_destroy_key() will later free ht for us, if it's
			 * still referenced by some knode
			 */
			if (--ht->refcnt == 0)
				kfree_rcu(ht, rcu);
673
		}
L
Linus Torvalds 已提交
674

675
		idr_destroy(&tp_c->handle_idr);
L
Linus Torvalds 已提交
676 677 678 679 680 681
		kfree(tp_c);
	}

	tp->data = NULL;
}

682 683
/* Delete the object @arg, which is either a key node or a hash table.
 *
 * A handle with a non-zero key part is treated as a key node and removed
 * from hardware and from its chain. A hash table may only be deleted when
 * it is not the root table (-EINVAL) and holds exactly one reference
 * (-EBUSY otherwise). A NULL @arg deletes nothing.
 *
 * Unless an error is returned early, *last is set to tell the caller
 * whether anything is left: it is false while the root table or the shared
 * state is referenced more than once, or while any table still holds keys.
 */
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
		      struct netlink_ext_ack *extack)
{
	struct tc_u_hnode *ht = arg;
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *it;
	int ret = 0;

	/* Both structures start with a next pointer followed by the handle,
	 * so the handle can be inspected before the real type is known.
	 */
	if (ht && TC_U32_KEY(ht->handle)) {
		struct tc_u_knode *n = (struct tc_u_knode *)ht;

		u32_remove_hw_knode(tp, n, extack);
		ret = u32_delete_key(tp, n);
	} else if (ht) {
		if (root_ht == ht) {
			NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
			return -EINVAL;
		}

		if (ht->refcnt != 1) {
			NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
			return -EBUSY;
		}

		ht->refcnt--;
		u32_destroy_hnode(tp, ht, extack);
	}

	*last = true;

	if (root_ht) {
		if (root_ht->refcnt > 1 ||
		    (root_ht->refcnt == 1 && !ht_empty(root_ht))) {
			*last = false;
			return ret;
		}
	}

	if (tp_c->refcnt > 1) {
		*last = false;
		return ret;
	}

	if (tp_c->refcnt == 1) {
		for (it = rtnl_dereference(tp_c->hlist);
		     it;
		     it = rtnl_dereference(it->next)) {
			if (!ht_empty(it)) {
				*last = false;
				break;
			}
		}
	}

	return ret;
}

748
/* Pick a handle for a new key node in hash table @ht.
 *
 * An id is first requested from ht->handle_idr starting at htid | 0x800;
 * if that fails a second request starts at htid + 1. Both requests pass
 * (htid | 0xFFF) + 1 as the end argument. When both fail, htid | 0xFFF is
 * returned without having been reserved in the idr.
 */
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
	u32 upper = htid | 0xFFF;
	unsigned long id;

	if (!idr_alloc_ext(&ht->handle_idr, NULL, &id,
			   htid | 0x800, upper + 1, GFP_KERNEL))
		return (u32)id;

	if (!idr_alloc_ext(&ht->handle_idr, NULL, &id,
			   htid + 1, upper + 1, GFP_KERNEL))
		return (u32)id;

	return upper;
}

765 766 767 768 769 770 771 772
/* Netlink attribute policy handed to nla_parse_nested() when parsing the
 * nested u32 options: type and/or length constraint per TCA_U32_* attribute.
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
	[TCA_U32_HASH]		= { .type = NLA_U32 },
	[TCA_U32_LINK]		= { .type = NLA_U32 },
	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
};

776 777
/* Apply the optional netlink attributes in @tb to key node @n.
 *
 * Validates the extensions, then handles TCA_U32_LINK (re-pointing
 * n->ht_down and moving the reference from the old linked table to the new
 * one), TCA_U32_CLASSID (setting the class id and binding the filter) and,
 * with CONFIG_NET_CLS_IND, TCA_U32_INDEV.
 *
 * Returns 0 on success, the error from tcf_exts_validate(), or -EINVAL for
 * a link handle that names a key, an unknown link table, or a failing
 * tcf_change_indev().
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr,
			 struct netlink_ext_ack *extack)
{
	int err;

	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
	if (err < 0)
		return err;

	if (tb[TCA_U32_LINK]) {
		u32 link = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *new_down = NULL;
		struct tc_u_hnode *old_down;

		if (TC_U32_KEY(link)) {
			NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
			return -EINVAL;
		}

		/* a zero link handle clears the link */
		if (link) {
			new_down = u32_lookup_ht(ht->tp_c, link);
			if (!new_down) {
				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
				return -EINVAL;
			}
			new_down->refcnt++;
		}

		old_down = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, new_down);

		if (old_down)
			old_down->refcnt--;
	}

	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_U32_INDEV]) {
		int ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);

		if (ifindex < 0)
			return -EINVAL;
		n->ifindex = ifindex;
	}
#endif
	return 0;
}

J
Jamal Hadi Salim 已提交
830
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
			      struct tc_u_knode *n)
{
	struct tc_u_knode __rcu **ins;
	struct tc_u_knode *pins;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));

	ins = &ht->ht[TC_U32_HASH(n->handle)];

	/* The node must always exist for it to be replaced if this is not the
	 * case then something went very wrong elsewhere.
	 */
	for (pins = rtnl_dereference(*ins); ;
	     ins = &pins->next, pins = rtnl_dereference(*ins))
		if (pins->handle == n->handle)
			break;

852
	idr_replace_ext(&ht->handle_idr, n, n->handle);
853 854 855 856 857 858 859
	RCU_INIT_POINTER(n->next, pins->next);
	rcu_assign_pointer(*ins, n);
}

static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
					 struct tc_u_knode *n)
{
860
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
861
	struct tc_u32_sel *s = &n->sel;
862
	struct tc_u_knode *new;
863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878

	new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
		      GFP_KERNEL);

	if (!new)
		return NULL;

	RCU_INIT_POINTER(new->next, n->next);
	new->handle = n->handle;
	RCU_INIT_POINTER(new->ht_up, n->ht_up);

#ifdef CONFIG_NET_CLS_IND
	new->ifindex = n->ifindex;
#endif
	new->fshift = n->fshift;
	new->res = n->res;
879
	new->flags = n->flags;
880
	RCU_INIT_POINTER(new->ht_down, ht);
881 882

	/* bump reference count as long as we hold pointer to structure */
883 884
	if (ht)
		ht->refcnt++;
885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902

#ifdef CONFIG_CLS_U32_PERF
	/* Statistics may be incremented by readers during update
	 * so we must keep them in tact. When the node is later destroyed
	 * a special destroy call must be made to not free the pf memory.
	 */
	new->pf = n->pf;
#endif

#ifdef CONFIG_CLS_U32_MARK
	new->val = n->val;
	new->mask = n->mask;
	/* Similarly success statistics must be moved as pointers */
	new->pcpu_success = n->pcpu_success;
#endif
	new->tp = tp;
	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));

903 904 905 906
	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
		kfree(new);
		return NULL;
	}
907 908 909 910

	return new;
}

911
static int u32_change(struct net *net, struct sk_buff *in_skb,
912
		      struct tcf_proto *tp, unsigned long base, u32 handle,
913 914
		      struct nlattr **tca, void **arg, bool ovr,
		      struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
915 916 917 918 919
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
920 921
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
922
	u32 htid, flags = 0;
L
Linus Torvalds 已提交
923
	int err;
924 925 926
#ifdef CONFIG_CLS_U32_PERF
	size_t size;
#endif
L
Linus Torvalds 已提交
927

928 929 930 931 932 933 934 935
	if (!opt) {
		if (handle) {
			NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
			return -EINVAL;
		} else {
			return 0;
		}
	}
L
Linus Torvalds 已提交
936

937
	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack);
938 939
	if (err < 0)
		return err;
L
Linus Torvalds 已提交
940

941
	if (tb[TCA_U32_FLAGS]) {
942
		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
943 944
		if (!tc_flags_valid(flags)) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
945
			return -EINVAL;
946
		}
947
	}
948

949
	n = *arg;
E
Eric Dumazet 已提交
950
	if (n) {
951 952
		struct tc_u_knode *new;

953 954
		if (TC_U32_KEY(n->handle) == 0) {
			NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
L
Linus Torvalds 已提交
955
			return -EINVAL;
956
		}
L
Linus Torvalds 已提交
957

958 959
		if ((n->flags ^ flags) &
		    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
960
			NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
961
			return -EINVAL;
962
		}
963

964 965 966 967 968 969
		new = u32_init_knode(tp, n);
		if (!new)
			return -ENOMEM;

		err = u32_set_parms(net, tp, base,
				    rtnl_dereference(n->ht_up), new, tb,
970
				    tca[TCA_RATE], ovr, extack);
971 972 973 974 975 976

		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

977
		err = u32_replace_hw_knode(tp, new, flags, extack);
978 979 980 981 982
		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

983 984 985
		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

986
		u32_replace_knode(tp, tp_c, new);
987
		tcf_unbind_filter(tp, &n->res);
988
		tcf_exts_get_net(&n->exts);
989 990
		call_rcu(&n->rcu, u32_delete_key_rcu);
		return 0;
L
Linus Torvalds 已提交
991 992
	}

993
	if (tb[TCA_U32_DIVISOR]) {
E
Eric Dumazet 已提交
994
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
L
Linus Torvalds 已提交
995

996 997
		if (--divisor > 0x100) {
			NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
L
Linus Torvalds 已提交
998
			return -EINVAL;
999 1000 1001
		}
		if (TC_U32_KEY(handle)) {
			NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
L
Linus Torvalds 已提交
1002
			return -EINVAL;
1003
		}
E
Eric Dumazet 已提交
1004
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
L
Linus Torvalds 已提交
1005 1006
		if (ht == NULL)
			return -ENOBUFS;
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
		if (handle == 0) {
			handle = gen_new_htid(tp->data, ht);
			if (handle == 0) {
				kfree(ht);
				return -ENOMEM;
			}
		} else {
			err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
					    handle, handle + 1, GFP_KERNEL);
			if (err) {
				kfree(ht);
				return err;
			}
		}
L
Linus Torvalds 已提交
1021
		ht->tp_c = tp_c;
1022
		ht->refcnt = 1;
L
Linus Torvalds 已提交
1023 1024 1025
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
1026
		idr_init(&ht->handle_idr);
1027
		ht->flags = flags;
1028

1029
		err = u32_replace_hw_hnode(tp, ht, flags, extack);
1030
		if (err) {
1031
			idr_remove_ext(&tp_c->handle_idr, handle);
1032 1033 1034 1035
			kfree(ht);
			return err;
		}

1036 1037
		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
1038
		*arg = ht;
1039

L
Linus Torvalds 已提交
1040 1041 1042
		return 0;
	}

1043
	if (tb[TCA_U32_HASH]) {
1044
		htid = nla_get_u32(tb[TCA_U32_HASH]);
L
Linus Torvalds 已提交
1045
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
1046
			ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
1047 1048 1049
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
1050 1051
			if (!ht) {
				NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
L
Linus Torvalds 已提交
1052
				return -EINVAL;
1053
			}
L
Linus Torvalds 已提交
1054 1055
		}
	} else {
1056
		ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
1057 1058 1059
		htid = ht->handle;
	}

1060 1061
	if (ht->divisor < TC_U32_HASH(htid)) {
		NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
L
Linus Torvalds 已提交
1062
		return -EINVAL;
1063
	}
L
Linus Torvalds 已提交
1064 1065

	if (handle) {
1066 1067
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
			NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
L
Linus Torvalds 已提交
1068
			return -EINVAL;
1069
		}
L
Linus Torvalds 已提交
1070
		handle = htid | TC_U32_NODE(handle);
1071 1072 1073 1074 1075
		err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
				    handle, handle + 1,
				    GFP_KERNEL);
		if (err)
			return err;
L
Linus Torvalds 已提交
1076 1077 1078
	} else
		handle = gen_new_kid(ht, htid);

1079
	if (tb[TCA_U32_SEL] == NULL) {
1080
		NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
1081 1082 1083
		err = -EINVAL;
		goto erridr;
	}
L
Linus Torvalds 已提交
1084

1085
	s = nla_data(tb[TCA_U32_SEL]);
L
Linus Torvalds 已提交
1086

1087
	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
1088 1089 1090 1091
	if (n == NULL) {
		err = -ENOBUFS;
		goto erridr;
	}
L
Linus Torvalds 已提交
1092 1093

#ifdef CONFIG_CLS_U32_PERF
1094 1095 1096
	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
	if (!n->pf) {
1097 1098
		err = -ENOBUFS;
		goto errfree;
L
Linus Torvalds 已提交
1099 1100 1101 1102
	}
#endif

	memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
1103
	RCU_INIT_POINTER(n->ht_up, ht);
L
Linus Torvalds 已提交
1104
	n->handle = handle;
1105
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
1106
	n->flags = flags;
1107
	n->tp = tp;
L
Linus Torvalds 已提交
1108

1109 1110 1111 1112
	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
	if (err < 0)
		goto errout;

L
Linus Torvalds 已提交
1113
#ifdef CONFIG_CLS_U32_MARK
1114
	n->pcpu_success = alloc_percpu(u32);
1115 1116 1117 1118
	if (!n->pcpu_success) {
		err = -ENOMEM;
		goto errout;
	}
1119

1120
	if (tb[TCA_U32_MARK]) {
L
Linus Torvalds 已提交
1121 1122
		struct tc_u32_mark *mark;

1123
		mark = nla_data(tb[TCA_U32_MARK]);
1124 1125
		n->val = mark->val;
		n->mask = mark->mask;
L
Linus Torvalds 已提交
1126 1127 1128
	}
#endif

1129 1130
	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
			    extack);
L
Linus Torvalds 已提交
1131
	if (err == 0) {
1132 1133 1134
		struct tc_u_knode __rcu **ins;
		struct tc_u_knode *pins;

1135
		err = u32_replace_hw_knode(tp, n, flags, extack);
1136 1137 1138
		if (err)
			goto errhw;

1139 1140 1141
		if (!tc_in_hw(n->flags))
			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

1142 1143 1144 1145
		ins = &ht->ht[TC_U32_HASH(handle)];
		for (pins = rtnl_dereference(*ins); pins;
		     ins = &pins->next, pins = rtnl_dereference(*ins))
			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
L
Linus Torvalds 已提交
1146 1147
				break;

1148 1149
		RCU_INIT_POINTER(n->next, pins);
		rcu_assign_pointer(*ins, n);
1150
		*arg = n;
L
Linus Torvalds 已提交
1151 1152
		return 0;
	}
1153

1154
errhw:
1155 1156 1157 1158
#ifdef CONFIG_CLS_U32_MARK
	free_percpu(n->pcpu_success);
#endif

1159 1160
errout:
	tcf_exts_destroy(&n->exts);
L
Linus Torvalds 已提交
1161
#ifdef CONFIG_CLS_U32_PERF
1162
errfree:
1163
	free_percpu(n->pf);
L
Linus Torvalds 已提交
1164 1165
#endif
	kfree(n);
1166 1167
erridr:
	idr_remove_ext(&ht->handle_idr, handle);
L
Linus Torvalds 已提交
1168 1169 1170 1171 1172 1173 1174 1175
	return err;
}

static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
1176
	unsigned int h;
L
Linus Torvalds 已提交
1177 1178 1179 1180

	if (arg->stop)
		return;

1181 1182 1183
	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next)) {
L
Linus Torvalds 已提交
1184 1185 1186
		if (ht->prio != tp->prio)
			continue;
		if (arg->count >= arg->skip) {
1187
			if (arg->fn(tp, ht, arg) < 0) {
L
Linus Torvalds 已提交
1188 1189 1190 1191 1192 1193
				arg->stop = 1;
				return;
			}
		}
		arg->count++;
		for (h = 0; h <= ht->divisor; h++) {
1194 1195 1196
			for (n = rtnl_dereference(ht->ht[h]);
			     n;
			     n = rtnl_dereference(n->next)) {
L
Linus Torvalds 已提交
1197 1198 1199 1200
				if (arg->count < arg->skip) {
					arg->count++;
					continue;
				}
1201
				if (arg->fn(tp, n, arg) < 0) {
L
Linus Torvalds 已提交
1202 1203 1204 1205 1206 1207 1208 1209 1210
					arg->stop = 1;
					return;
				}
				arg->count++;
			}
		}
	}
}

1211 1212 1213 1214 1215 1216 1217 1218
/*
 * Record the class value cl on the key node fh, but only when fh is
 * non-NULL and its result already carries the given classid.
 */
static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
{
	struct tc_u_knode *knode = fh;

	if (!knode)
		return;
	if (knode->res.classid != classid)
		return;

	knode->res.class = cl;
}

1219
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
J
Jamal Hadi Salim 已提交
1220
		    struct sk_buff *skb, struct tcmsg *t)
L
Linus Torvalds 已提交
1221
{
1222
	struct tc_u_knode *n = fh;
1223
	struct tc_u_hnode *ht_up, *ht_down;
1224
	struct nlattr *nest;
L
Linus Torvalds 已提交
1225 1226 1227 1228 1229 1230

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

1231 1232 1233
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
L
Linus Torvalds 已提交
1234 1235

	if (TC_U32_KEY(n->handle) == 0) {
1236
		struct tc_u_hnode *ht = fh;
E
Eric Dumazet 已提交
1237 1238
		u32 divisor = ht->divisor + 1;

1239 1240
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
L
Linus Torvalds 已提交
1241
	} else {
1242 1243 1244
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *gpf;
		int cpu;
1245
#endif
1246

1247 1248 1249 1250
		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;
1251 1252 1253

		ht_up = rtnl_dereference(n->ht_up);
		if (ht_up) {
L
Linus Torvalds 已提交
1254
			u32 htid = n->handle & 0xFFFFF000;
1255 1256
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
L
Linus Torvalds 已提交
1257
		}
1258 1259 1260
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;
1261 1262 1263 1264

		ht_down = rtnl_dereference(n->ht_down);
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1265
			goto nla_put_failure;
L
Linus Torvalds 已提交
1266

1267 1268 1269
		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
			goto nla_put_failure;

L
Linus Torvalds 已提交
1270
#ifdef CONFIG_CLS_U32_MARK
1271 1272 1273 1274
		if ((n->val || n->mask)) {
			struct tc_u32_mark mark = {.val = n->val,
						   .mask = n->mask,
						   .success = 0};
1275
			int cpum;
1276

1277 1278
			for_each_possible_cpu(cpum) {
				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1279 1280 1281 1282 1283 1284 1285

				mark.success += cnt;
			}

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1286 1287
#endif

1288
		if (tcf_exts_dump(skb, &n->exts) < 0)
1289
			goto nla_put_failure;
L
Linus Torvalds 已提交
1290 1291

#ifdef CONFIG_NET_CLS_IND
1292 1293 1294 1295 1296 1297
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1298 1299
#endif
#ifdef CONFIG_CLS_U32_PERF
1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315
		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
			      n->sel.nkeys * sizeof(u64),
			      GFP_KERNEL);
		if (!gpf)
			goto nla_put_failure;

		for_each_possible_cpu(cpu) {
			int i;
			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

			gpf->rcnt += pf->rcnt;
			gpf->rhit += pf->rhit;
			for (i = 0; i < n->sel.nkeys; i++)
				gpf->kcnts[i] += pf->kcnts[i];
		}

1316 1317 1318 1319
		if (nla_put_64bit(skb, TCA_U32_PCNT,
				  sizeof(struct tc_u32_pcnt) +
				  n->sel.nkeys * sizeof(u64),
				  gpf, TCA_U32_PAD)) {
1320
			kfree(gpf);
1321
			goto nla_put_failure;
1322 1323
		}
		kfree(gpf);
L
Linus Torvalds 已提交
1324 1325 1326
#endif
	}

1327 1328
	nla_nest_end(skb, nest);

L
Linus Torvalds 已提交
1329
	if (TC_U32_KEY(n->handle))
1330
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1331
			goto nla_put_failure;
L
Linus Torvalds 已提交
1332 1333
	return skb->len;

1334
nla_put_failure:
1335
	nla_nest_cancel(skb, nest);
L
Linus Torvalds 已提交
1336 1337 1338
	return -1;
}

1339
/*
 * Operations table for the "u32" classifier; registered by init_u32() and
 * unregistered by exit_u32().
 */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	/* identity */
	.kind = "u32",
	.owner = THIS_MODULE,

	/* classifier instance lifetime */
	.init = u32_init,
	.destroy = u32_destroy,

	/* packet path */
	.classify = u32_classify,

	/* filter management */
	.get = u32_get,
	.change = u32_change,
	.delete = u32_delete,
	.bind_class = u32_bind_class,

	/* enumeration and netlink dump */
	.walk = u32_walk,
	.dump = u32_dump,
};

/*
 * Module init: announce the compiled-in options, allocate and initialise
 * the tc_u_common_hash bucket array, then register the classifier ops.
 *
 * Returns 0 on success, -ENOMEM when the bucket array cannot be allocated,
 * or the error from register_tcf_proto_ops() (the array is freed first).
 */
static int __init init_u32(void)
{
	int bucket, err;

	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif

	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
					  sizeof(struct hlist_head),
					  GFP_KERNEL);
	if (!tc_u_common_hash)
		return -ENOMEM;

	/* The array is not zeroed by the allocator; set up each list head. */
	for (bucket = 0; bucket < U32_HASH_SIZE; bucket++)
		INIT_HLIST_HEAD(&tc_u_common_hash[bucket]);

	err = register_tcf_proto_ops(&cls_u32_ops);
	if (err)
		goto err_free_hash;

	return 0;

err_free_hash:
	kvfree(tc_u_common_hash);
	return err;
}

1382
/*
 * Module exit: unregister the classifier ops first, then release the
 * tc_u_common_hash bucket array allocated in init_u32().
 */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);

	kvfree(tc_u_common_hash);
}

/* Module entry and exit hooks, and the module's license declaration. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");