cls_u32.c 31.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
34
#include <linux/slab.h>
L
Linus Torvalds 已提交
35 36 37 38
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
39
#include <linux/percpu.h>
L
Linus Torvalds 已提交
40 41
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
42
#include <linux/bitmap.h>
43 44
#include <linux/netdevice.h>
#include <linux/hash.h>
45
#include <net/netlink.h>
L
Linus Torvalds 已提交
46 47
#include <net/act_api.h>
#include <net/pkt_cls.h>
48
#include <linux/idr.h>
L
Linus Torvalds 已提交
49

E
Eric Dumazet 已提交
50
struct tc_u_knode {
51
	struct tc_u_knode __rcu	*next;
L
Linus Torvalds 已提交
52
	u32			handle;
53
	struct tc_u_hnode __rcu	*ht_up;
L
Linus Torvalds 已提交
54 55
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
56
	int			ifindex;
L
Linus Torvalds 已提交
57 58 59
#endif
	u8			fshift;
	struct tcf_result	res;
60
	struct tc_u_hnode __rcu	*ht_down;
L
Linus Torvalds 已提交
61
#ifdef CONFIG_CLS_U32_PERF
62
	struct tc_u32_pcnt __percpu *pf;
L
Linus Torvalds 已提交
63
#endif
64
	u32			flags;
L
Linus Torvalds 已提交
65
#ifdef CONFIG_CLS_U32_MARK
66 67 68
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
L
Linus Torvalds 已提交
69
#endif
70
	struct tcf_proto	*tp;
71 72 73 74
	union {
		struct work_struct	work;
		struct rcu_head		rcu;
	};
75 76 77 78
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
L
Linus Torvalds 已提交
79 80
};

E
Eric Dumazet 已提交
81
struct tc_u_hnode {
82
	struct tc_u_hnode __rcu	*next;
L
Linus Torvalds 已提交
83 84 85 86
	u32			handle;
	u32			prio;
	struct tc_u_common	*tp_c;
	int			refcnt;
E
Eric Dumazet 已提交
87
	unsigned int		divisor;
88
	struct idr		handle_idr;
89
	struct rcu_head		rcu;
90
	u32			flags;
91 92 93 94
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
L
Linus Torvalds 已提交
95 96
};

E
Eric Dumazet 已提交
97
struct tc_u_common {
98
	struct tc_u_hnode __rcu	*hlist;
99
	struct tcf_block	*block;
L
Linus Torvalds 已提交
100
	int			refcnt;
101
	struct idr		handle_idr;
102
	struct hlist_node	hnode;
103
	struct rcu_head		rcu;
L
Linus Torvalds 已提交
104 105
};

E
Eric Dumazet 已提交
106 107 108
/* Reduce a big-endian packet word to a hash value: keep only the bits
 * selected by sel->hmask, convert to host order and shift right by fshift.
 * The caller masks the result with the table divisor.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	return ntohl(key & sel->hmask) >> fshift;
}

J
Jamal Hadi Salim 已提交
115 116
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
L
Linus Torvalds 已提交
117 118 119
{
	struct {
		struct tc_u_knode *knode;
120
		unsigned int	  off;
L
Linus Torvalds 已提交
121 122
	} stack[TC_U32_MAXDEPTH];

123
	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
124
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
125 126 127 128 129 130 131 132 133 134
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
135
	n = rcu_dereference_bh(ht->ht[sel]);
L
Linus Torvalds 已提交
136 137 138 139 140 141

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
142
		__this_cpu_inc(n->pf->rcnt);
L
Linus Torvalds 已提交
143 144 145
		j = 0;
#endif

146 147 148 149 150
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

L
Linus Torvalds 已提交
151
#ifdef CONFIG_CLS_U32_MARK
152
		if ((skb->mark & n->mask) != n->val) {
153
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
154 155
			goto next_knode;
		} else {
156
			__this_cpu_inc(*n->pcpu_success);
L
Linus Torvalds 已提交
157 158 159
		}
#endif

E
Eric Dumazet 已提交
160
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
161
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
162
			__be32 *data, hdata;
163

D
Dan Carpenter 已提交
164
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
165 166
				goto out;

S
stephen hemminger 已提交
167
			data = skb_header_pointer(skb, toff, 4, &hdata);
168 169 170
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
171
				n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
172 173 174
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
175
			__this_cpu_inc(n->pf->kcnts[j]);
L
Linus Torvalds 已提交
176 177 178
			j++;
#endif
		}
179 180 181

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
L
Linus Torvalds 已提交
182
check_terminal:
E
Eric Dumazet 已提交
183
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
184 185 186

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
187
				if (!tcf_match_indev(skb, n->ifindex)) {
188
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
189 190 191 192
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
193
				__this_cpu_inc(n->pf->rhit);
L
Linus Torvalds 已提交
194 195 196
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
197
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
198 199 200 201 202
					goto next_knode;
				}

				return r;
			}
203
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
204 205 206 207 208 209 210
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
211
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
212 213
		sdepth++;

214
		ht = rcu_dereference_bh(n->ht_down);
L
Linus Torvalds 已提交
215
		sel = 0;
216
		if (ht->divisor) {
S
stephen hemminger 已提交
217
			__be32 *data, hdata;
218 219

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
220
						  &hdata);
221 222 223 224 225
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
226
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
227 228
			goto next_ht;

E
Eric Dumazet 已提交
229
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
230
			off2 = n->sel.off + 3;
231
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
232
				__be16 *data, hdata;
233 234 235

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
236
							  2, &hdata);
237 238 239 240 241
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
242 243
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
244
		if (n->sel.flags & TC_U32_EAT) {
245
			off += off2;
L
Linus Torvalds 已提交
246 247 248
			off2 = 0;
		}

249
		if (off < skb->len)
L
Linus Torvalds 已提交
250 251 252 253 254 255
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
256
		ht = rcu_dereference_bh(n->ht_up);
257
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
258 259
		goto check_terminal;
	}
260
out:
L
Linus Torvalds 已提交
261 262 263
	return -1;

deadloop:
264
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
265 266 267
	return -1;
}

J
Jamal Hadi Salim 已提交
268
/* Find the hash table with the given handle on tp_c's table list.
 * Returns NULL when no table carries that handle.
 */
static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *cur = rtnl_dereference(tp_c->hlist);

	while (cur) {
		if (cur->handle == handle)
			return cur;
		cur = rtnl_dereference(cur->next);
	}

	return NULL;
}

J
Jamal Hadi Salim 已提交
281
/* Find the key node with the given handle inside hash table ht.
 * The bucket is taken from the handle itself; a bucket index beyond the
 * table's divisor, or a handle that is not chained there, yields NULL.
 */
static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned int bucket = TC_U32_HASH(handle);
	struct tc_u_knode *cur;

	if (bucket > ht->divisor)
		return NULL;

	cur = rtnl_dereference(ht->ht[bucket]);
	while (cur && cur->handle != handle)
		cur = rtnl_dereference(cur->next);

	return cur;
}


300
/* Resolve a handle to the object it names.
 *
 * The table part of the handle selects either the root table of tp or a
 * table registered with the shared state. A zero key part returns the
 * table itself (struct tc_u_hnode *), otherwise the key node inside that
 * table (struct tc_u_knode *). Returns NULL when nothing is found.
 */
static void *u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(handle) != TC_U32_ROOT)
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
	else
		ht = rtnl_dereference(tp->root);

	if (!ht)
		return NULL;

	if (TC_U32_KEY(handle) != 0)
		return u32_lookup_key(ht, handle);

	return ht;
}

319
static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
L
Linus Torvalds 已提交
320
{
321 322
	unsigned long idr_index;
	int err;
L
Linus Torvalds 已提交
323

324
	/* This is only used inside rtnl lock it is safe to increment
325 326
	 * without read _copy_ update semantics
	 */
327 328 329 330 331
	err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
			    1, 0x7FF, GFP_KERNEL);
	if (err)
		return 0;
	return (u32)(idr_index | 0x800) << 20;
L
Linus Torvalds 已提交
332 333
}

334 335 336 337 338 339 340
/* Hash table of struct tc_u_common, indexed by tc_u_hash().
 * NOTE(review): allocation is not visible in this part of the file;
 * presumably U32_HASH_SIZE buckets - confirm at the allocation site.
 */
static struct hlist_head *tc_u_common_hash;

/* Number of hash bits used by tc_u_hash() and the resulting bucket count. */
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)

static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
341
	return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
342 343 344 345 346 347 348 349 350
}

static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
{
	struct tc_u_common *tc;
	unsigned int h;

	h = tc_u_hash(tp);
	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
351
		if (tc->block == tp->chain->block)
352 353 354 355 356
			return tc;
	}
	return NULL;
}

L
Linus Torvalds 已提交
357 358 359 360
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;
361
	unsigned int h;
L
Linus Torvalds 已提交
362

363
	tp_c = tc_u_common_find(tp);
L
Linus Torvalds 已提交
364

365
	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
L
Linus Torvalds 已提交
366 367 368 369
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->refcnt++;
370
	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
L
Linus Torvalds 已提交
371
	root_ht->prio = tp->prio;
372
	idr_init(&root_ht->handle_idr);
L
Linus Torvalds 已提交
373 374

	if (tp_c == NULL) {
375
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
L
Linus Torvalds 已提交
376 377 378 379
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
380
		tp_c->block = tp->chain->block;
381
		INIT_HLIST_NODE(&tp_c->hnode);
382
		idr_init(&tp_c->handle_idr);
383 384 385

		h = tc_u_hash(tp);
		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
L
Linus Torvalds 已提交
386 387 388
	}

	tp_c->refcnt++;
389 390
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
L
Linus Torvalds 已提交
391 392
	root_ht->tp_c = tp_c;

393
	rcu_assign_pointer(tp->root, root_ht);
L
Linus Torvalds 已提交
394 395 396 397
	tp->data = tp_c;
	return 0;
}

J
Jamal Hadi Salim 已提交
398
static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
399
			   bool free_pf)
L
Linus Torvalds 已提交
400
{
401 402
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);

403
	tcf_exts_destroy(&n->exts);
404
	tcf_exts_put_net(&n->exts);
405 406
	if (ht && --ht->refcnt == 0)
		kfree(ht);
L
Linus Torvalds 已提交
407
#ifdef CONFIG_CLS_U32_PERF
408 409
	if (free_pf)
		free_percpu(n->pf);
410 411
#endif
#ifdef CONFIG_CLS_U32_MARK
412 413
	if (free_pf)
		free_percpu(n->pcpu_success);
L
Linus Torvalds 已提交
414 415 416 417 418
#endif
	kfree(n);
	return 0;
}

419 420 421 422 423 424 425 426
/* u32_delete_key_rcu should be used when freeing a tc_u_knode whose
 * percpu statistics are shared with a copy obtained from
 * u32_init_knode(). When copies are obtained from u32_init_knode()
 * the statistics are shared between the old and new copies to allow
 * readers to continue to update the statistics during the copy. To
 * support this the u32_delete_key_rcu variant does not free the
 * percpu statistics.
 */
427 428 429 430 431 432 433 434 435
/* Work item: destroy a key node under RTNL, keeping its percpu statistics
 * alive.
 */
static void u32_delete_key_work(struct work_struct *work)
{
	struct tc_u_knode *node;

	node = container_of(work, struct tc_u_knode, work);

	rtnl_lock();
	u32_destroy_key(node->tp, node, false);
	rtnl_unlock();
}

436 437 438 439
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

440 441
	INIT_WORK(&key->work, u32_delete_key_work);
	tcf_queue_work(&key->work);
442 443 444 445 446 447 448 449 450
}

/* u32_delete_key_freepf_rcu is the rcu callback variant
 * that frees the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_rcu()
 * for the variant that should be used with keys returned from
 * u32_init_knode().
 */
451 452 453 454 455 456 457 458 459
/* Work item: destroy a key node under RTNL, including its percpu
 * statistics.
 */
static void u32_delete_key_freepf_work(struct work_struct *work)
{
	struct tc_u_knode *node;

	node = container_of(work, struct tc_u_knode, work);

	rtnl_lock();
	u32_destroy_key(node->tp, node, true);
	rtnl_unlock();
}

460 461 462 463
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

464 465
	INIT_WORK(&key->work, u32_delete_key_freepf_work);
	tcf_queue_work(&key->work);
466 467
}

468
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
469
{
470 471
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
472
	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
L
Linus Torvalds 已提交
473 474

	if (ht) {
475 476 477 478 479
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				RCU_INIT_POINTER(*kp, key->next);
L
Linus Torvalds 已提交
480

481
				tcf_unbind_filter(tp, &key->res);
482
				tcf_exts_get_net(&key->exts);
483
				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
L
Linus Torvalds 已提交
484 485 486 487
				return 0;
			}
		}
	}
488
	WARN_ON(1);
L
Linus Torvalds 已提交
489 490 491
	return 0;
}

492 493
static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
			       struct netlink_ext_ack *extack)
494
{
495
	struct tcf_block *block = tp->chain->block;
496
	struct tc_cls_u32_offload cls_u32 = {};
497

498
	tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
499 500 501 502
	cls_u32.command = TC_CLSU32_DELETE_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
503

504
	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
505 506
}

J
Jamal Hadi Salim 已提交
507
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
508
				u32 flags, struct netlink_ext_ack *extack)
509
{
510
	struct tcf_block *block = tp->chain->block;
511
	struct tc_cls_u32_offload cls_u32 = {};
512 513
	bool skip_sw = tc_skip_sw(flags);
	bool offloaded = false;
514
	int err;
515

516
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
517 518 519 520
	cls_u32.command = TC_CLSU32_NEW_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
521

522 523
	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
	if (err < 0) {
524
		u32_clear_hw_hnode(tp, h, NULL);
525
		return err;
526 527 528 529 530 531
	} else if (err > 0) {
		offloaded = true;
	}

	if (skip_sw && !offloaded)
		return -EINVAL;
532 533

	return 0;
534 535
}

536 537
static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
				struct netlink_ext_ack *extack)
538
{
539
	struct tcf_block *block = tp->chain->block;
540
	struct tc_cls_u32_offload cls_u32 = {};
541

542
	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
543
	cls_u32.command = TC_CLSU32_DELETE_KNODE;
544
	cls_u32.knode.handle = n->handle;
545

546
	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
547
	tcf_block_offload_dec(block, &n->flags);
548 549
}

J
Jamal Hadi Salim 已提交
550
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
551
				u32 flags, struct netlink_ext_ack *extack)
552
{
553
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
554
	struct tcf_block *block = tp->chain->block;
555
	struct tc_cls_u32_offload cls_u32 = {};
556
	bool skip_sw = tc_skip_sw(flags);
557
	int err;
558

559
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
560 561 562
	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
	cls_u32.knode.handle = n->handle;
	cls_u32.knode.fshift = n->fshift;
563
#ifdef CONFIG_CLS_U32_MARK
564 565
	cls_u32.knode.val = n->val;
	cls_u32.knode.mask = n->mask;
566
#else
567 568
	cls_u32.knode.val = 0;
	cls_u32.knode.mask = 0;
569
#endif
570 571
	cls_u32.knode.sel = &n->sel;
	cls_u32.knode.exts = &n->exts;
572
	if (n->ht_down)
573
		cls_u32.knode.link_handle = ht->handle;
574

575 576
	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
	if (err < 0) {
577
		u32_remove_hw_knode(tp, n, NULL);
578
		return err;
579
	} else if (err > 0) {
580
		tcf_block_offload_inc(block, &n->flags);
581 582
	}

583
	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
584
		return -EINVAL;
585 586

	return 0;
587 588
}

589 590
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
			    struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
591 592
{
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
593
	unsigned int h;
L
Linus Torvalds 已提交
594

E
Eric Dumazet 已提交
595
	for (h = 0; h <= ht->divisor; h++) {
596 597 598
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
599
			tcf_unbind_filter(tp, &n->res);
600
			u32_remove_hw_knode(tp, n, extack);
601
			idr_remove_ext(&ht->handle_idr, n->handle);
602 603 604 605
			if (tcf_exts_get_net(&n->exts))
				call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
			else
				u32_destroy_key(n->tp, n, true);
L
Linus Torvalds 已提交
606 607 608 609
		}
	}
}

610 611
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
			     struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
612 613
{
	struct tc_u_common *tp_c = tp->data;
614 615
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;
L
Linus Torvalds 已提交
616

617
	WARN_ON(ht->refcnt);
L
Linus Torvalds 已提交
618

619
	u32_clear_hnode(tp, ht, extack);
L
Linus Torvalds 已提交
620

621 622 623 624 625
	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
626
			u32_clear_hw_hnode(tp, ht, extack);
627 628
			idr_destroy(&ht->handle_idr);
			idr_remove_ext(&tp_c->handle_idr, ht->handle);
629 630
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
L
Linus Torvalds 已提交
631 632 633 634 635 636 637
			return 0;
		}
	}

	return -ENOENT;
}

638 639 640 641 642 643 644 645 646 647 648
/* True when none of the divisor + 1 buckets of ht holds a key node. */
static bool ht_empty(struct tc_u_hnode *ht)
{
	unsigned int bucket = 0;

	while (bucket <= ht->divisor) {
		if (rcu_access_pointer(ht->ht[bucket]))
			return false;
		bucket++;
	}

	return true;
}

649
static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
650 651
{
	struct tc_u_common *tp_c = tp->data;
652
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
653

654
	WARN_ON(root_ht == NULL);
L
Linus Torvalds 已提交
655 656

	if (root_ht && --root_ht->refcnt == 0)
657
		u32_destroy_hnode(tp, root_ht, extack);
L
Linus Torvalds 已提交
658 659 660 661

	if (--tp_c->refcnt == 0) {
		struct tc_u_hnode *ht;

662
		hlist_del(&tp_c->hnode);
L
Linus Torvalds 已提交
663

664
		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
665
			u32_clear_hnode(tp, ht, extack);
666
			RCU_INIT_POINTER(tp_c->hlist, ht->next);
667 668 669 670 671 672

			/* u32_destroy_key() will later free ht for us, if it's
			 * still referenced by some knode
			 */
			if (--ht->refcnt == 0)
				kfree_rcu(ht, rcu);
673
		}
L
Linus Torvalds 已提交
674

675
		idr_destroy(&tp_c->handle_idr);
L
Linus Torvalds 已提交
676 677 678 679 680 681
		kfree(tp_c);
	}

	tp->data = NULL;
}

682 683
/* Delete the object arg refers to: a key node when the key part of its
 * handle is non-zero, a hash table otherwise.
 *
 * The root table cannot be deleted (-EINVAL) and neither can a table whose
 * refcnt is not exactly one (-EBUSY); *last is left untouched in both
 * cases. In all other cases *last tells the caller whether the classifier
 * instance is now empty: it is false while the root table or the shared
 * state is still referenced more than once, or while any table still holds
 * a key node.
 *
 * Returns 0 on success, including when arg is NULL.
 */
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
		      struct netlink_ext_ack *extack)
{
	struct tc_u_hnode *ht = arg;
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
	struct tc_u_common *tp_c = tp->data;
	int ret = 0;

	if (ht != NULL) {
		if (TC_U32_KEY(ht->handle)) {
			/* arg is really a key node; both structures start
			 * with the same 'next' and 'handle' members.
			 */
			struct tc_u_knode *key = (struct tc_u_knode *)ht;

			u32_remove_hw_knode(tp, key, extack);
			ret = u32_delete_key(tp, key);
		} else {
			if (root_ht == ht) {
				NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
				return -EINVAL;
			}

			if (ht->refcnt != 1) {
				NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
				return -EBUSY;
			}

			ht->refcnt--;
			u32_destroy_hnode(tp, ht, extack);
		}
	}

	*last = true;

	if (root_ht &&
	    (root_ht->refcnt > 1 ||
	     (root_ht->refcnt == 1 && !ht_empty(root_ht)))) {
		*last = false;
	} else if (tp_c->refcnt > 1) {
		*last = false;
	} else if (tp_c->refcnt == 1) {
		struct tc_u_hnode *cur;

		for (cur = rtnl_dereference(tp_c->hlist);
		     cur;
		     cur = rtnl_dereference(cur->next)) {
			if (!ht_empty(cur)) {
				*last = false;
				break;
			}
		}
	}

	return ret;
}

748
/* Pick an unused key node handle inside hash table ht.
 *
 * A first attempt asks the table's idr for an id starting at htid | 0x800;
 * if that fails a second attempt starts at htid + 1. Both attempts pass
 * (htid | 0xFFF) + 1 as the end of the range. When both fail,
 * htid | 0xFFF is returned without having been reserved in the idr.
 */
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
	const u32 last = htid | 0xFFF;
	unsigned long id;

	if (idr_alloc_ext(&ht->handle_idr, NULL, &id,
			  htid | 0x800, last + 1, GFP_KERNEL) &&
	    idr_alloc_ext(&ht->handle_idr, NULL, &id,
			  htid + 1, last + 1, GFP_KERNEL))
		return last;

	return (u32)id;
}

765 766 767 768 769 770 771 772
/* Netlink attribute policy used when parsing the nested TCA_OPTIONS of a
 * u32 filter.
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	/* plain 32bit attributes */
	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
	[TCA_U32_HASH]		= { .type = NLA_U32 },
	[TCA_U32_LINK]		= { .type = NLA_U32 },
	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
	/* selector header; the keys follow it in the attribute payload */
	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
};

776 777
/* Apply the netlink attributes in tb to key node n.
 *
 * Validates the extensions, then handles TCA_U32_LINK (retargets
 * n->ht_down, taking a reference on the new table and dropping the one on
 * the old table), TCA_U32_CLASSID (sets the class id and binds the filter)
 * and, with CONFIG_NET_CLS_IND, TCA_U32_INDEV. ht supplies the shared
 * state used to look up the link target.
 *
 * Returns 0 on success, the error of tcf_exts_validate(), or -EINVAL for
 * an invalid link handle, an unknown link target or an input device that
 * cannot be resolved.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr,
			 struct netlink_ext_ack *extack)
{
	int err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);

	if (err < 0)
		return err;

	if (tb[TCA_U32_LINK]) {
		struct tc_u_hnode *target = NULL;
		struct tc_u_hnode *previous;
		u32 link = nla_get_u32(tb[TCA_U32_LINK]);

		if (TC_U32_KEY(link)) {
			NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
			return -EINVAL;
		}

		/* A zero handle removes the link. */
		if (link) {
			target = u32_lookup_ht(ht->tp_c, link);
			if (!target) {
				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
				return -EINVAL;
			}
			target->refcnt++;
		}

		previous = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, target);

		if (previous)
			previous->refcnt--;
	}

	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_U32_INDEV]) {
		int ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);

		if (ifindex < 0)
			return -EINVAL;
		n->ifindex = ifindex;
	}
#endif
	return 0;
}

J
Jamal Hadi Salim 已提交
830
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
			      struct tc_u_knode *n)
{
	struct tc_u_knode __rcu **ins;
	struct tc_u_knode *pins;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));

	ins = &ht->ht[TC_U32_HASH(n->handle)];

	/* The node must always exist for it to be replaced if this is not the
	 * case then something went very wrong elsewhere.
	 */
	for (pins = rtnl_dereference(*ins); ;
	     ins = &pins->next, pins = rtnl_dereference(*ins))
		if (pins->handle == n->handle)
			break;

852
	idr_replace_ext(&ht->handle_idr, n, n->handle);
853 854 855 856 857 858 859
	RCU_INIT_POINTER(n->next, pins->next);
	rcu_assign_pointer(*ins, n);
}

static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
					 struct tc_u_knode *n)
{
860
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
861
	struct tc_u32_sel *s = &n->sel;
862
	struct tc_u_knode *new;
863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878

	new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
		      GFP_KERNEL);

	if (!new)
		return NULL;

	RCU_INIT_POINTER(new->next, n->next);
	new->handle = n->handle;
	RCU_INIT_POINTER(new->ht_up, n->ht_up);

#ifdef CONFIG_NET_CLS_IND
	new->ifindex = n->ifindex;
#endif
	new->fshift = n->fshift;
	new->res = n->res;
879
	new->flags = n->flags;
880
	RCU_INIT_POINTER(new->ht_down, ht);
881 882

	/* bump reference count as long as we hold pointer to structure */
883 884
	if (ht)
		ht->refcnt++;
885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902

#ifdef CONFIG_CLS_U32_PERF
	/* Statistics may be incremented by readers during update
	 * so we must keep them in tact. When the node is later destroyed
	 * a special destroy call must be made to not free the pf memory.
	 */
	new->pf = n->pf;
#endif

#ifdef CONFIG_CLS_U32_MARK
	new->val = n->val;
	new->mask = n->mask;
	/* Similarly success statistics must be moved as pointers */
	new->pcpu_success = n->pcpu_success;
#endif
	new->tp = tp;
	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));

903 904 905 906
	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
		kfree(new);
		return NULL;
	}
907 908 909 910

	return new;
}

911
static int u32_change(struct net *net, struct sk_buff *in_skb,
912
		      struct tcf_proto *tp, unsigned long base, u32 handle,
913 914
		      struct nlattr **tca, void **arg, bool ovr,
		      struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
915 916 917 918 919
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
920 921
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
922
	u32 htid, flags = 0;
L
Linus Torvalds 已提交
923
	int err;
924 925 926
#ifdef CONFIG_CLS_U32_PERF
	size_t size;
#endif
L
Linus Torvalds 已提交
927

928 929 930 931 932 933 934 935
	if (!opt) {
		if (handle) {
			NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
			return -EINVAL;
		} else {
			return 0;
		}
	}
L
Linus Torvalds 已提交
936

937
	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack);
938 939
	if (err < 0)
		return err;
L
Linus Torvalds 已提交
940

941
	if (tb[TCA_U32_FLAGS]) {
942
		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
943 944
		if (!tc_flags_valid(flags)) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
945
			return -EINVAL;
946
		}
947
	}
948

949
	n = *arg;
E
Eric Dumazet 已提交
950
	if (n) {
951 952
		struct tc_u_knode *new;

953 954
		if (TC_U32_KEY(n->handle) == 0) {
			NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
L
Linus Torvalds 已提交
955
			return -EINVAL;
956
		}
L
Linus Torvalds 已提交
957

958 959
		if (n->flags != flags) {
			NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
960
			return -EINVAL;
961
		}
962

963 964 965 966 967 968
		new = u32_init_knode(tp, n);
		if (!new)
			return -ENOMEM;

		err = u32_set_parms(net, tp, base,
				    rtnl_dereference(n->ht_up), new, tb,
969
				    tca[TCA_RATE], ovr, extack);
970 971 972 973 974 975

		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

976
		err = u32_replace_hw_knode(tp, new, flags, extack);
977 978 979 980 981
		if (err) {
			u32_destroy_key(tp, new, false);
			return err;
		}

982 983 984
		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

985
		u32_replace_knode(tp, tp_c, new);
986
		tcf_unbind_filter(tp, &n->res);
987
		tcf_exts_get_net(&n->exts);
988 989
		call_rcu(&n->rcu, u32_delete_key_rcu);
		return 0;
L
Linus Torvalds 已提交
990 991
	}

992
	if (tb[TCA_U32_DIVISOR]) {
E
Eric Dumazet 已提交
993
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
L
Linus Torvalds 已提交
994

995 996
		if (--divisor > 0x100) {
			NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
L
Linus Torvalds 已提交
997
			return -EINVAL;
998 999 1000
		}
		if (TC_U32_KEY(handle)) {
			NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
L
Linus Torvalds 已提交
1001
			return -EINVAL;
1002
		}
E
Eric Dumazet 已提交
1003
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
L
Linus Torvalds 已提交
1004 1005
		if (ht == NULL)
			return -ENOBUFS;
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019
		if (handle == 0) {
			handle = gen_new_htid(tp->data, ht);
			if (handle == 0) {
				kfree(ht);
				return -ENOMEM;
			}
		} else {
			err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
					    handle, handle + 1, GFP_KERNEL);
			if (err) {
				kfree(ht);
				return err;
			}
		}
L
Linus Torvalds 已提交
1020
		ht->tp_c = tp_c;
1021
		ht->refcnt = 1;
L
Linus Torvalds 已提交
1022 1023 1024
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
1025
		idr_init(&ht->handle_idr);
1026
		ht->flags = flags;
1027

1028
		err = u32_replace_hw_hnode(tp, ht, flags, extack);
1029
		if (err) {
1030
			idr_remove_ext(&tp_c->handle_idr, handle);
1031 1032 1033 1034
			kfree(ht);
			return err;
		}

1035 1036
		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
1037
		*arg = ht;
1038

L
Linus Torvalds 已提交
1039 1040 1041
		return 0;
	}

1042
	if (tb[TCA_U32_HASH]) {
1043
		htid = nla_get_u32(tb[TCA_U32_HASH]);
L
Linus Torvalds 已提交
1044
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
1045
			ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
1046 1047 1048
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
1049 1050
			if (!ht) {
				NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
L
Linus Torvalds 已提交
1051
				return -EINVAL;
1052
			}
L
Linus Torvalds 已提交
1053 1054
		}
	} else {
1055
		ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
1056 1057 1058
		htid = ht->handle;
	}

1059 1060
	if (ht->divisor < TC_U32_HASH(htid)) {
		NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
L
Linus Torvalds 已提交
1061
		return -EINVAL;
1062
	}
L
Linus Torvalds 已提交
1063 1064

	if (handle) {
1065 1066
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
			NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
L
Linus Torvalds 已提交
1067
			return -EINVAL;
1068
		}
L
Linus Torvalds 已提交
1069
		handle = htid | TC_U32_NODE(handle);
1070 1071 1072 1073 1074
		err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
				    handle, handle + 1,
				    GFP_KERNEL);
		if (err)
			return err;
L
Linus Torvalds 已提交
1075 1076 1077
	} else
		handle = gen_new_kid(ht, htid);

1078
	if (tb[TCA_U32_SEL] == NULL) {
1079
		NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
1080 1081 1082
		err = -EINVAL;
		goto erridr;
	}
L
Linus Torvalds 已提交
1083

1084
	s = nla_data(tb[TCA_U32_SEL]);
L
Linus Torvalds 已提交
1085

1086
	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
1087 1088 1089 1090
	if (n == NULL) {
		err = -ENOBUFS;
		goto erridr;
	}
L
Linus Torvalds 已提交
1091 1092

#ifdef CONFIG_CLS_U32_PERF
1093 1094 1095
	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
	if (!n->pf) {
1096 1097
		err = -ENOBUFS;
		goto errfree;
L
Linus Torvalds 已提交
1098 1099 1100 1101
	}
#endif

	memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
1102
	RCU_INIT_POINTER(n->ht_up, ht);
L
Linus Torvalds 已提交
1103
	n->handle = handle;
1104
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
1105
	n->flags = flags;
1106
	n->tp = tp;
L
Linus Torvalds 已提交
1107

1108 1109 1110 1111
	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
	if (err < 0)
		goto errout;

L
Linus Torvalds 已提交
1112
#ifdef CONFIG_CLS_U32_MARK
1113
	n->pcpu_success = alloc_percpu(u32);
1114 1115 1116 1117
	if (!n->pcpu_success) {
		err = -ENOMEM;
		goto errout;
	}
1118

1119
	if (tb[TCA_U32_MARK]) {
L
Linus Torvalds 已提交
1120 1121
		struct tc_u32_mark *mark;

1122
		mark = nla_data(tb[TCA_U32_MARK]);
1123 1124
		n->val = mark->val;
		n->mask = mark->mask;
L
Linus Torvalds 已提交
1125 1126 1127
	}
#endif

1128 1129
	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
			    extack);
L
Linus Torvalds 已提交
1130
	if (err == 0) {
1131 1132 1133
		struct tc_u_knode __rcu **ins;
		struct tc_u_knode *pins;

1134
		err = u32_replace_hw_knode(tp, n, flags, extack);
1135 1136 1137
		if (err)
			goto errhw;

1138 1139 1140
		if (!tc_in_hw(n->flags))
			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

1141 1142 1143 1144
		ins = &ht->ht[TC_U32_HASH(handle)];
		for (pins = rtnl_dereference(*ins); pins;
		     ins = &pins->next, pins = rtnl_dereference(*ins))
			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
L
Linus Torvalds 已提交
1145 1146
				break;

1147 1148
		RCU_INIT_POINTER(n->next, pins);
		rcu_assign_pointer(*ins, n);
1149
		*arg = n;
L
Linus Torvalds 已提交
1150 1151
		return 0;
	}
1152

1153
errhw:
1154 1155 1156 1157
#ifdef CONFIG_CLS_U32_MARK
	free_percpu(n->pcpu_success);
#endif

1158 1159
errout:
	tcf_exts_destroy(&n->exts);
L
Linus Torvalds 已提交
1160
#ifdef CONFIG_CLS_U32_PERF
1161
errfree:
1162
	free_percpu(n->pf);
L
Linus Torvalds 已提交
1163 1164
#endif
	kfree(n);
1165 1166
erridr:
	idr_remove_ext(&ht->handle_idr, handle);
L
Linus Torvalds 已提交
1167 1168 1169 1170 1171 1172 1173 1174
	return err;
}

static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
1175
	unsigned int h;
L
Linus Torvalds 已提交
1176 1177 1178 1179

	if (arg->stop)
		return;

1180 1181 1182
	for (ht = rtnl_dereference(tp_c->hlist);
	     ht;
	     ht = rtnl_dereference(ht->next)) {
L
Linus Torvalds 已提交
1183 1184 1185
		if (ht->prio != tp->prio)
			continue;
		if (arg->count >= arg->skip) {
1186
			if (arg->fn(tp, ht, arg) < 0) {
L
Linus Torvalds 已提交
1187 1188 1189 1190 1191 1192
				arg->stop = 1;
				return;
			}
		}
		arg->count++;
		for (h = 0; h <= ht->divisor; h++) {
1193 1194 1195
			for (n = rtnl_dereference(ht->ht[h]);
			     n;
			     n = rtnl_dereference(n->next)) {
L
Linus Torvalds 已提交
1196 1197 1198 1199
				if (arg->count < arg->skip) {
					arg->count++;
					continue;
				}
1200
				if (arg->fn(tp, n, arg) < 0) {
L
Linus Torvalds 已提交
1201 1202 1203 1204 1205 1206 1207 1208 1209
					arg->stop = 1;
					return;
				}
				arg->count++;
			}
		}
	}
}

1210 1211 1212 1213 1214 1215 1216 1217
/*
 * u32_bind_class - store the class reference @cl in a key node.
 * @fh:      filter handle, interpreted as a struct tc_u_knode; may be NULL.
 * @classid: class id the caller is binding.
 * @cl:      class value to record.
 *
 * The node is updated only when its result already targets @classid;
 * otherwise (or when @fh is NULL) nothing is changed.
 */
static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
{
	struct tc_u_knode *knode = fh;

	if (!knode)
		return;
	if (knode->res.classid != classid)
		return;

	knode->res.class = cl;
}

1218
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
J
Jamal Hadi Salim 已提交
1219
		    struct sk_buff *skb, struct tcmsg *t)
L
Linus Torvalds 已提交
1220
{
1221
	struct tc_u_knode *n = fh;
1222
	struct tc_u_hnode *ht_up, *ht_down;
1223
	struct nlattr *nest;
L
Linus Torvalds 已提交
1224 1225 1226 1227 1228 1229

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

1230 1231 1232
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
L
Linus Torvalds 已提交
1233 1234

	if (TC_U32_KEY(n->handle) == 0) {
1235
		struct tc_u_hnode *ht = fh;
E
Eric Dumazet 已提交
1236 1237
		u32 divisor = ht->divisor + 1;

1238 1239
		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
L
Linus Torvalds 已提交
1240
	} else {
1241 1242 1243
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *gpf;
		int cpu;
1244
#endif
1245

1246 1247 1248 1249
		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;
1250 1251 1252

		ht_up = rtnl_dereference(n->ht_up);
		if (ht_up) {
L
Linus Torvalds 已提交
1253
			u32 htid = n->handle & 0xFFFFF000;
1254 1255
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
L
Linus Torvalds 已提交
1256
		}
1257 1258 1259
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;
1260 1261 1262 1263

		ht_down = rtnl_dereference(n->ht_down);
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1264
			goto nla_put_failure;
L
Linus Torvalds 已提交
1265

1266 1267 1268
		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
			goto nla_put_failure;

L
Linus Torvalds 已提交
1269
#ifdef CONFIG_CLS_U32_MARK
1270 1271 1272 1273
		if ((n->val || n->mask)) {
			struct tc_u32_mark mark = {.val = n->val,
						   .mask = n->mask,
						   .success = 0};
1274
			int cpum;
1275

1276 1277
			for_each_possible_cpu(cpum) {
				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1278 1279 1280 1281 1282 1283 1284

				mark.success += cnt;
			}

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1285 1286
#endif

1287
		if (tcf_exts_dump(skb, &n->exts) < 0)
1288
			goto nla_put_failure;
L
Linus Torvalds 已提交
1289 1290

#ifdef CONFIG_NET_CLS_IND
1291 1292 1293 1294 1295 1296
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
L
Linus Torvalds 已提交
1297 1298
#endif
#ifdef CONFIG_CLS_U32_PERF
1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314
		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
			      n->sel.nkeys * sizeof(u64),
			      GFP_KERNEL);
		if (!gpf)
			goto nla_put_failure;

		for_each_possible_cpu(cpu) {
			int i;
			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

			gpf->rcnt += pf->rcnt;
			gpf->rhit += pf->rhit;
			for (i = 0; i < n->sel.nkeys; i++)
				gpf->kcnts[i] += pf->kcnts[i];
		}

1315 1316 1317 1318
		if (nla_put_64bit(skb, TCA_U32_PCNT,
				  sizeof(struct tc_u32_pcnt) +
				  n->sel.nkeys * sizeof(u64),
				  gpf, TCA_U32_PAD)) {
1319
			kfree(gpf);
1320
			goto nla_put_failure;
1321 1322
		}
		kfree(gpf);
L
Linus Torvalds 已提交
1323 1324 1325
#endif
	}

1326 1327
	nla_nest_end(skb, nest);

L
Linus Torvalds 已提交
1328
	if (TC_U32_KEY(n->handle))
1329
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1330
			goto nla_put_failure;
L
Linus Torvalds 已提交
1331 1332
	return skb->len;

1333
nla_put_failure:
1334
	nla_nest_cancel(skb, nest);
L
Linus Torvalds 已提交
1335 1336 1337
	return -1;
}

1338
/*
 * Operations table for the "u32" classifier.  It is handed to
 * register_tcf_proto_ops() at module init and to
 * unregister_tcf_proto_ops() at module exit.
 */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind		=	"u32",
	.classify	=	u32_classify,
	.init		=	u32_init,
	.destroy	=	u32_destroy,
	.get		=	u32_get,
	.change		=	u32_change,
	.delete		=	u32_delete,
	.walk		=	u32_walk,
	.dump		=	u32_dump,
	.bind_class	=	u32_bind_class,
	.owner		=	THIS_MODULE,
};

/*
 * init_u32 - module entry point for the u32 classifier.
 *
 * Logs which optional features were compiled in, allocates the
 * tc_u_common_hash bucket array (U32_HASH_SIZE list heads), initializes
 * every bucket, and registers cls_u32_ops.
 *
 * Returns 0 on success, -ENOMEM if the bucket array cannot be allocated,
 * or the error from register_tcf_proto_ops(); in the latter case the
 * bucket array is released again before returning.
 */
static int __init init_u32(void)
{
	int i, ret;

	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif
	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
					  sizeof(struct hlist_head),
					  GFP_KERNEL);
	if (!tc_u_common_hash)
		return -ENOMEM;

	for (i = 0; i < U32_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&tc_u_common_hash[i]);

	ret = register_tcf_proto_ops(&cls_u32_ops);
	if (ret)
		kvfree(tc_u_common_hash);	/* undo the allocation on failure */
	return ret;
}

1381
/*
 * exit_u32 - module exit point for the u32 classifier.
 *
 * Unregisters cls_u32_ops and then frees the tc_u_common_hash bucket
 * array that init_u32() allocated.  The ops are unregistered first,
 * presumably so that no classifier operation can reach the table once it
 * has been freed.
 */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);
	kvfree(tc_u_common_hash);
}

/* Hook the init/exit routines into the module loader and declare the license. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");