// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough.  And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */

#include <linux/module.h>
27
#include <linux/slab.h>
L
Linus Torvalds 已提交
28 29 30 31
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
32
#include <linux/percpu.h>
L
Linus Torvalds 已提交
33 34
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
35
#include <linux/bitmap.h>
36 37
#include <linux/netdevice.h>
#include <linux/hash.h>
38
#include <net/netlink.h>
L
Linus Torvalds 已提交
39 40
#include <net/act_api.h>
#include <net/pkt_cls.h>
41
#include <linux/idr.h>
L
Linus Torvalds 已提交
42

E
Eric Dumazet 已提交
43
struct tc_u_knode {
44
	struct tc_u_knode __rcu	*next;
L
Linus Torvalds 已提交
45
	u32			handle;
46
	struct tc_u_hnode __rcu	*ht_up;
L
Linus Torvalds 已提交
47
	struct tcf_exts		exts;
48
	int			ifindex;
L
Linus Torvalds 已提交
49 50
	u8			fshift;
	struct tcf_result	res;
51
	struct tc_u_hnode __rcu	*ht_down;
L
Linus Torvalds 已提交
52
#ifdef CONFIG_CLS_U32_PERF
53
	struct tc_u32_pcnt __percpu *pf;
L
Linus Torvalds 已提交
54
#endif
55
	u32			flags;
56
	unsigned int		in_hw_count;
L
Linus Torvalds 已提交
57
#ifdef CONFIG_CLS_U32_MARK
58 59 60
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
L
Linus Torvalds 已提交
61
#endif
C
Cong Wang 已提交
62
	struct rcu_work		rwork;
63 64 65 66
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
L
Linus Torvalds 已提交
67 68
};

E
Eric Dumazet 已提交
69
struct tc_u_hnode {
70
	struct tc_u_hnode __rcu	*next;
L
Linus Torvalds 已提交
71 72 73
	u32			handle;
	u32			prio;
	int			refcnt;
E
Eric Dumazet 已提交
74
	unsigned int		divisor;
75
	struct idr		handle_idr;
76
	bool			is_root;
77
	struct rcu_head		rcu;
78
	u32			flags;
79 80 81
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
82
	struct tc_u_knode __rcu	*ht[];
L
Linus Torvalds 已提交
83 84
};

E
Eric Dumazet 已提交
85
struct tc_u_common {
86
	struct tc_u_hnode __rcu	*hlist;
J
Jiri Pirko 已提交
87
	void			*ptr;
L
Linus Torvalds 已提交
88
	int			refcnt;
89
	struct idr		handle_idr;
90
	struct hlist_node	hnode;
91
	long			knodes;
L
Linus Torvalds 已提交
92 93
};

E
Eric Dumazet 已提交
94 95 96
/* Reduce a packet word to a raw bucket hash: keep the bits selected by
 * sel->hmask, convert to host byte order and shift right by @fshift.
 * The caller still has to mask the result with the table divisor.
 */
static inline unsigned int u32_hash_fold(__be32 key,
					 const struct tc_u32_sel *sel,
					 u8 fshift)
{
	return ntohl(key & sel->hmask) >> fshift;
}

J
Jamal Hadi Salim 已提交
103 104
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
L
Linus Torvalds 已提交
105 106 107
{
	struct {
		struct tc_u_knode *knode;
108
		unsigned int	  off;
L
Linus Torvalds 已提交
109 110
	} stack[TC_U32_MAXDEPTH];

111
	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
112
	unsigned int off = skb_network_offset(skb);
L
Linus Torvalds 已提交
113 114 115 116 117 118 119 120 121 122
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
123
	n = rcu_dereference_bh(ht->ht[sel]);
L
Linus Torvalds 已提交
124 125 126 127 128 129

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
130
		__this_cpu_inc(n->pf->rcnt);
L
Linus Torvalds 已提交
131 132 133
		j = 0;
#endif

134 135 136 137 138
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

L
Linus Torvalds 已提交
139
#ifdef CONFIG_CLS_U32_MARK
140
		if ((skb->mark & n->mask) != n->val) {
141
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
142 143
			goto next_knode;
		} else {
144
			__this_cpu_inc(*n->pcpu_success);
L
Linus Torvalds 已提交
145 146 147
		}
#endif

E
Eric Dumazet 已提交
148
		for (i = n->sel.nkeys; i > 0; i--, key++) {
S
stephen hemminger 已提交
149
			int toff = off + key->off + (off2 & key->offmask);
S
stephen hemminger 已提交
150
			__be32 *data, hdata;
151

D
Dan Carpenter 已提交
152
			if (skb_headroom(skb) + toff > INT_MAX)
S
stephen hemminger 已提交
153 154
				goto out;

S
stephen hemminger 已提交
155
			data = skb_header_pointer(skb, toff, 4, &hdata);
156 157 158
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
159
				n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
160 161 162
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
163
			__this_cpu_inc(n->pf->kcnts[j]);
L
Linus Torvalds 已提交
164 165 166
			j++;
#endif
		}
167 168 169

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
L
Linus Torvalds 已提交
170
check_terminal:
E
Eric Dumazet 已提交
171
			if (n->sel.flags & TC_U32_TERMINAL) {
L
Linus Torvalds 已提交
172 173

				*res = n->res;
174
				if (!tcf_match_indev(skb, n->ifindex)) {
175
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
176 177 178
					goto next_knode;
				}
#ifdef CONFIG_CLS_U32_PERF
179
				__this_cpu_inc(n->pf->rhit);
L
Linus Torvalds 已提交
180 181 182
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
183
					n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
184 185 186 187 188
					goto next_knode;
				}

				return r;
			}
189
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
190 191 192 193 194 195 196
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
197
		stack[sdepth].off = off;
L
Linus Torvalds 已提交
198 199
		sdepth++;

200
		ht = rcu_dereference_bh(n->ht_down);
L
Linus Torvalds 已提交
201
		sel = 0;
202
		if (ht->divisor) {
S
stephen hemminger 已提交
203
			__be32 *data, hdata;
204 205

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
S
stephen hemminger 已提交
206
						  &hdata);
207 208 209 210 211
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
E
Eric Dumazet 已提交
212
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
L
Linus Torvalds 已提交
213 214
			goto next_ht;

E
Eric Dumazet 已提交
215
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
L
Linus Torvalds 已提交
216
			off2 = n->sel.off + 3;
217
			if (n->sel.flags & TC_U32_VAROFFSET) {
S
stephen hemminger 已提交
218
				__be16 *data, hdata;
219 220 221

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
S
stephen hemminger 已提交
222
							  2, &hdata);
223 224 225 226 227
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
L
Linus Torvalds 已提交
228 229
			off2 &= ~3;
		}
E
Eric Dumazet 已提交
230
		if (n->sel.flags & TC_U32_EAT) {
231
			off += off2;
L
Linus Torvalds 已提交
232 233 234
			off2 = 0;
		}

235
		if (off < skb->len)
L
Linus Torvalds 已提交
236 237 238 239 240 241
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
242
		ht = rcu_dereference_bh(n->ht_up);
243
		off = stack[sdepth].off;
L
Linus Torvalds 已提交
244 245
		goto check_terminal;
	}
246
out:
L
Linus Torvalds 已提交
247 248 249
	return -1;

deadloop:
250
	net_warn_ratelimited("cls_u32: dead loop\n");
L
Linus Torvalds 已提交
251 252 253
	return -1;
}

J
Jamal Hadi Salim 已提交
254
/* Find the hash table with exactly @handle on tp_c->hlist.
 * Returns the table, or NULL if there is none.  Uses rtnl_dereference().
 */
static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *cur = rtnl_dereference(tp_c->hlist);

	while (cur && cur->handle != handle)
		cur = rtnl_dereference(cur->next);

	return cur;
}

J
Jamal Hadi Salim 已提交
267
/* Find the key node with exactly @handle in table @ht.
 * Returns NULL when the bucket encoded in @handle lies beyond the table's
 * divisor or when no node in that bucket carries the handle.
 */
static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned int bucket = TC_U32_HASH(handle);
	struct tc_u_knode *cur;

	if (bucket > ht->divisor)
		return NULL;

	cur = rtnl_dereference(ht->ht[bucket]);
	while (cur && cur->handle != handle)
		cur = rtnl_dereference(cur->next);

	return cur;
}


286
/* Resolve @handle to the object it names.
 *
 * Returns a struct tc_u_hnode * when the key part of @handle is zero,
 * otherwise a struct tc_u_knode *; NULL when the table or node is not found.
 */
static void *u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(handle) != TC_U32_ROOT)
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
	else
		ht = rtnl_dereference(tp->root);

	if (!ht)
		return NULL;

	/* A handle without a key part designates the table itself. */
	if (!TC_U32_KEY(handle))
		return ht;

	return u32_lookup_key(ht, handle);
}

305
/* Protected by rtnl lock */
306
static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
L
Linus Torvalds 已提交
307
{
308 309
	int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
	if (id < 0)
310
		return 0;
311
	return (id | 0x800U) << 20;
L
Linus Torvalds 已提交
312 313
}

314 315 316 317 318
/* Bucket array of tc_u_common objects, indexed by tc_u_hash() and linked
 * through tc_u_common.hnode.
 * NOTE(review): its allocation and release are not in the visible part of
 * the file -- presumably done at module init/exit; confirm.
 */
static struct hlist_head *tc_u_common_hash;

/* U32_HASH_SHIFT is the bit count passed to hash_ptr() in tc_u_hash();
 * U32_HASH_SIZE is the matching number of buckets.
 */
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)

J
Jiri Pirko 已提交
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
/* Return the pointer that identifies which tc_u_common @tp belongs to. */
static void *tc_u_common_ptr(const struct tcf_proto *tp)
{
	struct tcf_block *block = tp->chain->block;

	/* The block sharing is currently supported only
	 * for classless qdiscs. In that case we use block
	 * for tc_u_common identification. In case the
	 * block is not shared, block->q is a valid pointer
	 * and we can use that. That works for classful qdiscs.
	 */
	return tcf_block_shared(block) ? (void *)block : (void *)block->q;
}

335
static struct hlist_head *tc_u_hash(void *key)
336
{
337
	return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT);
338 339
}

340
static struct tc_u_common *tc_u_common_find(void *key)
341 342
{
	struct tc_u_common *tc;
343 344
	hlist_for_each_entry(tc, tc_u_hash(key), hnode) {
		if (tc->ptr == key)
345 346 347 348 349
			return tc;
	}
	return NULL;
}

L
Linus Torvalds 已提交
350 351 352
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
353 354
	void *key = tc_u_common_ptr(tp);
	struct tc_u_common *tp_c = tc_u_common_find(key);
L
Linus Torvalds 已提交
355

356
	root_ht = kzalloc(struct_size(root_ht, ht, 1), GFP_KERNEL);
L
Linus Torvalds 已提交
357 358 359 360
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->refcnt++;
361
	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
L
Linus Torvalds 已提交
362
	root_ht->prio = tp->prio;
363
	root_ht->is_root = true;
364
	idr_init(&root_ht->handle_idr);
L
Linus Torvalds 已提交
365 366

	if (tp_c == NULL) {
367
		tp_c = kzalloc(struct_size(tp_c, hlist->ht, 1), GFP_KERNEL);
L
Linus Torvalds 已提交
368 369 370 371
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
372
		tp_c->ptr = key;
373
		INIT_HLIST_NODE(&tp_c->hnode);
374
		idr_init(&tp_c->handle_idr);
375

376
		hlist_add_head(&tp_c->hnode, tc_u_hash(key));
L
Linus Torvalds 已提交
377 378 379
	}

	tp_c->refcnt++;
380 381
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
L
Linus Torvalds 已提交
382

383
	root_ht->refcnt++;
384
	rcu_assign_pointer(tp->root, root_ht);
L
Linus Torvalds 已提交
385 386 387 388
	tp->data = tp_c;
	return 0;
}

389
static void __u32_destroy_key(struct tc_u_knode *n)
L
Linus Torvalds 已提交
390
{
391 392
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);

393
	tcf_exts_destroy(&n->exts);
394 395
	if (ht && --ht->refcnt == 0)
		kfree(ht);
396 397 398 399 400 401
	kfree(n);
}

/* Release the net reference held through n->exts and free key node @n.
 * When @free_pf is true the per-cpu counters (pf, pcpu_success) are freed
 * too; pass false for a node whose counters are still shared with a copy
 * made by u32_init_knode().
 */
static void u32_destroy_key(struct tc_u_knode *n, bool free_pf)
{
	tcf_exts_put_net(&n->exts);

	if (free_pf) {
#ifdef CONFIG_CLS_U32_PERF
		free_percpu(n->pf);
#endif
#ifdef CONFIG_CLS_U32_MARK
		free_percpu(n->pcpu_success);
#endif
	}

	__u32_destroy_key(n);
}

413 414 415 416 417 418 419 420
/* u32_delete_key_rcu should be called when free'ing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_rcu variant does not free the percpu
 * statistics.
 */
421 422
static void u32_delete_key_work(struct work_struct *work)
{
C
Cong Wang 已提交
423 424 425
	struct tc_u_knode *key = container_of(to_rcu_work(work),
					      struct tc_u_knode,
					      rwork);
426
	rtnl_lock();
427
	u32_destroy_key(key, false);
428 429 430
	rtnl_unlock();
}

431 432 433 434 435 436 437
/* u32_delete_key_freepf_rcu is the rcu callback variant
 * that free's the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_rcu()
 * for the variant that should be used with keys return from
 * u32_init_knode()
 */
438 439
static void u32_delete_key_freepf_work(struct work_struct *work)
{
C
Cong Wang 已提交
440 441 442
	struct tc_u_knode *key = container_of(to_rcu_work(work),
					      struct tc_u_knode,
					      rwork);
443
	rtnl_lock();
444
	u32_destroy_key(key, true);
445 446 447
	rtnl_unlock();
}

448
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
L
Linus Torvalds 已提交
449
{
450
	struct tc_u_common *tp_c = tp->data;
451 452
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
453
	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
L
Linus Torvalds 已提交
454 455

	if (ht) {
456 457 458 459 460
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				RCU_INIT_POINTER(*kp, key->next);
461
				tp_c->knodes--;
L
Linus Torvalds 已提交
462

463
				tcf_unbind_filter(tp, &key->res);
464
				idr_remove(&ht->handle_idr, key->handle);
465
				tcf_exts_get_net(&key->exts);
C
Cong Wang 已提交
466
				tcf_queue_work(&key->rwork, u32_delete_key_freepf_work);
L
Linus Torvalds 已提交
467 468 469 470
				return 0;
			}
		}
	}
471
	WARN_ON(1);
L
Linus Torvalds 已提交
472 473 474
	return 0;
}

475 476
static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
			       struct netlink_ext_ack *extack)
477
{
478
	struct tcf_block *block = tp->chain->block;
479
	struct tc_cls_u32_offload cls_u32 = {};
480

481
	tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
482 483 484 485
	cls_u32.command = TC_CLSU32_DELETE_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
486

487
	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true);
488 489
}

J
Jamal Hadi Salim 已提交
490
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
491
				u32 flags, struct netlink_ext_ack *extack)
492
{
493
	struct tcf_block *block = tp->chain->block;
494
	struct tc_cls_u32_offload cls_u32 = {};
495 496
	bool skip_sw = tc_skip_sw(flags);
	bool offloaded = false;
497
	int err;
498

499
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
500 501 502 503
	cls_u32.command = TC_CLSU32_NEW_HNODE;
	cls_u32.hnode.divisor = h->divisor;
	cls_u32.hnode.handle = h->handle;
	cls_u32.hnode.prio = h->prio;
504

505
	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true);
506
	if (err < 0) {
507
		u32_clear_hw_hnode(tp, h, NULL);
508
		return err;
509 510 511 512 513 514
	} else if (err > 0) {
		offloaded = true;
	}

	if (skip_sw && !offloaded)
		return -EINVAL;
515 516

	return 0;
517 518
}

519 520
static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
				struct netlink_ext_ack *extack)
521
{
522
	struct tcf_block *block = tp->chain->block;
523
	struct tc_cls_u32_offload cls_u32 = {};
524

525
	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
526
	cls_u32.command = TC_CLSU32_DELETE_KNODE;
527
	cls_u32.knode.handle = n->handle;
528

529 530
	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false,
			    &n->flags, &n->in_hw_count, true);
531 532
}

J
Jamal Hadi Salim 已提交
533
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
534
				u32 flags, struct netlink_ext_ack *extack)
535
{
536
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
537
	struct tcf_block *block = tp->chain->block;
538
	struct tc_cls_u32_offload cls_u32 = {};
539
	bool skip_sw = tc_skip_sw(flags);
540
	int err;
541

542
	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
543 544 545
	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
	cls_u32.knode.handle = n->handle;
	cls_u32.knode.fshift = n->fshift;
546
#ifdef CONFIG_CLS_U32_MARK
547 548
	cls_u32.knode.val = n->val;
	cls_u32.knode.mask = n->mask;
549
#else
550 551
	cls_u32.knode.val = 0;
	cls_u32.knode.mask = 0;
552
#endif
553
	cls_u32.knode.sel = &n->sel;
554
	cls_u32.knode.res = &n->res;
555
	cls_u32.knode.exts = &n->exts;
556
	if (n->ht_down)
557
		cls_u32.knode.link_handle = ht->handle;
558

559 560 561
	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw,
			      &n->flags, &n->in_hw_count, true);
	if (err) {
562
		u32_remove_hw_knode(tp, n, NULL);
563
		return err;
564 565
	}

566
	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
567
		return -EINVAL;
568 569

	return 0;
570 571
}

572 573
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
			    struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
574
{
575
	struct tc_u_common *tp_c = tp->data;
L
Linus Torvalds 已提交
576
	struct tc_u_knode *n;
E
Eric Dumazet 已提交
577
	unsigned int h;
L
Linus Torvalds 已提交
578

E
Eric Dumazet 已提交
579
	for (h = 0; h <= ht->divisor; h++) {
580 581 582
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
583
			tp_c->knodes--;
584
			tcf_unbind_filter(tp, &n->res);
585
			u32_remove_hw_knode(tp, n, extack);
586
			idr_remove(&ht->handle_idr, n->handle);
587
			if (tcf_exts_get_net(&n->exts))
C
Cong Wang 已提交
588
				tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
589
			else
590
				u32_destroy_key(n, true);
L
Linus Torvalds 已提交
591 592 593 594
		}
	}
}

595 596
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
			     struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
597 598
{
	struct tc_u_common *tp_c = tp->data;
599 600
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;
L
Linus Torvalds 已提交
601

602
	WARN_ON(--ht->refcnt);
L
Linus Torvalds 已提交
603

604
	u32_clear_hnode(tp, ht, extack);
L
Linus Torvalds 已提交
605

606 607 608 609 610
	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
611
			u32_clear_hw_hnode(tp, ht, extack);
612
			idr_destroy(&ht->handle_idr);
613
			idr_remove(&tp_c->handle_idr, ht->handle);
614 615
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
L
Linus Torvalds 已提交
616 617 618 619 620 621 622
			return 0;
		}
	}

	return -ENOENT;
}

623 624
/* Tear down @tp: drop its reference on the root table (destroying the
 * table when only one reference is left afterwards) and its reference on
 * the shared tc_u_common.  When that was the last user, every remaining
 * table is emptied and released and the tc_u_common is freed.
 * @rtnl_held is not used.
 */
static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
			struct netlink_ext_ack *extack)
{
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
	struct tc_u_common *tp_c = tp->data;

	WARN_ON(root_ht == NULL);

	if (root_ht) {
		root_ht->refcnt--;
		if (root_ht->refcnt == 1)
			u32_destroy_hnode(tp, root_ht, extack);
	}

	tp_c->refcnt--;
	if (!tp_c->refcnt) {
		struct tc_u_hnode *ht;

		hlist_del(&tp_c->hnode);

		for (;;) {
			ht = rtnl_dereference(tp_c->hlist);
			if (!ht)
				break;

			u32_clear_hnode(tp, ht, extack);
			RCU_INIT_POINTER(tp_c->hlist, ht->next);

			/* u32_destroy_key() will later free ht for us, if it's
			 * still referenced by some knode
			 */
			ht->refcnt--;
			if (!ht->refcnt)
				kfree_rcu(ht, rcu);
		}

		idr_destroy(&tp_c->handle_idr);
		kfree(tp_c);
	}

	tp->data = NULL;
}

657
/* Delete the object @arg, which is either a key node or a hash table.
 *
 * Both structures start with a 'next' pointer followed by the u32 handle,
 * so the handle is read through the hnode view first; a non-zero key part
 * means @arg is really a struct tc_u_knode.
 *
 * Returns 0 on success, -EINVAL for the root table, -EBUSY for a table
 * that is still referenced.  On success *last tells whether this tp is the
 * only user of the tc_u_common and no key nodes remain.
 * @rtnl_held is not used.
 */
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
		      bool rtnl_held, struct netlink_ext_ack *extack)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht = arg;
	int ret = 0;

	if (TC_U32_KEY(ht->handle)) {
		struct tc_u_knode *n = arg;

		u32_remove_hw_knode(tp, n, extack);
		ret = u32_delete_key(tp, n);
	} else if (ht->is_root) {
		NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
		return -EINVAL;
	} else if (ht->refcnt != 1) {
		NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
		return -EBUSY;
	} else {
		u32_destroy_hnode(tp, ht, extack);
	}

	*last = tp_c->refcnt == 1 && tp_c->knodes == 0;
	return ret;
}

687
/* Pick a handle for a new key node in table @ht.
 *
 * Tries to reserve an id in [htid | 0x800, htid | 0xFFF] first, then in
 * [htid + 1, htid | 0xFFF].  If both attempts fail, htid | 0xFFF is
 * returned without having been reserved in the idr.
 */
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
	const u32 max = htid | 0xFFF;
	u32 index = htid | 0x800;

	if (!idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL))
		return index;

	index = htid + 1;
	if (!idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL))
		return index;

	return max;
}

702 703 704 705 706 707 708 709
/* Netlink attribute policy used when parsing the nested TCA_OPTIONS of a
 * u32 filter.
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	/* plain 32-bit attributes */
	[TCA_U32_CLASSID] = { .type = NLA_U32 },
	[TCA_U32_HASH]    = { .type = NLA_U32 },
	[TCA_U32_LINK]    = { .type = NLA_U32 },
	[TCA_U32_DIVISOR] = { .type = NLA_U32 },
	[TCA_U32_FLAGS]   = { .type = NLA_U32 },
	/* structures, checked by length only */
	[TCA_U32_SEL]     = { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_MARK]    = { .len = sizeof(struct tc_u32_mark) },
	/* interface name */
	[TCA_U32_INDEV]   = { .type = NLA_STRING, .len = IFNAMSIZ },
};

713
/* Apply the optional attributes in @tb to key node @n: extensions/actions,
 * input device, link to another hash table and class id.
 *
 * The input device is resolved before the link is touched, so a failing
 * device lookup returns before any table reference count is changed.
 *
 * Returns 0 on success, the error from tcf_exts_validate(), or -EINVAL for
 * a bad device or link attribute.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr,
			 struct netlink_ext_ack *extack)
{
	int ifindex = -1;
	int err;

	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
	if (err < 0)
		return err;

	if (tb[TCA_U32_INDEV]) {
		ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
		if (ifindex < 0)
			return -EINVAL;
	}

	if (tb[TCA_U32_LINK]) {
		u32 link = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *new_down = NULL;
		struct tc_u_hnode *old_down;

		if (TC_U32_KEY(link)) {
			NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
			return -EINVAL;
		}

		/* A zero link handle removes an existing link. */
		if (link) {
			new_down = u32_lookup_ht(tp->data, link);
			if (!new_down) {
				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
				return -EINVAL;
			}
			if (new_down->is_root) {
				NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
				return -EINVAL;
			}
			new_down->refcnt++;
		}

		old_down = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, new_down);

		if (old_down)
			old_down->refcnt--;
	}

	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

	/* Only overwrite the stored ifindex when a device was given. */
	if (ifindex >= 0)
		n->ifindex = ifindex;

	return 0;
}

J
Jamal Hadi Salim 已提交
771
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792
			      struct tc_u_knode *n)
{
	struct tc_u_knode __rcu **ins;
	struct tc_u_knode *pins;
	struct tc_u_hnode *ht;

	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
		ht = rtnl_dereference(tp->root);
	else
		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));

	ins = &ht->ht[TC_U32_HASH(n->handle)];

	/* The node must always exist for it to be replaced if this is not the
	 * case then something went very wrong elsewhere.
	 */
	for (pins = rtnl_dereference(*ins); ;
	     ins = &pins->next, pins = rtnl_dereference(*ins))
		if (pins->handle == n->handle)
			break;

793
	idr_replace(&ht->handle_idr, n, n->handle);
794 795 796 797
	RCU_INIT_POINTER(n->next, pins->next);
	rcu_assign_pointer(*ins, n);
}

798
static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
799 800
					 struct tc_u_knode *n)
{
801
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
802
	struct tc_u32_sel *s = &n->sel;
803
	struct tc_u_knode *new;
804

805
	new = kzalloc(struct_size(new, sel.keys, s->nkeys), GFP_KERNEL);
806 807 808 809 810 811 812 813 814
	if (!new)
		return NULL;

	RCU_INIT_POINTER(new->next, n->next);
	new->handle = n->handle;
	RCU_INIT_POINTER(new->ht_up, n->ht_up);

	new->ifindex = n->ifindex;
	new->fshift = n->fshift;
815
	new->flags = n->flags;
816
	RCU_INIT_POINTER(new->ht_down, ht);
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831

#ifdef CONFIG_CLS_U32_PERF
	/* Statistics may be incremented by readers during update
	 * so we must keep them in tact. When the node is later destroyed
	 * a special destroy call must be made to not free the pf memory.
	 */
	new->pf = n->pf;
#endif

#ifdef CONFIG_CLS_U32_MARK
	new->val = n->val;
	new->mask = n->mask;
	/* Similarly success statistics must be moved as pointers */
	new->pcpu_success = n->pcpu_success;
#endif
832
	memcpy(&new->sel, s, struct_size(s, keys, s->nkeys));
833

834
	if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
835 836 837
		kfree(new);
		return NULL;
	}
838

839 840 841 842
	/* bump reference count as long as we hold pointer to structure */
	if (ht)
		ht->refcnt++;

843 844 845
	return new;
}

846
static int u32_change(struct net *net, struct sk_buff *in_skb,
847
		      struct tcf_proto *tp, unsigned long base, u32 handle,
848
		      struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
849
		      struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
850 851 852 853 854
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
855 856
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
857
	u32 htid, flags = 0;
858
	size_t sel_size;
L
Linus Torvalds 已提交
859 860
	int err;

861 862 863 864 865 866 867 868
	if (!opt) {
		if (handle) {
			NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
			return -EINVAL;
		} else {
			return 0;
		}
	}
L
Linus Torvalds 已提交
869

870 871
	err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy,
					  extack);
872 873
	if (err < 0)
		return err;
L
Linus Torvalds 已提交
874

875
	if (tb[TCA_U32_FLAGS]) {
876
		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
877 878
		if (!tc_flags_valid(flags)) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
879
			return -EINVAL;
880
		}
881
	}
882

883
	n = *arg;
E
Eric Dumazet 已提交
884
	if (n) {
885 886
		struct tc_u_knode *new;

887 888
		if (TC_U32_KEY(n->handle) == 0) {
			NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
L
Linus Torvalds 已提交
889
			return -EINVAL;
890
		}
L
Linus Torvalds 已提交
891

892 893
		if ((n->flags ^ flags) &
		    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
894
			NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
895
			return -EINVAL;
896
		}
897

898
		new = u32_init_knode(net, tp, n);
899 900 901
		if (!new)
			return -ENOMEM;

A
Al Viro 已提交
902
		err = u32_set_parms(net, tp, base, new, tb,
903
				    tca[TCA_RATE], ovr, extack);
904 905

		if (err) {
906
			__u32_destroy_key(new);
907 908 909
			return err;
		}

910
		err = u32_replace_hw_knode(tp, new, flags, extack);
911
		if (err) {
912
			__u32_destroy_key(new);
913 914 915
			return err;
		}

916 917 918
		if (!tc_in_hw(new->flags))
			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

919
		u32_replace_knode(tp, tp_c, new);
920
		tcf_unbind_filter(tp, &n->res);
921
		tcf_exts_get_net(&n->exts);
C
Cong Wang 已提交
922
		tcf_queue_work(&n->rwork, u32_delete_key_work);
923
		return 0;
L
Linus Torvalds 已提交
924 925
	}

926
	if (tb[TCA_U32_DIVISOR]) {
E
Eric Dumazet 已提交
927
		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
L
Linus Torvalds 已提交
928

929 930 931 932 933
		if (!is_power_of_2(divisor)) {
			NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2");
			return -EINVAL;
		}
		if (divisor-- > 0x100) {
934
			NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
L
Linus Torvalds 已提交
935
			return -EINVAL;
936 937 938
		}
		if (TC_U32_KEY(handle)) {
			NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
L
Linus Torvalds 已提交
939
			return -EINVAL;
940
		}
941
		ht = kzalloc(struct_size(ht, ht, divisor + 1), GFP_KERNEL);
L
Linus Torvalds 已提交
942 943
		if (ht == NULL)
			return -ENOBUFS;
944 945 946 947 948 949 950
		if (handle == 0) {
			handle = gen_new_htid(tp->data, ht);
			if (handle == 0) {
				kfree(ht);
				return -ENOMEM;
			}
		} else {
951 952
			err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle,
					    handle, GFP_KERNEL);
953 954 955 956 957
			if (err) {
				kfree(ht);
				return err;
			}
		}
958
		ht->refcnt = 1;
L
Linus Torvalds 已提交
959 960 961
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
962
		idr_init(&ht->handle_idr);
963
		ht->flags = flags;
964

965
		err = u32_replace_hw_hnode(tp, ht, flags, extack);
966
		if (err) {
967
			idr_remove(&tp_c->handle_idr, handle);
968 969 970 971
			kfree(ht);
			return err;
		}

972 973
		RCU_INIT_POINTER(ht->next, tp_c->hlist);
		rcu_assign_pointer(tp_c->hlist, ht);
974
		*arg = ht;
975

L
Linus Torvalds 已提交
976 977 978
		return 0;
	}

979
	if (tb[TCA_U32_HASH]) {
980
		htid = nla_get_u32(tb[TCA_U32_HASH]);
L
Linus Torvalds 已提交
981
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
982
			ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
983 984 985
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
986 987
			if (!ht) {
				NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
L
Linus Torvalds 已提交
988
				return -EINVAL;
989
			}
L
Linus Torvalds 已提交
990 991
		}
	} else {
992
		ht = rtnl_dereference(tp->root);
L
Linus Torvalds 已提交
993 994 995
		htid = ht->handle;
	}

996 997
	if (ht->divisor < TC_U32_HASH(htid)) {
		NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
L
Linus Torvalds 已提交
998
		return -EINVAL;
999
	}
L
Linus Torvalds 已提交
1000 1001

	if (handle) {
1002 1003
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
			NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
L
Linus Torvalds 已提交
1004
			return -EINVAL;
1005
		}
L
Linus Torvalds 已提交
1006
		handle = htid | TC_U32_NODE(handle);
1007
		err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
1008 1009 1010
				    GFP_KERNEL);
		if (err)
			return err;
L
Linus Torvalds 已提交
1011 1012 1013
	} else
		handle = gen_new_kid(ht, htid);

1014
	if (tb[TCA_U32_SEL] == NULL) {
1015
		NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
1016 1017 1018
		err = -EINVAL;
		goto erridr;
	}
L
Linus Torvalds 已提交
1019

1020
	s = nla_data(tb[TCA_U32_SEL]);
1021 1022 1023 1024 1025
	sel_size = struct_size(s, keys, s->nkeys);
	if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
		err = -EINVAL;
		goto erridr;
	}
L
Linus Torvalds 已提交
1026

1027
	n = kzalloc(struct_size(n, sel.keys, s->nkeys), GFP_KERNEL);
1028 1029 1030 1031
	if (n == NULL) {
		err = -ENOBUFS;
		goto erridr;
	}
L
Linus Torvalds 已提交
1032 1033

#ifdef CONFIG_CLS_U32_PERF
1034 1035
	n->pf = __alloc_percpu(struct_size(n->pf, kcnts, s->nkeys),
			       __alignof__(struct tc_u32_pcnt));
1036
	if (!n->pf) {
1037 1038
		err = -ENOBUFS;
		goto errfree;
L
Linus Torvalds 已提交
1039 1040 1041
	}
#endif

1042
	memcpy(&n->sel, s, sel_size);
1043
	RCU_INIT_POINTER(n->ht_up, ht);
L
Linus Torvalds 已提交
1044
	n->handle = handle;
1045
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
1046
	n->flags = flags;
L
Linus Torvalds 已提交
1047

1048
	err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
1049 1050 1051
	if (err < 0)
		goto errout;

L
Linus Torvalds 已提交
1052
#ifdef CONFIG_CLS_U32_MARK
1053
	n->pcpu_success = alloc_percpu(u32);
1054 1055 1056 1057
	if (!n->pcpu_success) {
		err = -ENOMEM;
		goto errout;
	}
1058

1059
	if (tb[TCA_U32_MARK]) {
L
Linus Torvalds 已提交
1060 1061
		struct tc_u32_mark *mark;

1062
		mark = nla_data(tb[TCA_U32_MARK]);
1063 1064
		n->val = mark->val;
		n->mask = mark->mask;
L
Linus Torvalds 已提交
1065 1066 1067
	}
#endif

A
Al Viro 已提交
1068
	err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
1069
			    extack);
L
Linus Torvalds 已提交
1070
	if (err == 0) {
1071 1072 1073
		struct tc_u_knode __rcu **ins;
		struct tc_u_knode *pins;

1074
		err = u32_replace_hw_knode(tp, n, flags, extack);
1075 1076 1077
		if (err)
			goto errhw;

1078 1079 1080
		if (!tc_in_hw(n->flags))
			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

1081 1082 1083 1084
		ins = &ht->ht[TC_U32_HASH(handle)];
		for (pins = rtnl_dereference(*ins); pins;
		     ins = &pins->next, pins = rtnl_dereference(*ins))
			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
L
Linus Torvalds 已提交
1085 1086
				break;

1087 1088
		RCU_INIT_POINTER(n->next, pins);
		rcu_assign_pointer(*ins, n);
1089
		tp_c->knodes++;
1090
		*arg = n;
L
Linus Torvalds 已提交
1091 1092
		return 0;
	}
1093

1094
errhw:
1095 1096 1097 1098
#ifdef CONFIG_CLS_U32_MARK
	free_percpu(n->pcpu_success);
#endif

1099 1100
errout:
	tcf_exts_destroy(&n->exts);
L
Linus Torvalds 已提交
1101
#ifdef CONFIG_CLS_U32_PERF
1102
errfree:
1103
	free_percpu(n->pf);
L
Linus Torvalds 已提交
1104 1105
#endif
	kfree(n);
1106
erridr:
1107
	idr_remove(&ht->handle_idr, handle);
L
Linus Torvalds 已提交
1108 1109 1110
	return err;
}

1111 1112
/*
 * Iterate over every hash table belonging to this tp's priority and over
 * every key node hanging off those tables, handing each entry to arg->fn.
 *
 * Entries are counted in arg->count; the callback is only invoked once
 * arg->count has reached arg->skip.  Each table is reported before the key
 * nodes it contains.  When the callback returns a negative value the walk
 * ends immediately with arg->stop set.  rtnl_held is not used here.
 */
static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
		     bool rtnl_held)
{
	struct tc_u_common *common = tp->data;
	struct tc_u_hnode *table;

	if (arg->stop)
		return;

	for (table = rtnl_dereference(common->hlist); table;
	     table = rtnl_dereference(table->next)) {
		unsigned int bucket;

		/* The table list is shared; only look at our own priority. */
		if (table->prio != tp->prio)
			continue;

		if (arg->count >= arg->skip && arg->fn(tp, table, arg) < 0)
			goto stop;
		arg->count++;

		/* divisor is the highest valid bucket index, hence "<=". */
		for (bucket = 0; bucket <= table->divisor; bucket++) {
			struct tc_u_knode *key;

			for (key = rtnl_dereference(table->ht[bucket]); key;
			     key = rtnl_dereference(key->next)) {
				if (arg->count >= arg->skip &&
				    arg->fn(tp, key, arg) < 0)
					goto stop;
				arg->count++;
			}
		}
	}
	return;

stop:
	arg->stop = 1;
}

1152
static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
1153
			       bool add, flow_setup_cb_t *cb, void *cb_priv,
1154 1155 1156 1157 1158
			       struct netlink_ext_ack *extack)
{
	struct tc_cls_u32_offload cls_u32 = {};
	int err;

1159
	tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack);
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172
	cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE;
	cls_u32.hnode.divisor = ht->divisor;
	cls_u32.hnode.handle = ht->handle;
	cls_u32.hnode.prio = ht->prio;

	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
	if (err && add && tc_skip_sw(ht->flags))
		return err;

	return 0;
}

static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
1173
			       bool add, flow_setup_cb_t *cb, void *cb_priv,
1174 1175 1176 1177 1178 1179 1180
			       struct netlink_ext_ack *extack)
{
	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
	struct tcf_block *block = tp->chain->block;
	struct tc_cls_u32_offload cls_u32 = {};
	int err;

1181
	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195
	cls_u32.command = add ?
		TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE;
	cls_u32.knode.handle = n->handle;

	if (add) {
		cls_u32.knode.fshift = n->fshift;
#ifdef CONFIG_CLS_U32_MARK
		cls_u32.knode.val = n->val;
		cls_u32.knode.mask = n->mask;
#else
		cls_u32.knode.val = 0;
		cls_u32.knode.mask = 0;
#endif
		cls_u32.knode.sel = &n->sel;
1196
		cls_u32.knode.res = &n->res;
1197 1198 1199 1200 1201
		cls_u32.knode.exts = &n->exts;
		if (n->ht_down)
			cls_u32.knode.link_handle = ht->handle;
	}

1202 1203 1204 1205 1206
	err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32,
				    &cls_u32, cb_priv, &n->flags,
				    &n->in_hw_count);
	if (err)
		return err;
1207 1208 1209 1210

	return 0;
}

1211
/*
 * Replay (add == true) or withdraw (add == false) every hash table and key
 * node of this tp's priority through the block callback cb.  Entries for
 * which tc_skip_hw() is true are left alone.
 *
 * Returns 0 on success or the first error reported while offloading a
 * table (add only) or a key node.  The result of withdrawing a table is
 * not checked.
 */
static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
			 void *cb_priv, struct netlink_ext_ack *extack)
{
	struct tc_u_common *common = tp->data;
	struct tc_u_hnode *table;

	for (table = rtnl_dereference(common->hlist); table;
	     table = rtnl_dereference(table->next)) {
		unsigned int bucket;
		int rc;

		if (table->prio != tp->prio)
			continue;

		/* Ordering matters: on add the hash table goes to the device
		 * before its filters; on removal the filters go first and the
		 * hash table last.
		 */
		if (add && !tc_skip_hw(table->flags)) {
			rc = u32_reoffload_hnode(tp, table, add, cb, cb_priv,
						 extack);
			if (rc)
				return rc;
		}

		/* divisor is the highest valid bucket index, hence "<=". */
		for (bucket = 0; bucket <= table->divisor; bucket++) {
			struct tc_u_knode *key;

			for (key = rtnl_dereference(table->ht[bucket]); key;
			     key = rtnl_dereference(key->next)) {
				if (!tc_skip_hw(key->flags)) {
					rc = u32_reoffload_knode(tp, key, add,
								 cb, cb_priv,
								 extack);
					if (rc)
						return rc;
				}
			}
		}

		if (!add && !tc_skip_hw(table->flags))
			u32_reoffload_hnode(tp, table, add, cb, cb_priv, extack);
	}

	return 0;
}

1258 1259
/*
 * Bind or unbind the result of key node fh for class classid.
 *
 * Nothing happens when fh is NULL or when the node's result refers to a
 * different classid.  Otherwise a non-zero cl binds the filter result
 * (using base) and a zero cl unbinds it.
 */
static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
			   unsigned long base)
{
	struct tc_u_knode *key = fh;

	if (!key || key->res.classid != classid)
		return;

	if (!cl) {
		__tcf_unbind_filter(q, &key->res);
		return;
	}

	__tcf_bind_filter(q, &key->res, base);
}

1271
/*
 * Fill a netlink message describing one u32 entry.
 *
 * fh points either at a hash table (tc_u_hnode) or at a key node
 * (tc_u_knode).  An entry whose handle has a zero TC_U32_KEY() part is
 * dumped as a hash table, anything else as a key node.  A NULL fh adds
 * nothing to the message.
 *
 * Returns skb->len on success.  Returns -1, after cancelling the
 * TCA_OPTIONS nest, when an attribute cannot be added or the temporary
 * counter buffer cannot be allocated.  tp and rtnl_held are not used.
 */
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
		    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
	struct tc_u_knode *knode = fh;
	struct nlattr *opts;

	if (!knode)
		return skb->len;

	t->tcm_handle = knode->handle;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!opts)
		goto nla_put_failure;

	if (!TC_U32_KEY(knode->handle)) {
		/* Hash table: the only attribute is its divisor, reported as
		 * the stored value plus one.
		 */
		struct tc_u_hnode *table = fh;

		if (nla_put_u32(skb, TCA_U32_DIVISOR, table->divisor + 1))
			goto nla_put_failure;
	} else {
		struct tc_u_hnode *up, *down;
		size_t sel_len;
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *total;
		size_t pcnt_len;
		int cpu, rc;
#endif

		sel_len = struct_size(&knode->sel, keys, knode->sel.nkeys);
		if (nla_put(skb, TCA_U32_SEL, sel_len, &knode->sel))
			goto nla_put_failure;

		up = rtnl_dereference(knode->ht_up);
		if (up) {
			/* Keep only the hash table part of the handle. */
			u32 htid = knode->handle & 0xFFFFF000;

			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
		}

		if (knode->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, knode->res.classid))
			goto nla_put_failure;

		down = rtnl_dereference(knode->ht_down);
		if (down && nla_put_u32(skb, TCA_U32_LINK, down->handle))
			goto nla_put_failure;

		if (knode->flags &&
		    nla_put_u32(skb, TCA_U32_FLAGS, knode->flags))
			goto nla_put_failure;

#ifdef CONFIG_CLS_U32_MARK
		if (knode->val || knode->mask) {
			struct tc_u32_mark mark = {.val = knode->val,
						   .mask = knode->mask,
						   .success = 0};
			int cpum;

			/* Sum the per-CPU success counters into one value. */
			for_each_possible_cpu(cpum)
				mark.success += *per_cpu_ptr(knode->pcpu_success,
							     cpum);

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
#endif

		if (tcf_exts_dump(skb, &knode->exts) < 0)
			goto nla_put_failure;

		if (knode->ifindex) {
			struct net_device *dev;

			/* A device that no longer resolves is simply omitted. */
			dev = __dev_get_by_index(net, knode->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}

#ifdef CONFIG_CLS_U32_PERF
		pcnt_len = struct_size(total, kcnts, knode->sel.nkeys);
		total = kzalloc(pcnt_len, GFP_KERNEL);
		if (!total)
			goto nla_put_failure;

		/* Fold every CPU's counters into the zeroed scratch copy. */
		for_each_possible_cpu(cpu) {
			struct tc_u32_pcnt *percpu = per_cpu_ptr(knode->pf, cpu);
			int i;

			total->rcnt += percpu->rcnt;
			total->rhit += percpu->rhit;
			for (i = 0; i < knode->sel.nkeys; i++)
				total->kcnts[i] += percpu->kcnts[i];
		}

		rc = nla_put_64bit(skb, TCA_U32_PCNT, pcnt_len, total,
				   TCA_U32_PAD);
		kfree(total);
		if (rc)
			goto nla_put_failure;
#endif
	}

	nla_nest_end(skb, opts);

	/* Statistics are emitted for key nodes only, outside the nest. */
	if (TC_U32_KEY(knode->handle) &&
	    tcf_exts_dump_stats(skb, &knode->exts) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -1;
}

1384
/*
 * Operations table for the "u32" classifier kind; init_u32() registers it
 * and exit_u32() unregisters it.
 */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	/* Identity */
	.kind = "u32",
	.owner = THIS_MODULE,

	/* Instance setup and teardown */
	.init = u32_init,
	.destroy = u32_destroy,

	/* Packet classification */
	.classify = u32_classify,

	/* Filter management */
	.get = u32_get,
	.change = u32_change,
	.delete = u32_delete,
	.walk = u32_walk,
	.dump = u32_dump,
	.bind_class = u32_bind_class,

	/* Offload replay */
	.reoffload = u32_reoffload,
};

/*
 * Module init: log which optional features were compiled in, allocate and
 * initialise the tc_u_common_hash bucket array, then register the
 * classifier operations.
 *
 * Returns 0 on success, -ENOMEM when the bucket array cannot be allocated,
 * or the error from register_tcf_proto_ops(); in the latter case the
 * bucket array is freed again.
 */
static int __init init_u32(void)
{
	int err, idx;

	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
	pr_info("    input device check on\n");
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif

	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
					  sizeof(struct hlist_head),
					  GFP_KERNEL);
	if (!tc_u_common_hash)
		return -ENOMEM;

	idx = 0;
	while (idx < U32_HASH_SIZE) {
		INIT_HLIST_HEAD(&tc_u_common_hash[idx]);
		idx++;
	}

	err = register_tcf_proto_ops(&cls_u32_ops);
	if (!err)
		return 0;

	/* Registration failed: give the bucket array back. */
	kvfree(tc_u_common_hash);
	return err;
}

1426
/*
 * Module exit: undo init_u32() in reverse order, unregistering the
 * classifier operations before freeing the tc_u_common_hash bucket array.
 */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);

	kvfree(tc_u_common_hash);
}

/* Module entry/exit hooks (init_u32 / exit_u32) and license declaration. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");