// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_police.h>
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
		     struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;

	ops = __tcf_proto_lookup_ops(kind);
	if (ops)
		return ops;
#ifdef CONFIG_MODULES
	if (rtnl_held)
		rtnl_unlock();
	request_module("cls_%s", kind);
	if (rtnl_held)
		rtnl_lock();
	ops = __tcf_proto_lookup_ops(kind);
	/* We dropped the RTNL semaphore in order to perform
	 * the module load. So, even if we succeeded in loading
	 * the module we have to replay the request. We indicate
	 * this using -EAGAIN.
	 */
	if (ops) {
		module_put(ops->owner);
		return ERR_PTR(-EAGAIN);
	}
#endif
	NL_SET_ERR_MSG(extack, "TC classifier not found");
	return ERR_PTR(-ENOENT);
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
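
/* Typical usage (illustrative sketch only, not taken from this file): a
 * classifier module registers its ops in module_init() and removes them
 * again in module_exit(), e.g.
 *
 *	static struct tcf_proto_ops cls_foo_ops __read_mostly = {
 *		.kind	= "foo",
 *		...
 *	};
 *	register_tcf_proto_ops(&cls_foo_ops);
 *	...
 *	unregister_tcf_proto_ops(&cls_foo_ops);
 *
 * "cls_foo_ops" is a made-up name used only for illustration.
 */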

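/* Queue work that may only run after an RCU grace period has elapsed.
 * Classifiers use this to defer freeing of filters until concurrent RCU
 * readers are done; the work item runs on the private tc_filter_wq.
 */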
bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
	INIT_RCU_WORK(rwork, func);
	return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);

/* Select new prio value from the range, managed by kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}

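/* Check whether the classifier named by @kind advertises rtnl-unlocked
 * execution (TCF_PROTO_OPS_DOIT_UNLOCKED). A failed lookup conservatively
 * reports false so that callers fall back to taking the rtnl lock.
 */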
static bool tcf_proto_is_unlocked(const char *kind)
{
	const struct tcf_proto_ops *ops;
	bool ret;

	ops = tcf_proto_lookup_ops(kind, false, NULL);
	/* On error return false to take rtnl lock. Proto lookup/create
	 * functions will perform lookup again and properly handle errors.
	 */
	if (IS_ERR(ops))
		return false;

	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
	module_put(ops->owner);
	return ret;
}

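/* Allocate and initialize a new tcf_proto instance bound to @chain. The
 * classifier ops are looked up by kind (loading the module if necessary);
 * if ops->init() fails, the module reference is dropped again.
 */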
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
					  bool rtnl_held,
					  struct netlink_ext_ack *extack)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
	if (IS_ERR(tp->ops)) {
		err = PTR_ERR(tp->ops);
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->chain = chain;
	spin_lock_init(&tp->lock);
	refcount_set(&tp->refcnt, 1);

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

static void tcf_proto_get(struct tcf_proto *tp)
{
	refcount_inc(&tp->refcnt);
}

static void tcf_chain_put(struct tcf_chain *chain);

static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
			      struct netlink_ext_ack *extack)
{
	tp->ops->destroy(tp, rtnl_held, extack);
	tcf_chain_put(tp->chain);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}

static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
			  struct netlink_ext_ack *extack)
{
	if (refcount_dec_and_test(&tp->refcnt))
		tcf_proto_destroy(tp, rtnl_held, extack);
}

static int walker_check_empty(struct tcf_proto *tp, void *fh,
			      struct tcf_walker *arg)
{
	if (fh) {
		arg->nonempty = true;
		return -1;
	}
	return 0;
}

static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
{
	struct tcf_walker walker = { .fn = walker_check_empty, };

	if (tp->ops->walk) {
		tp->ops->walk(tp, &walker, rtnl_held);
		return !walker.nonempty;
	}
	return true;
}

static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
{
	spin_lock(&tp->lock);
	if (tcf_proto_is_empty(tp, rtnl_held))
		tp->deleting = true;
	spin_unlock(&tp->lock);
	return tp->deleting;
}

static void tcf_proto_mark_delete(struct tcf_proto *tp)
{
	spin_lock(&tp->lock);
	tp->deleting = true;
	spin_unlock(&tp->lock);
}

static bool tcf_proto_is_deleting(struct tcf_proto *tp)
{
	bool deleting;

	spin_lock(&tp->lock);
	deleting = tp->deleting;
	spin_unlock(&tp->lock);

	return deleting;
}

#define ASSERT_BLOCK_LOCKED(block)					\
	lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
	struct list_head list;
	tcf_chain_head_change_t *chain_head_change;
	void *chain_head_change_priv;
};

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	mutex_init(&chain->filter_chain_lock);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	if (!chain->index)
		block->chain0.chain = chain;
	return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
				       struct tcf_proto *tp_head)
{
	if (item->chain_head_change)
		item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
				   struct tcf_proto *tp_head)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_block *block = chain->block;

	if (chain->index)
		return;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
		tcf_chain_head_change_item(item, tp_head);
	mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	ASSERT_BLOCK_LOCKED(block);

	list_del(&chain->list);
	if (!chain->index)
		block->chain0.chain = NULL;

	if (list_empty(&block->chain_list) &&
	    refcount_read(&block->refcnt) == 0)
		return true;

	return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
	mutex_destroy(&block->lock);
	kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
	struct tcf_block *block = chain->block;

	mutex_destroy(&chain->filter_chain_lock);
	kfree_rcu(chain, rcu);
	if (free_block)
		tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	/* In case all the references are action references, this
	 * chain should not be shown to the user.
	 */
	return chain->refcnt == chain->action_refcnt;
}

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index)
			return chain;
	}
	return NULL;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast);

static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
					 u32 chain_index, bool create,
					 bool by_act)
{
	struct tcf_chain *chain = NULL;
	bool is_first_reference;

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (chain) {
		tcf_chain_hold(chain);
	} else {
		if (!create)
			goto errout;
		chain = tcf_chain_create(block, chain_index);
		if (!chain)
			goto errout;
	}

	if (by_act)
		++chain->action_refcnt;
	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
	mutex_unlock(&block->lock);

	/* Send notification only in case we got the first
	 * non-action reference. Until then, the chain acts only as
	 * a placeholder for actions pointing to it and user ought
	 * not know about them.
	 */
	if (is_first_reference && !by_act)
		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);

	return chain;

errout:
	mutex_unlock(&block->lock);
	return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				       bool create)
{
	return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
	return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
			    bool explicitly_created)
{
	struct tcf_block *block = chain->block;
	const struct tcf_proto_ops *tmplt_ops;
	bool free_block = false;
	unsigned int refcnt;
	void *tmplt_priv;

	mutex_lock(&block->lock);
	if (explicitly_created) {
		if (!chain->explicitly_created) {
			mutex_unlock(&block->lock);
			return;
		}
		chain->explicitly_created = false;
	}

	if (by_act)
		chain->action_refcnt--;

	/* tc_chain_notify_delete can't be called while holding block lock.
	 * However, when block is unlocked chain can be changed concurrently, so
	 * save these to temporary variables.
	 */
	refcnt = --chain->refcnt;
	tmplt_ops = chain->tmplt_ops;
	tmplt_priv = chain->tmplt_priv;

	/* The last dropped non-action reference will trigger notification. */
	if (refcnt - chain->action_refcnt == 0 && !by_act) {
		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
				       block, NULL, 0, 0, false);
		/* Last reference to chain, no need to lock. */
		chain->flushing = false;
	}

	if (refcnt == 0)
		free_block = tcf_chain_detach(chain);
	mutex_unlock(&block->lock);

	if (refcnt == 0) {
		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
		tcf_chain_destroy(chain, free_block);
	}
}

static void tcf_chain_put(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, true);
}

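/* Detach all filters from @chain under filter_chain_lock, then drop each
 * tcf_proto reference outside the lock so the last put can free it.
 */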
static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
	struct tcf_proto *tp, *tp_next;

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_dereference(chain->filter_chain, chain);
	RCU_INIT_POINTER(chain->filter_chain, NULL);
	tcf_chain0_head_change(chain, NULL);
	chain->flushing = true;
	mutex_unlock(&chain->filter_chain_lock);

	while (tp) {
		tp_next = rcu_dereference_protected(tp->next, 1);
		tcf_proto_put(tp, rtnl_held, NULL);
		tp = tp_next;
	}
}

static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
{
	const struct Qdisc_class_ops *cops;
	struct Qdisc *qdisc;

	if (!dev_ingress_queue(dev))
		return NULL;

	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
	if (!qdisc)
		return NULL;

	cops = qdisc->ops->cl_ops;
	if (!cops)
		return NULL;

	if (!cops->tcf_block)
		return NULL;

	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
}

static struct rhashtable indr_setup_block_ht;

struct tc_indr_block_dev {
	struct rhash_head ht_node;
	struct net_device *dev;
	unsigned int refcnt;
	struct list_head cb_list;
	struct tcf_block *block;
};

struct tc_indr_block_cb {
	struct list_head list;
	void *cb_priv;
	tc_indr_block_bind_cb_t *cb;
	void *cb_ident;
};

static const struct rhashtable_params tc_indr_setup_block_ht_params = {
	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
	.key_len	= sizeof(struct net_device *),
};

static struct tc_indr_block_dev *
tc_indr_block_dev_lookup(struct net_device *dev)
{
	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
				      tc_indr_setup_block_ht_params);
}

static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
{
	struct tc_indr_block_dev *indr_dev;

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (indr_dev)
		goto inc_ref;

	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
	if (!indr_dev)
		return NULL;

	INIT_LIST_HEAD(&indr_dev->cb_list);
	indr_dev->dev = dev;
	indr_dev->block = tc_dev_ingress_block(dev);
	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
				   tc_indr_setup_block_ht_params)) {
		kfree(indr_dev);
		return NULL;
	}

inc_ref:
	indr_dev->refcnt++;
	return indr_dev;
}

static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
{
	if (--indr_dev->refcnt)
		return;

	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
			       tc_indr_setup_block_ht_params);
	kfree(indr_dev);
}

static struct tc_indr_block_cb *
tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
			tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;

	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
		if (indr_block_cb->cb == cb &&
		    indr_block_cb->cb_ident == cb_ident)
			return indr_block_cb;
	return NULL;
}

static struct tc_indr_block_cb *
tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
		     tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;

	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
	if (indr_block_cb)
		return ERR_PTR(-EEXIST);

	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
	if (!indr_block_cb)
		return ERR_PTR(-ENOMEM);

	indr_block_cb->cb_priv = cb_priv;
	indr_block_cb->cb = cb;
	indr_block_cb->cb_ident = cb_ident;
	list_add(&indr_block_cb->list, &indr_dev->cb_list);

	return indr_block_cb;
}

static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
{
	list_del(&indr_block_cb->list);
	kfree(indr_block_cb);
}

static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
				  struct tc_indr_block_cb *indr_block_cb,
				  enum flow_block_command command)
{
	struct tc_block_offload bo = {
		.command	= command,
		.binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
		.net		= dev_net(indr_dev->dev),
		.block		= indr_dev->block,
	};

	if (!indr_dev->block)
		return;

	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
			  &bo);
}

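/* Register a callback for "indirect" block notifications on behalf of a
 * device that does not offload its own TC block (for example a tunnel
 * netdevice whose rules are offloaded by another driver). If the device
 * already has an ingress block, a FLOW_BLOCK_BIND command is replayed
 * immediately for the new callback.
 */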
int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
				tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;
	int err;

	indr_dev = tc_indr_block_dev_get(dev);
	if (!indr_dev)
		return -ENOMEM;

	indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
	err = PTR_ERR_OR_ZERO(indr_block_cb);
	if (err)
		goto err_dev_put;

	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND);
	return 0;

err_dev_put:
	tc_indr_block_dev_put(indr_dev);
	return err;
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);

int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	int err;

	rtnl_lock();
	err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);

void __tc_indr_block_cb_unregister(struct net_device *dev,
				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (!indr_dev)
		return;

	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
	if (!indr_block_cb)
		return;

	/* Send unbind message if required to free any block cbs. */
	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND);
	tc_indr_block_cb_del(indr_block_cb);
	tc_indr_block_dev_put(indr_dev);
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);

void tc_indr_block_cb_unregister(struct net_device *dev,
				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	rtnl_lock();
	__tc_indr_block_cb_unregister(dev, cb, cb_ident);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);

static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
			       struct tcf_block_ext_info *ei,
			       enum flow_block_command command,
			       struct netlink_ext_ack *extack)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;
	struct tc_block_offload bo = {
		.command	= command,
		.binder_type	= ei->binder_type,
		.net		= dev_net(dev),
		.block		= block,
		.extack		= extack,
	};

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (!indr_dev)
		return;

	indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL;

	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
				  &bo);
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
	return block->offloadcnt;
}

static int tcf_block_offload_cmd(struct tcf_block *block,
				 struct net_device *dev,
				 struct tcf_block_ext_info *ei,
				 enum flow_block_command command,
				 struct netlink_ext_ack *extack)
{
	struct tc_block_offload bo = {};

	bo.net = dev_net(dev);
	bo.command = command;
	bo.binder_type = ei->binder_type;
	bo.block = block;
	bo.extack = extack;
	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
}

static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
				  struct tcf_block_ext_info *ei,
				  struct netlink_ext_ack *extack)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_inc;

	/* If tc offload feature is disabled and the block we try to bind
	 * to already has some offloaded filters, forbid to bind.
	 */
	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
		return -EOPNOTSUPP;
	}

	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_inc;
	if (err)
		return err;

	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
	return 0;

no_offload_dev_inc:
	if (tcf_block_offload_in_use(block))
		return -EOPNOTSUPP;
	block->nooffloaddevcnt++;
	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
	return 0;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
				     struct tcf_block_ext_info *ei)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_dec;
	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_dec;
	return;

no_offload_dev_dec:
	WARN_ON(block->nooffloaddevcnt-- == 0);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
			      struct tcf_block_ext_info *ei,
			      struct netlink_ext_ack *extack)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_chain *chain0;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
		return -ENOMEM;
	}
	item->chain_head_change = ei->chain_head_change;
	item->chain_head_change_priv = ei->chain_head_change_priv;

	mutex_lock(&block->lock);
	chain0 = block->chain0.chain;
	if (chain0)
		tcf_chain_hold(chain0);
	else
		list_add(&item->list, &block->chain0.filter_chain_list);
	mutex_unlock(&block->lock);

	if (chain0) {
		struct tcf_proto *tp_head;

		mutex_lock(&chain0->filter_chain_lock);

		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
		if (tp_head)
			tcf_chain_head_change_item(item, tp_head);

		mutex_lock(&block->lock);
		list_add(&item->list, &block->chain0.filter_chain_list);
		mutex_unlock(&block->lock);

		mutex_unlock(&chain0->filter_chain_lock);
		tcf_chain_put(chain0);
	}

	return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
			      struct tcf_block_ext_info *ei)
{
	struct tcf_filter_chain_list_item *item;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
		    (item->chain_head_change == ei->chain_head_change &&
		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
			if (block->chain0.chain)
				tcf_chain_head_change_item(item, NULL);
			list_del(&item->list);
			mutex_unlock(&block->lock);

			kfree(item);
			return;
		}
	}
	mutex_unlock(&block->lock);
	WARN_ON(1);
}

struct tcf_net {
	spinlock_t idr_lock; /* Protects idr */
	struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
			    struct netlink_ext_ack *extack)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);
	int err;

	idr_preload(GFP_KERNEL);
	spin_lock(&tn->idr_lock);
	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
			    GFP_NOWAIT);
	spin_unlock(&tn->idr_lock);
	idr_preload_end();

	return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock(&tn->idr_lock);
	idr_remove(&tn->idr, block->index);
	spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	block = kzalloc(sizeof(*block), GFP_KERNEL);
	if (!block) {
		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
		return ERR_PTR(-ENOMEM);
	}
	mutex_init(&block->lock);
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->cb_list);
	INIT_LIST_HEAD(&block->owner_list);
	INIT_LIST_HEAD(&block->chain0.filter_chain_list);

	refcount_set(&block->refcnt, 1);
	block->net = net;
	block->index = block_index;

	/* Don't store q pointer for blocks which are shared */
	if (!tcf_block_shared(block))
		block->q = q;
	return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	return idr_find(&tn->idr, block_index);
}

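/* Look up a block by index and take a reference, unless the block is
 * already being destroyed (its refcount has dropped to zero). Returns
 * NULL when no usable block exists.
 */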
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
	struct tcf_block *block;

	rcu_read_lock();
	block = tcf_block_lookup(net, block_index);
	if (block && !refcount_inc_not_zero(&block->refcnt))
		block = NULL;
	rcu_read_unlock();

	return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	mutex_lock(&block->lock);
	if (chain)
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);
	else
		chain = list_first_entry_or_null(&block->chain_list,
						 struct tcf_chain, list);

	/* skip all action-only chains */
	while (chain && tcf_chain_held_by_acts_only(chain))
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);

	if (chain)
		tcf_chain_hold(chain);
	mutex_unlock(&block->lock);

	return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * block. It properly obtains block->lock and takes reference to chain before
 * returning it. Users of this function must be tolerant to concurrent chain
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

	if (chain)
		tcf_chain_put(chain);

	return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);

static struct tcf_proto *
__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
	u32 prio = 0;

	ASSERT_RTNL();
	mutex_lock(&chain->filter_chain_lock);

	if (!tp) {
		tp = tcf_chain_dereference(chain->filter_chain, chain);
	} else if (tcf_proto_is_deleting(tp)) {
		/* 'deleting' flag is set and chain->filter_chain_lock was
		 * unlocked, which means next pointer could be invalid. Restart
		 * search.
		 */
		prio = tp->prio + 1;
		tp = tcf_chain_dereference(chain->filter_chain, chain);

		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
			if (!tp->deleting && tp->prio >= prio)
				break;
	} else {
		tp = tcf_chain_dereference(tp->next, chain);
	}

	if (tp)
		tcf_proto_get(tp);

	mutex_unlock(&chain->filter_chain_lock);

	return tp;
}

/* Function to be used by all clients that want to iterate over all tp's on
 * chain. Users of this function must be tolerant to concurrent tp
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_proto *
tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
		   bool rtnl_held)
{
	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);

	if (tp)
		tcf_proto_put(tp, rtnl_held, NULL);

	return tp_next;
}
EXPORT_SYMBOL(tcf_get_next_proto);

static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
{
	struct tcf_chain *chain;

	/* Last reference to block. At this point chains cannot be added or
	 * removed concurrently.
	 */
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		tcf_chain_put_explicitly_created(chain);
		tcf_chain_flush(chain, rtnl_held);
	}
}

/* Lookup Qdisc and increments its reference counter.
 * Set parent, if necessary.
 */

static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
			    u32 *parent, int ifindex, bool rtnl_held,
			    struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *cops;
	struct net_device *dev;
	int err = 0;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	rcu_read_lock();

	/* Find link */
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	/* Find qdisc */
	if (!*parent) {
		*q = dev->qdisc;
		*parent = (*q)->handle;
	} else {
		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
		if (!*q) {
			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
			err = -EINVAL;
			goto errout_rcu;
		}
	}

	*q = qdisc_refcount_inc_nz(*q);
	if (!*q) {
		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
		err = -EINVAL;
		goto errout_rcu;
	}

	/* Is it classful? */
	cops = (*q)->ops->cl_ops;
	if (!cops) {
		NL_SET_ERR_MSG(extack, "Qdisc not classful");
		err = -EINVAL;
		goto errout_qdisc;
	}

	if (!cops->tcf_block) {
		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
		err = -EOPNOTSUPP;
		goto errout_qdisc;
	}

errout_rcu:
	/* At this point we know that qdisc is not noop_qdisc,
	 * which means that qdisc holds a reference to net_device
	 * and we hold a reference to qdisc, so it is safe to release
	 * rcu read lock.
	 */
	rcu_read_unlock();
	return err;

errout_qdisc:
	rcu_read_unlock();

	if (rtnl_held)
		qdisc_put(*q);
	else
		qdisc_put_unlocked(*q);
	*q = NULL;

	return err;
}

static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
			       int ifindex, struct netlink_ext_ack *extack)
{
	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		*cl = cops->find(q, parent);
		if (*cl == 0) {
			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
			return -ENOENT;
		}
	}

	return 0;
}

static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
					  unsigned long cl, int ifindex,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, block_index);
		if (!block) {
			NL_SET_ERR_MSG(extack, "Block of given index was not found");
			return ERR_PTR(-EINVAL);
		}
	} else {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		block = cops->tcf_block(q, cl, extack);
		if (!block)
			return ERR_PTR(-EINVAL);

		if (tcf_block_shared(block)) {
			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
			return ERR_PTR(-EOPNOTSUPP);
		}

		/* Always take reference to block in order to support execution
		 * of rules update path of cls API without rtnl lock. Caller
		 * must release block when it is finished using it. 'if' block
		 * of this conditional obtain reference to block by calling
		 * tcf_block_refcnt_get().
		 */
		refcount_inc(&block->refcnt);
	}

	return block;
}

static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
			    struct tcf_block_ext_info *ei, bool rtnl_held)
{
	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
		/* Flushing/putting all chains will cause the block to be
		 * deallocated when last chain is freed. However, if chain_list
		 * is empty, block has to be manually deallocated. After block
		 * reference counter reached 0, it is no longer possible to
		 * increment it or add new chains to block.
		 */
		bool free_block = list_empty(&block->chain_list);

		mutex_unlock(&block->lock);
		if (tcf_block_shared(block))
			tcf_block_remove(block, block->net);

		if (q)
			tcf_block_offload_unbind(block, q, ei);

		if (free_block)
			tcf_block_destroy(block);
		else
			tcf_block_flush_all_chains(block, rtnl_held);
	} else if (q) {
		tcf_block_offload_unbind(block, q, ei);
	}
}

static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
	__tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
					u32 *parent, unsigned long *cl,
					int ifindex, u32 block_index,
					struct netlink_ext_ack *extack)
{
	struct tcf_block *block;
	int err = 0;

	ASSERT_RTNL();

	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
	if (err)
		goto errout;

	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
	if (err)
		goto errout_qdisc;

	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout_qdisc;
	}

	return block;

errout_qdisc:
	if (*q)
		qdisc_put(*q);
errout:
	*q = NULL;
	return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
			      bool rtnl_held)
{
	if (!IS_ERR_OR_NULL(block))
		tcf_block_refcnt_put(block, rtnl_held);

	if (q) {
		if (rtnl_held)
			qdisc_put(q);
		else
			qdisc_put_unlocked(q);
	}
}

struct tcf_block_owner_item {
	struct list_head list;
	struct Qdisc *q;
	enum flow_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	if (block->keep_dst &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
	struct tcf_block_owner_item *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->owner_list, list)
		tcf_block_owner_netif_keep_dst(block, item->q,
					       item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	item->q = q;
	item->binder_type = binder_type;
	list_add(&item->list, &block->owner_list);
	return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
				enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	list_for_each_entry(item, &block->owner_list, list) {
		if (item->q == q && item->binder_type == binder_type) {
			list_del(&item->list);
			kfree(item);
			return;
		}
	}
	WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei,
		      struct netlink_ext_ack *extack)
{
	struct net *net = qdisc_net(q);
	struct tcf_block *block = NULL;
	int err;

	if (ei->block_index)
		/* block_index not 0 means the shared block is requested */
		block = tcf_block_refcnt_get(net, ei->block_index);

	if (!block) {
		block = tcf_block_create(net, q, ei->block_index, extack);
		if (IS_ERR(block))
			return PTR_ERR(block);
		if (tcf_block_shared(block)) {
			err = tcf_block_insert(block, net, extack);
			if (err)
				goto err_block_insert;
		}
	}

	err = tcf_block_owner_add(block, q, ei->binder_type);
	if (err)
		goto err_block_owner_add;

	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

	err = tcf_chain0_head_change_cb_add(block, ei, extack);
	if (err)
		goto err_chain0_head_change_cb_add;

	err = tcf_block_offload_bind(block, q, ei, extack);
	if (err)
		goto err_block_offload_bind;

	*p_block = block;
	return 0;

err_block_offload_bind:
	tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
	tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
	tcf_block_refcnt_put(block, true);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
		  struct netlink_ext_ack *extack)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	if (!block)
		return;
	tcf_chain0_head_change_cb_del(block, ei);
	tcf_block_owner_del(block, q, ei->binder_type);

	__tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	if (!block)
		return;
	tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);
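
/* Illustrative sketch (not part of this file): a classful qdisc typically
 * obtains its block in ->init() and releases it in ->destroy(), e.g.
 *
 *	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
 *	if (err)
 *		return err;
 *	...
 *	tcf_block_put(q->block);
 *
 * The field names "q->block" and "q->filter_list" are assumptions for the
 * example; each qdisc defines its own private structure.
 */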

struct tcf_block_cb {
	struct list_head list;
	tc_setup_cb_t *cb;
	void *cb_ident;
	void *cb_priv;
	unsigned int refcnt;
};

void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
{
	return block_cb->cb_priv;
}
EXPORT_SYMBOL(tcf_block_cb_priv);

struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
					 tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *block_cb;

	list_for_each_entry(block_cb, &block->cb_list, list)
		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
			return block_cb;
	return NULL;
}
EXPORT_SYMBOL(tcf_block_cb_lookup);

void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
{
	block_cb->refcnt++;
}
EXPORT_SYMBOL(tcf_block_cb_incref);

unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
{
	return --block_cb->refcnt;
}
EXPORT_SYMBOL(tcf_block_cb_decref);

static int
tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
{
	struct tcf_chain *chain, *chain_prev;
	struct tcf_proto *tp, *tp_prev;
	int err;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		for (tp = __tcf_get_next_proto(chain, NULL); tp;
		     tp_prev = tp,
			     tp = __tcf_get_next_proto(chain, tp),
			     tcf_proto_put(tp_prev, true, NULL)) {
			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
				if (err && add)
					goto err_playback_remove;
			} else if (add && offload_in_use) {
				err = -EOPNOTSUPP;
				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
				goto err_playback_remove;
			}
		}
	}

	return 0;

err_playback_remove:
	tcf_proto_put(tp, true, NULL);
	tcf_chain_put(chain);
	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
	return err;
}

struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
					     tc_setup_cb_t *cb, void *cb_ident,
					     void *cb_priv,
					     struct netlink_ext_ack *extack)
{
	struct tcf_block_cb *block_cb;
	int err;

	/* Replay any already present rules */
	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
					  tcf_block_offload_in_use(block),
					  extack);
	if (err)
		return ERR_PTR(err);

	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
	if (!block_cb)
		return ERR_PTR(-ENOMEM);
	block_cb->cb = cb;
	block_cb->cb_ident = cb_ident;
	block_cb->cb_priv = cb_priv;
	list_add(&block_cb->list, &block->cb_list);
	return block_cb;
}
EXPORT_SYMBOL(__tcf_block_cb_register);

int tcf_block_cb_register(struct tcf_block *block,
			  tc_setup_cb_t *cb, void *cb_ident,
			  void *cb_priv, struct netlink_ext_ack *extack)
{
	struct tcf_block_cb *block_cb;

	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
					   extack);
	return PTR_ERR_OR_ZERO(block_cb);
}
EXPORT_SYMBOL(tcf_block_cb_register);

void __tcf_block_cb_unregister(struct tcf_block *block,
			       struct tcf_block_cb *block_cb)
{
	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
				    false, tcf_block_offload_in_use(block),
				    NULL);
	list_del(&block_cb->list);
	kfree(block_cb);
}
EXPORT_SYMBOL(__tcf_block_cb_unregister);

void tcf_block_cb_unregister(struct tcf_block *block,
			     tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *block_cb;

	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
	if (!block_cb)
		return;
	__tcf_block_cb_unregister(block, block_cb);
}
EXPORT_SYMBOL(tcf_block_cb_unregister);

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		__be16 protocol = tc_skb_protocol(skb);
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);

struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info)
{
	return tcf_chain_dereference(*chain_info->pprev, chain);
}

static int tcf_chain_tp_insert(struct tcf_chain *chain,
			       struct tcf_chain_info *chain_info,
			       struct tcf_proto *tp)
{
	if (chain->flushing)
		return -EAGAIN;

	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain0_head_change(chain, tp);
	tcf_proto_get(tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);

	return 0;
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);

	tcf_proto_mark_delete(tp);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate);

/* Try to insert new proto.
 * If proto with specified priority already exists, free new proto
 * and return existing one.
 */

static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
						    struct tcf_proto *tp_new,
						    u32 protocol, u32 prio,
						    bool rtnl_held)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp;
	int err = 0;

	mutex_lock(&chain->filter_chain_lock);

	tp = tcf_chain_tp_find(chain, &chain_info,
			       protocol, prio, false);
	if (!tp)
		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
	mutex_unlock(&chain->filter_chain_lock);

	if (tp) {
		tcf_proto_destroy(tp_new, rtnl_held, NULL);
		tp_new = tp;
	} else if (err) {
		tcf_proto_destroy(tp_new, rtnl_held, NULL);
		tp_new = ERR_PTR(err);
	}

	return tp_new;
}

static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
				      struct tcf_proto *tp, bool rtnl_held,
				      struct netlink_ext_ack *extack)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp_iter;
	struct tcf_proto **pprev;
	struct tcf_proto *next;

	mutex_lock(&chain->filter_chain_lock);

	/* Atomically find and remove tp from chain. */
	for (pprev = &chain->filter_chain;
	     (tp_iter = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp_iter->next) {
		if (tp_iter == tp) {
			chain_info.pprev = pprev;
			chain_info.next = tp_iter->next;
			WARN_ON(tp_iter->deleting);
			break;
		}
	}
	/* Verify that tp still exists and no new filters were inserted
	 * concurrently.
	 * Mark tp for deletion if it is empty.
	 */
	if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
		mutex_unlock(&chain->filter_chain_lock);
		return;
	}

	next = tcf_chain_dereference(chain_info.next, chain);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info.pprev, next);
	mutex_unlock(&chain->filter_chain_lock);

	tcf_proto_put(tp, rtnl_held, extack);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	if (tp) {
		chain_info->next = tp->next;
		tcf_proto_get(tp);
	} else {
		chain_info->next = NULL;
	}
	return tp;
}

static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
			 u32 portid, u32 seq, u16 flags, int event,
			 bool rtnl_held)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	if (q) {
		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
		tcm->tcm_parent = parent;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else {
		if (tp->ops->dump &&
		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct tcf_block *block, struct Qdisc *q,
			  u32 parent, void *fh, int event, bool unicast,
			  bool rtnl_held)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, event,
			  rtnl_held) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);

	if (err > 0)
		err = 0;
	return err;
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct tcf_block *block, struct Qdisc *q,
			      u32 parent, void *fh, bool unicast, bool *last,
			      bool rtnl_held, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
			  rtnl_held) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");

	if (err > 0)
		err = 0;
	return err;
}

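/* Notify user space about every filter on the chain, e.g. before the whole
 * chain is flushed.
 */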
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct tcf_block *block, struct Qdisc *q,
				 u32 parent, struct nlmsghdr *n,
				 struct tcf_chain *chain, int event,
				 bool rtnl_held)
{
	struct tcf_proto *tp;

	for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
	     tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
		tfilter_notify(net, oskb, n, tp, block,
			       q, parent, NULL, event, false, rtnl_held);
}

static void tfilter_put(struct tcf_proto *tp, void *fh)
{
	if (tp->ops->put && fh)
		tp->ops->put(tp, fh);
}

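/* RTM_NEWTFILTER: create or change a filter. Runs without rtnl when both the
 * qdisc and the classifier are marked unlocked; otherwise rtnl is taken below.
 */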
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	tp = NULL;
	cl = 0;
	block = NULL;

	if (prio == 0) {
		/* If no priority is provided by the user,
		 * we allocate one.
		 */
		if (n->nlmsg_flags & NLM_F_CREATE) {
			prio = TC_H_MAKE(0x80000000U, 0U);
			prio_allocate = true;
		} else {
			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
	 * block is shared (no qdisc found), qdisc is not unlocked, classifier
	 * type is not specified, classifier is not unlocked.
	 */
	if (rtnl_held ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, true);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
		err = -ENOMEM;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = PTR_ERR(tp);
		goto errout_locked;
	}

	if (tp == NULL) {
		struct tcf_proto *tp_new = NULL;

		if (chain->flushing) {
			err = -EAGAIN;
			goto errout_locked;
		}

		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
			err = -EINVAL;
			goto errout_locked;
		}

		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout_locked;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
							       &chain_info));

		mutex_unlock(&chain->filter_chain_lock);
		tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
					  protocol, prio, chain, rtnl_held,
					  extack);
		if (IS_ERR(tp_new)) {
			err = PTR_ERR(tp_new);
			goto errout_tp;
		}

		tp_created = 1;
		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
						rtnl_held);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout_tp;
		}
	} else {
		mutex_unlock(&chain->filter_chain_lock);
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout;
		}
	} else if (n->nlmsg_flags & NLM_F_EXCL) {
		tfilter_put(tp, fh);
		NL_SET_ERR_MSG(extack, "Filter already exists");
		err = -EEXIST;
		goto errout;
	}

	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
		err = -EINVAL;
		goto errout;
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
			      rtnl_held, extack);
	if (err == 0) {
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_NEWTFILTER, false, rtnl_held);
		tfilter_put(tp, fh);
	}

errout:
	if (err && tp_created)
		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
errout_tp:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		if (!tp_created)
			tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	if (err == -EAGAIN) {
		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
		 * of target chain.
		 */
		rtnl_held = true;
		/* Replay the request. */
		goto replay;
	}
	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

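/* RTM_DELTFILTER: delete a single filter, remove a whole proto, or flush the
 * chain when priority is zero.
 */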
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	/* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
	 * found), qdisc is not unlocked, classifier type is not specified,
	 * classifier is not unlocked.
	 */
	if (!prio ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		/* User requested flush on non-existent chain. Nothing to do,
		 * so just return success.
		 */
		if (prio == 0) {
			err = 0;
			goto errout;
		}
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -ENOENT;
		goto errout;
	}

	if (prio == 0) {
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER, rtnl_held);
		tcf_chain_flush(chain, rtnl_held);
		err = 0;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout_locked;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout_locked;
	} else if (t->tcm_handle == 0) {
		tcf_chain_tp_remove(chain, &chain_info, tp);
		mutex_unlock(&chain->filter_chain_lock);

		tcf_proto_put(tp, rtnl_held, NULL);
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_DELTFILTER, false, rtnl_held);
		err = 0;
		goto errout;
	}
	mutex_unlock(&chain->filter_chain_lock);

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		bool last;

		err = tfilter_del_notify(net, skb, n, tp, block,
					 q, parent, fh, false, &last,
					 rtnl_held, extack);

		if (err)
			goto errout;
		if (last)
			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
	}

errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

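/* RTM_GETTFILTER: look up a single filter and unicast it back to the
 * requester.
 */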
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0) {
		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
	 * unlocked, classifier type is not specified, classifier is not
	 * unlocked.
	 */
	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -EINVAL;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	mutex_unlock(&chain->filter_chain_lock);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		err = tfilter_notify(net, skb, n, tp, block, q, parent,
				     fh, RTM_NEWTFILTER, true, rtnl_held);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
	}

	tfilter_put(tp, fh);
errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;
}

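/* Walker context used when dumping the filters of one proto to a netlink skb. */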
struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct tcf_block *block;
	struct Qdisc *q;
	u32 parent;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
			     n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER, true);
}

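/* Dump all protos and filters on one chain, resuming from the position stored
 * in cb->args. Returns false when the dump could not be completed; the caller
 * treats that as EMSGSIZE.
 */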
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_proto *tp, *tp_prev;
	struct tcf_dump_args arg;

	for (tp = __tcf_get_next_proto(chain, NULL);
	     tp;
	     tp_prev = tp,
		     tp = __tcf_get_next_proto(chain, tp),
		     tcf_proto_put(tp_prev, true, NULL),
		     (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER, true) <= 0)
				goto errout;
			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		arg.w.cookie = cb->args[2];
		tp->ops->walk(tp, &arg.w, true);
		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			goto errout;
	}
	return true;

errout:
	tcf_proto_put(tp, true, NULL);
	return false;
}

/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcf_chain *chain, *chain_prev;
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;
	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     NULL, cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index)) {
			tcf_chain_put(chain);
			err = -EMSGSIZE;
			break;
		}
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

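/* Fill a netlink message describing a chain and, when present, its template. */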
static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
			      void *tmplt_priv, u32 chain_index,
			      struct net *net, struct sk_buff *skb,
			      struct tcf_block *block,
			      u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *b = skb_tail_pointer(skb);
	const struct tcf_proto_ops *ops;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	void *priv;

	ops = tmplt_ops;
	priv = tmplt_priv;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_handle = 0;
	if (block->q) {
		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
		tcm->tcm_parent = block->q->handle;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}

	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
		goto nla_put_failure;

	if (ops) {
		if (nla_put_string(skb, TCA_KIND, ops->kind))
			goto nla_put_failure;
		if (ops->tmplt_dump(skb, net, priv) < 0)
			goto nla_put_failure;
	}

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct tcf_block *block = chain->block;
	struct net *net = block->net;
	struct sk_buff *skb;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
			       chain->index, net, skb, block, portid,
			       seq, flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}

static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct net *net = block->net;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}

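/* Attach a classifier template to the chain if the user specified a kind. */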
static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
			      struct nlattr **tca,
			      struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;
	void *tmplt_priv;

	/* If kind is not set, user did not specify template. */
	if (!tca[TCA_KIND])
		return 0;

	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
		return -EOPNOTSUPP;
	}

	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
	if (IS_ERR(tmplt_priv)) {
		module_put(ops->owner);
		return PTR_ERR(tmplt_priv);
	}
	chain->tmplt_ops = ops;
	chain->tmplt_priv = tmplt_priv;
	return 0;
}

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv)
{
	/* If no template ops are set, there is nothing to do. */
	if (!tmplt_ops)
		return;

	tmplt_ops->tmplt_destroy(tmplt_priv);
	module_put(tmplt_ops->owner);
}

/* Add/delete/get a chain */

static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	unsigned long cl;
	int err;

	if (n->nlmsg_type != RTM_GETCHAIN &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	parent = t->tcm_parent;
	cl = 0;

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block))
		return PTR_ERR(block);

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout_block;
	}

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (n->nlmsg_type == RTM_NEWCHAIN) {
		if (chain) {
			if (tcf_chain_held_by_acts_only(chain)) {
				/* The chain exists only because there is
				 * some action referencing it.
				 */
				tcf_chain_hold(chain);
			} else {
				NL_SET_ERR_MSG(extack, "Filter chain already exists");
				err = -EEXIST;
				goto errout_block_locked;
			}
		} else {
			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
				err = -ENOENT;
				goto errout_block_locked;
			}
			chain = tcf_chain_create(block, chain_index);
			if (!chain) {
				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
				err = -ENOMEM;
				goto errout_block_locked;
			}
		}
	} else {
		if (!chain || tcf_chain_held_by_acts_only(chain)) {
			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
			err = -EINVAL;
			goto errout_block_locked;
		}
		tcf_chain_hold(chain);
	}

	if (n->nlmsg_type == RTM_NEWCHAIN) {
		/* Modifying chain requires holding parent block lock. In case
		 * the chain was successfully added, take a reference to the
		 * chain. This ensures that an empty chain does not disappear at
		 * the end of this function.
		 */
		tcf_chain_hold(chain);
		chain->explicitly_created = true;
	}
	mutex_unlock(&block->lock);

	switch (n->nlmsg_type) {
	case RTM_NEWCHAIN:
		err = tc_chain_tmplt_add(chain, net, tca, extack);
		if (err) {
			tcf_chain_put_explicitly_created(chain);
			goto errout;
		}

		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);
		break;
	case RTM_DELCHAIN:
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER, true);
		/* Flush the chain first as the user requested chain removal. */
		tcf_chain_flush(chain, true);
		/* In case the chain was successfully deleted, put a reference
		 * to the chain previously taken during addition.
		 */
		tcf_chain_put_explicitly_created(chain);
		break;
	case RTM_GETCHAIN:
		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
				      n->nlmsg_seq, n->nlmsg_type, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
		break;
	default:
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(extack, "Unsupported message type");
		goto errout;
	}

errout:
	tcf_chain_put(chain);
errout_block:
	tcf_block_release(q, block, true);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;

errout_block_locked:
	mutex_unlock(&block->lock);
	goto errout_block;
}

/* called with RTNL */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_chain *chain;
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	mutex_lock(&block->lock);
	list_for_each_entry(chain, &block->chain_list, list) {
		if ((tca[TCA_CHAIN] &&
		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
			continue;
		if (index < index_start) {
			index++;
			continue;
		}
		if (tcf_chain_held_by_acts_only(chain))
			continue;
		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
					 chain->index, net, skb, block,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 RTM_NEWCHAIN);
		if (err <= 0)
			break;
		index++;
	}
	mutex_unlock(&block->lock);

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

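/* Parse and bind the actions (or the legacy police attribute) carried in a
 * filter change request into the exts structure.
 */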
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      bool rtnl_held, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;
		size_t attr_size = 0;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND, rtnl_held,
						extack);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			int err;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      exts->actions, &attr_size,
					      rtnl_held, extack);
			if (err < 0)
				return err;
			exts->nr_actions = err;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police])) {
		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
		return -EOPNOTSUPP;
	}
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2  was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			nest = nla_nest_start_noflag(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start_noflag(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);


int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);

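/* Run an offload request through all callbacks registered on the block.
 * Returns the number of callbacks that succeeded, or an error as soon as one
 * fails when err_stop is set.
 */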
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		     void *type_data, bool err_stop)
{
	struct tcf_block_cb *block_cb;
	int ok_count = 0;
	int err;

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop)
		return -EOPNOTSUPP;

	list_for_each_entry(block_cb, &block->cb_list, list) {
		err = block_cb->cb(type, type_data, block_cb->cb_priv);
		if (err) {
			if (err_stop)
				return err;
		} else {
			ok_count++;
		}
	}
	return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);

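/* Translate a filter's tc actions into the flow_action intermediate
 * representation consumed by drivers for hardware offload.
 */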
int tc_setup_flow_action(struct flow_action *flow_action,
			 const struct tcf_exts *exts)
{
	const struct tc_action *act;
	int i, j, k;

	if (!exts)
		return 0;

	j = 0;
	tcf_exts_for_each_action(i, act, exts) {
		struct flow_action_entry *entry;

		entry = &flow_action->entries[j];
		if (is_tcf_gact_ok(act)) {
			entry->id = FLOW_ACTION_ACCEPT;
		} else if (is_tcf_gact_shot(act)) {
			entry->id = FLOW_ACTION_DROP;
		} else if (is_tcf_gact_trap(act)) {
			entry->id = FLOW_ACTION_TRAP;
		} else if (is_tcf_gact_goto_chain(act)) {
			entry->id = FLOW_ACTION_GOTO;
			entry->chain_index = tcf_gact_goto_chain_index(act);
		} else if (is_tcf_mirred_egress_redirect(act)) {
			entry->id = FLOW_ACTION_REDIRECT;
			entry->dev = tcf_mirred_dev(act);
		} else if (is_tcf_mirred_egress_mirror(act)) {
			entry->id = FLOW_ACTION_MIRRED;
			entry->dev = tcf_mirred_dev(act);
		} else if (is_tcf_vlan(act)) {
			switch (tcf_vlan_action(act)) {
			case TCA_VLAN_ACT_PUSH:
				entry->id = FLOW_ACTION_VLAN_PUSH;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			case TCA_VLAN_ACT_POP:
				entry->id = FLOW_ACTION_VLAN_POP;
				break;
			case TCA_VLAN_ACT_MODIFY:
				entry->id = FLOW_ACTION_VLAN_MANGLE;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			default:
				goto err_out;
			}
		} else if (is_tcf_tunnel_set(act)) {
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tcf_tunnel_info(act);
		} else if (is_tcf_tunnel_release(act)) {
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		} else if (is_tcf_pedit(act)) {
			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
				switch (tcf_pedit_cmd(act, k)) {
				case TCA_PEDIT_KEY_EX_CMD_SET:
					entry->id = FLOW_ACTION_MANGLE;
					break;
				case TCA_PEDIT_KEY_EX_CMD_ADD:
					entry->id = FLOW_ACTION_ADD;
					break;
				default:
					goto err_out;
				}
				entry->mangle.htype = tcf_pedit_htype(act, k);
				entry->mangle.mask = tcf_pedit_mask(act, k);
				entry->mangle.val = tcf_pedit_val(act, k);
				entry->mangle.offset = tcf_pedit_offset(act, k);
				entry = &flow_action->entries[++j];
			}
		} else if (is_tcf_csum(act)) {
			entry->id = FLOW_ACTION_CSUM;
			entry->csum_flags = tcf_csum_update_flags(act);
		} else if (is_tcf_skbedit_mark(act)) {
			entry->id = FLOW_ACTION_MARK;
			entry->mark = tcf_skbedit_mark(act);
		} else if (is_tcf_sample(act)) {
			entry->id = FLOW_ACTION_SAMPLE;
			entry->sample.psample_group =
				tcf_sample_psample_group(act);
			entry->sample.trunc_size = tcf_sample_trunc_size(act);
			entry->sample.truncate = tcf_sample_truncate(act);
			entry->sample.rate = tcf_sample_rate(act);
		} else if (is_tcf_police(act)) {
			entry->id = FLOW_ACTION_POLICE;
			entry->police.burst = tcf_police_tcfp_burst(act);
			entry->police.rate_bytes_ps =
				tcf_police_rate_bytes_ps(act);
		} else if (is_tcf_ct(act)) {
			entry->id = FLOW_ACTION_CT;
			entry->ct.action = tcf_ct_action(act);
			entry->ct.zone = tcf_ct_zone(act);
		} else {
			goto err_out;
		}

		if (!is_tcf_pedit(act))
			j++;
	}
	return 0;
err_out:
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(tc_setup_flow_action);

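/* Number of flow_action entries needed for these exts; each pedit key becomes
 * its own entry.
 */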
unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
{
	unsigned int num_acts = 0;
	struct tc_action *act;
	int i;

	tcf_exts_for_each_action(i, act, exts) {
		if (is_tcf_pedit(act))
			num_acts += tcf_pedit_nkeys(act);
		else
			num_acts++;
	}
	return num_acts;
}
EXPORT_SYMBOL(tcf_exts_num_actions);

static __net_init int tcf_net_init(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock_init(&tn->idr_lock);
	idr_init(&tn->idr);
	return 0;
}

static void __net_exit tcf_net_exit(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_destroy(&tn->idr);
}

static struct pernet_operations tcf_net_ops = {
	.init = tcf_net_init,
	.exit = tcf_net_exit,
	.id   = &tcf_net_id,
	.size = sizeof(struct tcf_net),
};

static int __init tc_filter_init(void)
{
	int err;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&tcf_net_ops);
	if (err)
		goto err_register_pernet_subsys;

	err = rhashtable_init(&indr_setup_block_ht,
			      &tc_indr_setup_block_ht_params);
	if (err)
		goto err_rhash_setup_block_ht;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
		      tc_dump_chain, 0);

	return 0;
err_rhash_setup_block_ht:
	unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys:
	destroy_workqueue(tc_filter_wq);
	return err;
}

subsys_initcall(tc_filter_init);