// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_police.h>
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
		     struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;

	ops = __tcf_proto_lookup_ops(kind);
	if (ops)
		return ops;
#ifdef CONFIG_MODULES
	if (rtnl_held)
		rtnl_unlock();
	request_module("cls_%s", kind);
	if (rtnl_held)
		rtnl_lock();
	ops = __tcf_proto_lookup_ops(kind);
	/* We dropped the RTNL semaphore in order to perform
	 * the module load. So, even if we succeeded in loading
	 * the module we have to replay the request. We indicate
	 * this using -EAGAIN.
	 */
	if (ops) {
		module_put(ops->owner);
		return ERR_PTR(-EAGAIN);
	}
#endif
	NL_SET_ERR_MSG(extack, "TC classifier not found");
	return ERR_PTR(-ENOENT);
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

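/* Queue work on the filter workqueue once an RCU grace period has elapsed,
 * so readers still traversing the old filter state have finished first.
 */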
bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
	INIT_RCU_WORK(rwork, func);
	return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);

/* Select new prio value from the range, managed by kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}

static bool tcf_proto_is_unlocked(const char *kind)
{
	const struct tcf_proto_ops *ops;
	bool ret;

	ops = tcf_proto_lookup_ops(kind, false, NULL);
	/* On error return false to take rtnl lock. Proto lookup/create
	 * functions will perform lookup again and properly handle errors.
	 */
	if (IS_ERR(ops))
		return false;

	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
	module_put(ops->owner);
	return ret;
}

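/* Allocate a new classifier instance: look up its ops (possibly loading the
 * module), take a module reference and run the classifier's ->init() hook.
 * Returns an ERR_PTR() on failure.
 */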
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
					  bool rtnl_held,
					  struct netlink_ext_ack *extack)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
	if (IS_ERR(tp->ops)) {
		err = PTR_ERR(tp->ops);
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->chain = chain;
	spin_lock_init(&tp->lock);
	refcount_set(&tp->refcnt, 1);

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

static void tcf_proto_get(struct tcf_proto *tp)
{
	refcount_inc(&tp->refcnt);
}

static void tcf_chain_put(struct tcf_chain *chain);

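/* Final teardown of a classifier instance: ask the classifier to release its
 * filters, drop the chain and module references and free tp after an RCU
 * grace period.
 */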
static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
			      struct netlink_ext_ack *extack)
{
	tp->ops->destroy(tp, rtnl_held, extack);
	tcf_chain_put(tp->chain);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}

static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
			  struct netlink_ext_ack *extack)
{
	if (refcount_dec_and_test(&tp->refcnt))
		tcf_proto_destroy(tp, rtnl_held, extack);
}

static int walker_check_empty(struct tcf_proto *tp, void *fh,
			      struct tcf_walker *arg)
{
	if (fh) {
		arg->nonempty = true;
		return -1;
	}
	return 0;
}

static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
{
	struct tcf_walker walker = { .fn = walker_check_empty, };

	if (tp->ops->walk) {
		tp->ops->walk(tp, &walker, rtnl_held);
		return !walker.nonempty;
	}
	return true;
}

static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
{
	spin_lock(&tp->lock);
	if (tcf_proto_is_empty(tp, rtnl_held))
		tp->deleting = true;
	spin_unlock(&tp->lock);
	return tp->deleting;
}

static void tcf_proto_mark_delete(struct tcf_proto *tp)
{
	spin_lock(&tp->lock);
	tp->deleting = true;
	spin_unlock(&tp->lock);
}

static bool tcf_proto_is_deleting(struct tcf_proto *tp)
{
	bool deleting;

	spin_lock(&tp->lock);
	deleting = tp->deleting;
	spin_unlock(&tp->lock);

	return deleting;
}

#define ASSERT_BLOCK_LOCKED(block)					\
	lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
	struct list_head list;
	tcf_chain_head_change_t *chain_head_change;
	void *chain_head_change_priv;
};

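/* Allocate a chain and link it into the block's chain list; the caller must
 * hold block->lock. Chain 0 is also tracked in block->chain0 for head change
 * callbacks.
 */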
static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	mutex_init(&chain->filter_chain_lock);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	if (!chain->index)
		block->chain0.chain = chain;
	return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
				       struct tcf_proto *tp_head)
{
	if (item->chain_head_change)
		item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
				   struct tcf_proto *tp_head)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_block *block = chain->block;

	if (chain->index)
		return;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
		tcf_chain_head_change_item(item, tp_head);
	mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	ASSERT_BLOCK_LOCKED(block);

	list_del(&chain->list);
	if (!chain->index)
		block->chain0.chain = NULL;

	if (list_empty(&block->chain_list) &&
	    refcount_read(&block->refcnt) == 0)
		return true;

	return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
	mutex_destroy(&block->lock);
	kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
	struct tcf_block *block = chain->block;

	mutex_destroy(&chain->filter_chain_lock);
	kfree_rcu(chain, rcu);
	if (free_block)
		tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	/* In case all the references are action references, this
	 * chain should not be shown to the user.
	 */
	return chain->refcnt == chain->action_refcnt;
}

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index)
			return chain;
	}
	return NULL;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast);

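/* Look up a chain by index, optionally creating it, and take a reference.
 * References taken on behalf of actions are counted separately so that
 * chains held only by actions are not shown to user space.
 */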
static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
					 u32 chain_index, bool create,
					 bool by_act)
{
	struct tcf_chain *chain = NULL;
	bool is_first_reference;

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (chain) {
		tcf_chain_hold(chain);
	} else {
		if (!create)
			goto errout;
		chain = tcf_chain_create(block, chain_index);
		if (!chain)
			goto errout;
	}

	if (by_act)
		++chain->action_refcnt;
	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
	mutex_unlock(&block->lock);

	/* Send notification only in case we got the first
	 * non-action reference. Until then, the chain acts only as
	 * a placeholder for actions pointing to it and user ought
	 * not know about them.
	 */
	if (is_first_reference && !by_act)
		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);

	return chain;

errout:
	mutex_unlock(&block->lock);
	return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				       bool create)
{
	return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
	return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
			    bool explicitly_created)
{
	struct tcf_block *block = chain->block;
	const struct tcf_proto_ops *tmplt_ops;
	bool free_block = false;
	unsigned int refcnt;
	void *tmplt_priv;

	mutex_lock(&block->lock);
	if (explicitly_created) {
		if (!chain->explicitly_created) {
			mutex_unlock(&block->lock);
			return;
		}
		chain->explicitly_created = false;
	}

	if (by_act)
		chain->action_refcnt--;

	/* tc_chain_notify_delete can't be called while holding block lock.
	 * However, when block is unlocked chain can be changed concurrently, so
	 * save these to temporary variables.
	 */
	refcnt = --chain->refcnt;
	tmplt_ops = chain->tmplt_ops;
	tmplt_priv = chain->tmplt_priv;

	/* The last dropped non-action reference will trigger notification. */
	if (refcnt - chain->action_refcnt == 0 && !by_act) {
		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
				       block, NULL, 0, 0, false);
		/* Last reference to chain, no need to lock. */
		chain->flushing = false;
	}

	if (refcnt == 0)
		free_block = tcf_chain_detach(chain);
	mutex_unlock(&block->lock);

	if (refcnt == 0) {
		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
		tcf_chain_destroy(chain, free_block);
	}
}

static void tcf_chain_put(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, true);
}

static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
	struct tcf_proto *tp, *tp_next;

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_dereference(chain->filter_chain, chain);
	RCU_INIT_POINTER(chain->filter_chain, NULL);
	tcf_chain0_head_change(chain, NULL);
	chain->flushing = true;
	mutex_unlock(&chain->filter_chain_lock);

	while (tp) {
		tp_next = rcu_dereference_protected(tp->next, 1);
		tcf_proto_put(tp, rtnl_held, NULL);
		tp = tp_next;
	}
}

static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
{
	const struct Qdisc_class_ops *cops;
	struct Qdisc *qdisc;

	if (!dev_ingress_queue(dev))
		return NULL;

	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
	if (!qdisc)
		return NULL;

	cops = qdisc->ops->cl_ops;
	if (!cops)
		return NULL;

	if (!cops->tcf_block)
		return NULL;

	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
}

static struct rhashtable indr_setup_block_ht;

struct tc_indr_block_dev {
	struct rhash_head ht_node;
	struct net_device *dev;
	unsigned int refcnt;
	struct list_head cb_list;
	struct tcf_block *block;
};

struct tc_indr_block_cb {
	struct list_head list;
	void *cb_priv;
	tc_indr_block_bind_cb_t *cb;
	void *cb_ident;
};

static const struct rhashtable_params tc_indr_setup_block_ht_params = {
	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
	.key_len	= sizeof(struct net_device *),
};

static struct tc_indr_block_dev *
tc_indr_block_dev_lookup(struct net_device *dev)
{
	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
				      tc_indr_setup_block_ht_params);
}

static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
{
	struct tc_indr_block_dev *indr_dev;

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (indr_dev)
		goto inc_ref;

	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
	if (!indr_dev)
		return NULL;

	INIT_LIST_HEAD(&indr_dev->cb_list);
	indr_dev->dev = dev;
	indr_dev->block = tc_dev_ingress_block(dev);
	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
				   tc_indr_setup_block_ht_params)) {
		kfree(indr_dev);
		return NULL;
	}

inc_ref:
	indr_dev->refcnt++;
	return indr_dev;
}

static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
{
	if (--indr_dev->refcnt)
		return;

	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
			       tc_indr_setup_block_ht_params);
	kfree(indr_dev);
}

static struct tc_indr_block_cb *
tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
			tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;

	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
		if (indr_block_cb->cb == cb &&
		    indr_block_cb->cb_ident == cb_ident)
			return indr_block_cb;
	return NULL;
}

static struct tc_indr_block_cb *
tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
		     tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;

	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
	if (indr_block_cb)
		return ERR_PTR(-EEXIST);

	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
	if (!indr_block_cb)
		return ERR_PTR(-ENOMEM);

	indr_block_cb->cb_priv = cb_priv;
	indr_block_cb->cb = cb;
	indr_block_cb->cb_ident = cb_ident;
	list_add(&indr_block_cb->list, &indr_dev->cb_list);

	return indr_block_cb;
}

static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
{
	list_del(&indr_block_cb->list);
	kfree(indr_block_cb);
}

static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo);

static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
				  struct tc_indr_block_cb *indr_block_cb,
				  enum flow_block_command command)
{
	struct flow_block_offload bo = {
		.command	= command,
		.binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
		.net		= dev_net(indr_dev->dev),
		.block_shared	= tcf_block_non_null_shared(indr_dev->block),
	};
	INIT_LIST_HEAD(&bo.cb_list);

	if (!indr_dev->block)
		return;

	bo.block = &indr_dev->block->flow_block;

	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
			  &bo);
	tcf_block_setup(indr_dev->block, &bo);
}

int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
				tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;
	int err;

	indr_dev = tc_indr_block_dev_get(dev);
	if (!indr_dev)
		return -ENOMEM;

	indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
	err = PTR_ERR_OR_ZERO(indr_block_cb);
	if (err)
		goto err_dev_put;

	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND);
	return 0;

err_dev_put:
	tc_indr_block_dev_put(indr_dev);
	return err;
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);

int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	int err;

	rtnl_lock();
	err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);

void __tc_indr_block_cb_unregister(struct net_device *dev,
				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (!indr_dev)
		return;

	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
	if (!indr_block_cb)
		return;

	/* Send unbind message if required to free any block cbs. */
	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND);
	tc_indr_block_cb_del(indr_block_cb);
	tc_indr_block_dev_put(indr_dev);
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);

void tc_indr_block_cb_unregister(struct net_device *dev,
				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	rtnl_lock();
	__tc_indr_block_cb_unregister(dev, cb, cb_ident);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);

static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
			       struct tcf_block_ext_info *ei,
			       enum flow_block_command command,
			       struct netlink_ext_ack *extack)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;
	struct flow_block_offload bo = {
		.command	= command,
		.binder_type	= ei->binder_type,
		.net		= dev_net(dev),
		.block		= &block->flow_block,
		.block_shared	= tcf_block_shared(block),
		.extack		= extack,
	};
	INIT_LIST_HEAD(&bo.cb_list);

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (!indr_dev)
		return;

	indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL;

	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
				  &bo);

	tcf_block_setup(block, &bo);
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
	return block->offloadcnt;
}

static int tcf_block_offload_cmd(struct tcf_block *block,
				 struct net_device *dev,
				 struct tcf_block_ext_info *ei,
				 enum flow_block_command command,
				 struct netlink_ext_ack *extack)
{
	struct flow_block_offload bo = {};
	int err;

	bo.net = dev_net(dev);
	bo.command = command;
	bo.binder_type = ei->binder_type;
	bo.block = &block->flow_block;
	bo.block_shared = tcf_block_shared(block);
	bo.extack = extack;
	INIT_LIST_HEAD(&bo.cb_list);

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
	if (err < 0)
		return err;

	return tcf_block_setup(block, &bo);
}

static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
				  struct tcf_block_ext_info *ei,
				  struct netlink_ext_ack *extack)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_inc;

	/* If tc offload feature is disabled and the block we try to bind
	 * to already has some offloaded filters, forbid to bind.
	 */
	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
		return -EOPNOTSUPP;
	}

	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_inc;
	if (err)
		return err;

	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
	return 0;

no_offload_dev_inc:
	if (tcf_block_offload_in_use(block))
		return -EOPNOTSUPP;
	block->nooffloaddevcnt++;
	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
	return 0;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
				     struct tcf_block_ext_info *ei)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_dec;
	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_dec;
	return;

no_offload_dev_dec:
	WARN_ON(block->nooffloaddevcnt-- == 0);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
			      struct tcf_block_ext_info *ei,
			      struct netlink_ext_ack *extack)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_chain *chain0;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
		return -ENOMEM;
	}
	item->chain_head_change = ei->chain_head_change;
	item->chain_head_change_priv = ei->chain_head_change_priv;

	mutex_lock(&block->lock);
	chain0 = block->chain0.chain;
	if (chain0)
		tcf_chain_hold(chain0);
	else
		list_add(&item->list, &block->chain0.filter_chain_list);
	mutex_unlock(&block->lock);

	if (chain0) {
		struct tcf_proto *tp_head;

		mutex_lock(&chain0->filter_chain_lock);

		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
		if (tp_head)
			tcf_chain_head_change_item(item, tp_head);

		mutex_lock(&block->lock);
		list_add(&item->list, &block->chain0.filter_chain_list);
		mutex_unlock(&block->lock);

		mutex_unlock(&chain0->filter_chain_lock);
		tcf_chain_put(chain0);
	}

	return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
			      struct tcf_block_ext_info *ei)
{
	struct tcf_filter_chain_list_item *item;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
		    (item->chain_head_change == ei->chain_head_change &&
		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
			if (block->chain0.chain)
				tcf_chain_head_change_item(item, NULL);
			list_del(&item->list);
			mutex_unlock(&block->lock);

			kfree(item);
			return;
		}
	}
	mutex_unlock(&block->lock);
	WARN_ON(1);
}

struct tcf_net {
	spinlock_t idr_lock; /* Protects idr */
	struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
			    struct netlink_ext_ack *extack)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);
	int err;

	idr_preload(GFP_KERNEL);
	spin_lock(&tn->idr_lock);
	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
			    GFP_NOWAIT);
	spin_unlock(&tn->idr_lock);
	idr_preload_end();

	return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock(&tn->idr_lock);
	idr_remove(&tn->idr, block->index);
	spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	block = kzalloc(sizeof(*block), GFP_KERNEL);
	if (!block) {
		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
		return ERR_PTR(-ENOMEM);
	}
	mutex_init(&block->lock);
	flow_block_init(&block->flow_block);
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->owner_list);
	INIT_LIST_HEAD(&block->chain0.filter_chain_list);

	refcount_set(&block->refcnt, 1);
	block->net = net;
	block->index = block_index;

	/* Don't store q pointer for blocks which are shared */
	if (!tcf_block_shared(block))
		block->q = q;
	return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	return idr_find(&tn->idr, block_index);
}

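/* Look up a block by index and take a reference, unless the block is
 * already on its way to destruction (refcount has dropped to zero).
 */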
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
	struct tcf_block *block;

	rcu_read_lock();
	block = tcf_block_lookup(net, block_index);
	if (block && !refcount_inc_not_zero(&block->refcnt))
		block = NULL;
	rcu_read_unlock();

	return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	mutex_lock(&block->lock);
	if (chain)
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);
	else
		chain = list_first_entry_or_null(&block->chain_list,
						 struct tcf_chain, list);

	/* skip all action-only chains */
	while (chain && tcf_chain_held_by_acts_only(chain))
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);

	if (chain)
		tcf_chain_hold(chain);
	mutex_unlock(&block->lock);

	return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * block. It properly obtains block->lock and takes reference to chain before
 * returning it. Users of this function must be tolerant to concurrent chain
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

	if (chain)
		tcf_chain_put(chain);

	return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);

static struct tcf_proto *
__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
	u32 prio = 0;

	ASSERT_RTNL();
	mutex_lock(&chain->filter_chain_lock);

	if (!tp) {
		tp = tcf_chain_dereference(chain->filter_chain, chain);
	} else if (tcf_proto_is_deleting(tp)) {
		/* 'deleting' flag is set and chain->filter_chain_lock was
		 * unlocked, which means next pointer could be invalid. Restart
		 * search.
		 */
		prio = tp->prio + 1;
		tp = tcf_chain_dereference(chain->filter_chain, chain);

		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
			if (!tp->deleting && tp->prio >= prio)
				break;
	} else {
		tp = tcf_chain_dereference(tp->next, chain);
	}

	if (tp)
		tcf_proto_get(tp);

	mutex_unlock(&chain->filter_chain_lock);

	return tp;
}

/* Function to be used by all clients that want to iterate over all tp's on
 * chain. Users of this function must be tolerant to concurrent tp
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_proto *
tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
		   bool rtnl_held)
{
	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);

	if (tp)
		tcf_proto_put(tp, rtnl_held, NULL);

	return tp_next;
}
EXPORT_SYMBOL(tcf_get_next_proto);

static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
{
	struct tcf_chain *chain;

	/* Last reference to block. At this point chains cannot be added or
	 * removed concurrently.
	 */
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		tcf_chain_put_explicitly_created(chain);
		tcf_chain_flush(chain, rtnl_held);
	}
}

/* Look up the Qdisc and increment its reference counter.
 * Set parent, if necessary.
 */

static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
			    u32 *parent, int ifindex, bool rtnl_held,
			    struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *cops;
	struct net_device *dev;
	int err = 0;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	rcu_read_lock();

	/* Find link */
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	/* Find qdisc */
	if (!*parent) {
		*q = dev->qdisc;
		*parent = (*q)->handle;
	} else {
		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
		if (!*q) {
			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
			err = -EINVAL;
			goto errout_rcu;
		}
	}

	*q = qdisc_refcount_inc_nz(*q);
	if (!*q) {
		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
		err = -EINVAL;
		goto errout_rcu;
	}

	/* Is it classful? */
	cops = (*q)->ops->cl_ops;
	if (!cops) {
		NL_SET_ERR_MSG(extack, "Qdisc not classful");
		err = -EINVAL;
		goto errout_qdisc;
	}

	if (!cops->tcf_block) {
		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
		err = -EOPNOTSUPP;
		goto errout_qdisc;
	}

errout_rcu:
	/* At this point we know that qdisc is not noop_qdisc,
	 * which means that qdisc holds a reference to net_device
	 * and we hold a reference to qdisc, so it is safe to release
	 * rcu read lock.
	 */
	rcu_read_unlock();
	return err;

errout_qdisc:
	rcu_read_unlock();

	if (rtnl_held)
		qdisc_put(*q);
	else
		qdisc_put_unlocked(*q);
	*q = NULL;

	return err;
}

static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
			       int ifindex, struct netlink_ext_ack *extack)
{
	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		*cl = cops->find(q, parent);
		if (*cl == 0) {
			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
			return -ENOENT;
		}
	}

	return 0;
}

static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
					  unsigned long cl, int ifindex,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, block_index);
		if (!block) {
			NL_SET_ERR_MSG(extack, "Block of given index was not found");
			return ERR_PTR(-EINVAL);
		}
	} else {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		block = cops->tcf_block(q, cl, extack);
		if (!block)
			return ERR_PTR(-EINVAL);

		if (tcf_block_shared(block)) {
			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
			return ERR_PTR(-EOPNOTSUPP);
		}

		/* Always take reference to block in order to support execution
		 * of rules update path of cls API without rtnl lock. Caller
		 * must release block when it is finished using it. 'if' block
		 * of this conditional obtain reference to block by calling
		 * tcf_block_refcnt_get().
		 */
		refcount_inc(&block->refcnt);
	}

	return block;
}

static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
			    struct tcf_block_ext_info *ei, bool rtnl_held)
{
	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
		/* Flushing/putting all chains will cause the block to be
		 * deallocated when last chain is freed. However, if chain_list
		 * is empty, block has to be manually deallocated. After block
		 * reference counter reached 0, it is no longer possible to
		 * increment it or add new chains to block.
		 */
		bool free_block = list_empty(&block->chain_list);

		mutex_unlock(&block->lock);
		if (tcf_block_shared(block))
			tcf_block_remove(block, block->net);

		if (q)
			tcf_block_offload_unbind(block, q, ei);

		if (free_block)
1298
			tcf_block_destroy(block);
1299
		else
1300
			tcf_block_flush_all_chains(block, rtnl_held);
1301 1302 1303 1304 1305
	} else if (q) {
		tcf_block_offload_unbind(block, q, ei);
	}
}

static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
	__tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
					u32 *parent, unsigned long *cl,
					int ifindex, u32 block_index,
					struct netlink_ext_ack *extack)
{
	struct tcf_block *block;
	int err = 0;

	ASSERT_RTNL();

	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
	if (err)
		goto errout;

	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
	if (err)
		goto errout_qdisc;

	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout_qdisc;
	}

	return block;

errout_qdisc:
	if (*q)
		qdisc_put(*q);
errout:
	*q = NULL;
	return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
			      bool rtnl_held)
{
	if (!IS_ERR_OR_NULL(block))
		tcf_block_refcnt_put(block, rtnl_held);

	if (q) {
		if (rtnl_held)
			qdisc_put(q);
		else
			qdisc_put_unlocked(q);
	}
}

struct tcf_block_owner_item {
	struct list_head list;
	struct Qdisc *q;
	enum flow_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	if (block->keep_dst &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
	struct tcf_block_owner_item *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->owner_list, list)
		tcf_block_owner_netif_keep_dst(block, item->q,
					       item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	item->q = q;
	item->binder_type = binder_type;
	list_add(&item->list, &block->owner_list);
	return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
				enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	list_for_each_entry(item, &block->owner_list, list) {
		if (item->q == q && item->binder_type == binder_type) {
			list_del(&item->list);
			kfree(item);
			return;
		}
	}
	WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei,
		      struct netlink_ext_ack *extack)
{
	struct net *net = qdisc_net(q);
	struct tcf_block *block = NULL;
	int err;

	if (ei->block_index)
		/* block_index not 0 means the shared block is requested */
		block = tcf_block_refcnt_get(net, ei->block_index);

	if (!block) {
		block = tcf_block_create(net, q, ei->block_index, extack);
		if (IS_ERR(block))
			return PTR_ERR(block);
		if (tcf_block_shared(block)) {
			err = tcf_block_insert(block, net, extack);
			if (err)
				goto err_block_insert;
		}
	}

	err = tcf_block_owner_add(block, q, ei->binder_type);
	if (err)
		goto err_block_owner_add;

	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

	err = tcf_chain0_head_change_cb_add(block, ei, extack);
	if (err)
		goto err_chain0_head_change_cb_add;

	err = tcf_block_offload_bind(block, q, ei, extack);
	if (err)
		goto err_block_offload_bind;

	*p_block = block;
	return 0;

err_block_offload_bind:
	tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
	tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
	tcf_block_refcnt_put(block, true);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
		  struct netlink_ext_ack *extack)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	if (!block)
		return;
	tcf_chain0_head_change_cb_del(block, ei);
	tcf_block_owner_del(block, q, ei->binder_type);

	__tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	if (!block)
		return;
	tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);

static int
tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
{
	struct tcf_chain *chain, *chain_prev;
	struct tcf_proto *tp, *tp_prev;
	int err;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		for (tp = __tcf_get_next_proto(chain, NULL); tp;
		     tp_prev = tp,
			     tp = __tcf_get_next_proto(chain, tp),
			     tcf_proto_put(tp_prev, true, NULL)) {
			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
				if (err && add)
					goto err_playback_remove;
			} else if (add && offload_in_use) {
				err = -EOPNOTSUPP;
				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
				goto err_playback_remove;
			}
		}
	}

	return 0;

err_playback_remove:
	tcf_proto_put(tp, true, NULL);
	tcf_chain_put(chain);
	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
	return err;
}

static int tcf_block_bind(struct tcf_block *block,
			  struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;
	int err, i = 0;

	list_for_each_entry(block_cb, &bo->cb_list, list) {
		err = tcf_block_playback_offloads(block, block_cb->cb,
						  block_cb->cb_priv, true,
						  tcf_block_offload_in_use(block),
						  bo->extack);
		if (err)
			goto err_unroll;

		i++;
	}
	list_splice(&bo->cb_list, &block->flow_block.cb_list);

	return 0;

err_unroll:
	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		if (i-- > 0) {
			list_del(&block_cb->list);
			tcf_block_playback_offloads(block, block_cb->cb,
						    block_cb->cb_priv, false,
						    tcf_block_offload_in_use(block),
						    NULL);
		}
		flow_block_cb_free(block_cb);
	}

	return err;
}

static void tcf_block_unbind(struct tcf_block *block,
			     struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;

	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		tcf_block_playback_offloads(block, block_cb->cb,
					    block_cb->cb_priv, false,
					    tcf_block_offload_in_use(block),
					    NULL);
		list_del(&block_cb->list);
		flow_block_cb_free(block_cb);
	}
}

static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo)
{
	int err;

	switch (bo->command) {
	case FLOW_BLOCK_BIND:
		err = tcf_block_bind(block, bo);
		break;
	case FLOW_BLOCK_UNBIND:
		err = 0;
		tcf_block_unbind(block, bo);
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		__be16 protocol = tc_skb_protocol(skb);
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);

struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info)
{
	return tcf_chain_dereference(*chain_info->pprev, chain);
}

static int tcf_chain_tp_insert(struct tcf_chain *chain,
			       struct tcf_chain_info *chain_info,
			       struct tcf_proto *tp)
{
	if (chain->flushing)
		return -EAGAIN;

	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain0_head_change(chain, tp);
	tcf_proto_get(tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);

	return 0;
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);

	tcf_proto_mark_delete(tp);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate);

/* Try to insert new proto.
 * If proto with specified priority already exists, free new proto
 * and return existing one.
 */

static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
						    struct tcf_proto *tp_new,
						    u32 protocol, u32 prio,
						    bool rtnl_held)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp;
	int err = 0;

	mutex_lock(&chain->filter_chain_lock);

	tp = tcf_chain_tp_find(chain, &chain_info,
			       protocol, prio, false);
	if (!tp)
		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
	mutex_unlock(&chain->filter_chain_lock);

	if (tp) {
		tcf_proto_destroy(tp_new, rtnl_held, NULL);
		tp_new = tp;
	} else if (err) {
		tcf_proto_destroy(tp_new, rtnl_held, NULL);
		tp_new = ERR_PTR(err);
	}

	return tp_new;
}

static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
				      struct tcf_proto *tp, bool rtnl_held,
				      struct netlink_ext_ack *extack)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp_iter;
	struct tcf_proto **pprev;
	struct tcf_proto *next;

	mutex_lock(&chain->filter_chain_lock);

	/* Atomically find and remove tp from chain. */
	for (pprev = &chain->filter_chain;
	     (tp_iter = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp_iter->next) {
		if (tp_iter == tp) {
			chain_info.pprev = pprev;
			chain_info.next = tp_iter->next;
			WARN_ON(tp_iter->deleting);
			break;
		}
	}
	/* Verify that tp still exists and no new filters were inserted
	 * concurrently.
	 * Mark tp for deletion if it is empty.
	 */
	if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
		mutex_unlock(&chain->filter_chain_lock);
		return;
	}

	next = tcf_chain_dereference(chain_info.next, chain);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info.pprev, next);
	mutex_unlock(&chain->filter_chain_lock);

	tcf_proto_put(tp, rtnl_held, extack);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	if (tp) {
		chain_info->next = tp->next;
		tcf_proto_get(tp);
	} else {
		chain_info->next = NULL;
	}
	return tp;
}

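/* Fill a netlink message describing a single filter; when a filter handle is
 * given, the classifier's ->dump() callback adds its own attributes.
 */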
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
			 u32 portid, u32 seq, u16 flags, int event,
			 bool rtnl_held)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	if (q) {
		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
		tcm->tcm_parent = parent;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else {
1868 1869
		if (tp->ops->dump &&
		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct tcf_block *block, struct Qdisc *q,
			  u32 parent, void *fh, int event, bool unicast,
			  bool rtnl_held)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, event,
			  rtnl_held) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);

	if (err > 0)
		err = 0;
	return err;
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct tcf_block *block, struct Qdisc *q,
			      u32 parent, void *fh, bool unicast, bool *last,
			      bool rtnl_held, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
			  rtnl_held) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");

	if (err > 0)
		err = 0;
	return err;
}

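/* Send a notification for every filter on the chain; used when a whole
 * chain is flushed or deleted.
 */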
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct tcf_block *block, struct Qdisc *q,
				 u32 parent, struct nlmsghdr *n,
				 struct tcf_chain *chain, int event,
				 bool rtnl_held)
{
	struct tcf_proto *tp;

	for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
	     tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
		tfilter_notify(net, oskb, n, tp, block,
			       q, parent, NULL, event, false, rtnl_held);
}

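/* Drop the reference on a filter handle taken by tp->ops->get(), if the
 * classifier reference-counts its filters.
 */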
static void tfilter_put(struct tcf_proto *tp, void *fh)
{
	if (tp->ops->put && fh)
		tp->ops->put(tp, fh);
}

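/* Handle RTM_NEWTFILTER: create a new filter or change an existing one.
 * The handler runs without rtnl by default and only takes the lock when
 * the qdisc, the classifier or the request itself requires it; -EAGAIN
 * causes the request to be replayed with rtnl held.
 */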
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	tp = NULL;
	cl = 0;
	block = NULL;

	if (prio == 0) {
		/* If no priority is provided by the user,
		 * we allocate one.
		 */
		if (n->nlmsg_flags & NLM_F_CREATE) {
			prio = TC_H_MAKE(0x80000000U, 0U);
			prio_allocate = true;
		} else {
			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
	 * block is shared (no qdisc found), qdisc is not unlocked, classifier
	 * type is not specified, classifier is not unlocked.
	 */
	if (rtnl_held ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, true);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
		err = -ENOMEM;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = PTR_ERR(tp);
		goto errout_locked;
	}

	if (tp == NULL) {
		struct tcf_proto *tp_new = NULL;

		if (chain->flushing) {
			err = -EAGAIN;
			goto errout_locked;
		}

		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
			err = -EINVAL;
			goto errout_locked;
		}

		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout_locked;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
							       &chain_info));

		mutex_unlock(&chain->filter_chain_lock);
		tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
					  protocol, prio, chain, rtnl_held,
					  extack);
		if (IS_ERR(tp_new)) {
			err = PTR_ERR(tp_new);
			goto errout_tp;
		}

		tp_created = 1;
		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
						rtnl_held);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout_tp;
		}
	} else {
		mutex_unlock(&chain->filter_chain_lock);
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout;
		}
	} else if (n->nlmsg_flags & NLM_F_EXCL) {
		tfilter_put(tp, fh);
		NL_SET_ERR_MSG(extack, "Filter already exists");
		err = -EEXIST;
		goto errout;
	}

	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
		err = -EINVAL;
		goto errout;
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
			      rtnl_held, extack);
	if (err == 0) {
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_NEWTFILTER, false, rtnl_held);
		tfilter_put(tp, fh);
		/* q pointer is NULL for shared blocks */
		if (q)
			q->flags &= ~TCQ_F_CAN_BYPASS;
	}

errout:
	if (err && tp_created)
		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
errout_tp:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		if (!tp_created)
			tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	if (err == -EAGAIN) {
		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
		 * of target chain.
		 */
		rtnl_held = true;
		/* Replay the request. */
		goto replay;
	}
	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

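/* Handle RTM_DELTFILTER: delete a single filter instance, remove an
 * entire tcf_proto when the handle is zero, or flush the whole chain
 * when no priority is given.
 */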
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	/* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
	 * found), qdisc is not unlocked, classifier type is not specified,
	 * classifier is not unlocked.
	 */
	if (!prio ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		/* User requested flush on non-existent chain. Nothing to do,
		 * so just return success.
		 */
		if (prio == 0) {
			err = 0;
			goto errout;
		}
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -ENOENT;
		goto errout;
	}

	if (prio == 0) {
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER, rtnl_held);
		tcf_chain_flush(chain, rtnl_held);
		err = 0;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout_locked;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout_locked;
	} else if (t->tcm_handle == 0) {
		tcf_chain_tp_remove(chain, &chain_info, tp);
		mutex_unlock(&chain->filter_chain_lock);

		tcf_proto_put(tp, rtnl_held, NULL);
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_DELTFILTER, false, rtnl_held);
		err = 0;
		goto errout;
	}
	mutex_unlock(&chain->filter_chain_lock);

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		bool last;

		err = tfilter_del_notify(net, skb, n, tp, block,
					 q, parent, fh, false, &last,
					 rtnl_held, extack);

		if (err)
			goto errout;
		if (last)
			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
	}

errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

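/* Handle RTM_GETTFILTER: look up a single filter and unicast it back to
 * the requester.
 */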
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0) {
		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
	 * unlocked, classifier type is not specified, classifier is not
	 * unlocked.
	 */
	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -EINVAL;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	mutex_unlock(&chain->filter_chain_lock);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		err = tfilter_notify(net, skb, n, tp, block, q, parent,
				     fh, RTM_NEWTFILTER, true, rtnl_held);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
	}

	tfilter_put(tp, fh);
errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;
}

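/* Walker context used while dumping all filters of a tcf_proto to
 * user space.
 */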
struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct tcf_block *block;
	struct Qdisc *q;
	u32 parent;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
			     n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER, true);
}

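/* Dump the filters of a single chain, resuming from the position stored
 * in cb->args[] by a previous pass. Returns false if the dump could not
 * be completed, typically because the skb ran out of room.
 */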
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_proto *tp, *tp_prev;
	struct tcf_dump_args arg;

	for (tp = __tcf_get_next_proto(chain, NULL);
	     tp;
	     tp_prev = tp,
		     tp = __tcf_get_next_proto(chain, tp),
		     tcf_proto_put(tp_prev, true, NULL),
		     (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER, true) <= 0)
				goto errout;
			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		arg.w.cookie = cb->args[2];
		tp->ops->walk(tp, &arg.w, true);
		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			goto errout;
	}
	return true;

errout:
	tcf_proto_put(tp, true, NULL);
	return false;
}

/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcf_chain *chain, *chain_prev;
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     NULL, cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index)) {
			tcf_chain_put(chain);
			err = -EMSGSIZE;
			break;
		}
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

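/* Fill a netlink message describing a chain and, when a template is set,
 * the template attributes supplied by the classifier ops.
 */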
static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
			      void *tmplt_priv, u32 chain_index,
			      struct net *net, struct sk_buff *skb,
			      struct tcf_block *block,
			      u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *b = skb_tail_pointer(skb);
	const struct tcf_proto_ops *ops;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	void *priv;

	ops = tmplt_ops;
	priv = tmplt_priv;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_handle = 0;
	if (block->q) {
		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
		tcm->tcm_parent = block->q->handle;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}

	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
		goto nla_put_failure;

	if (ops) {
		if (nla_put_string(skb, TCA_KIND, ops->kind))
			goto nla_put_failure;
		if (ops->tmplt_dump(skb, net, priv) < 0)
			goto nla_put_failure;
	}

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct tcf_block *block = chain->block;
	struct net *net = block->net;
	struct sk_buff *skb;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
			       chain->index, net, skb, block, portid,
			       seq, flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     flags & NLM_F_ECHO);

	if (err > 0)
		err = 0;
	return err;
}

static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct net *net = block->net;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}

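/* Parse and attach a chain template: look up the classifier ops by kind
 * and let it allocate the template private data.
 */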
static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
			      struct nlattr **tca,
			      struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;
	void *tmplt_priv;

	/* If kind is not set, user did not specify template. */
	if (!tca[TCA_KIND])
		return 0;

	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
		return -EOPNOTSUPP;
	}

	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
	if (IS_ERR(tmplt_priv)) {
		module_put(ops->owner);
		return PTR_ERR(tmplt_priv);
	}
	chain->tmplt_ops = ops;
	chain->tmplt_priv = tmplt_priv;
	return 0;
}

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv)
{
	/* If template ops are not set, there is no work to do. */
	if (!tmplt_ops)
		return;

	tmplt_ops->tmplt_destroy(tmplt_priv);
	module_put(tmplt_ops->owner);
}

/* Add/delete/get a chain */

static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	unsigned long cl;
	int err;

	if (n->nlmsg_type != RTM_GETCHAIN &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	parent = t->tcm_parent;
	cl = 0;

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block))
		return PTR_ERR(block);

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout_block;
	}

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (n->nlmsg_type == RTM_NEWCHAIN) {
		if (chain) {
			if (tcf_chain_held_by_acts_only(chain)) {
				/* The chain exists only because there is
				 * some action referencing it.
				 */
				tcf_chain_hold(chain);
			} else {
				NL_SET_ERR_MSG(extack, "Filter chain already exists");
				err = -EEXIST;
				goto errout_block_locked;
			}
		} else {
			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
				err = -ENOENT;
				goto errout_block_locked;
			}
			chain = tcf_chain_create(block, chain_index);
			if (!chain) {
				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
				err = -ENOMEM;
				goto errout_block_locked;
			}
		}
	} else {
		if (!chain || tcf_chain_held_by_acts_only(chain)) {
			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
			err = -EINVAL;
			goto errout_block_locked;
		}
		tcf_chain_hold(chain);
	}

	if (n->nlmsg_type == RTM_NEWCHAIN) {
		/* Modifying chain requires holding parent block lock. In case
		 * the chain was successfully added, take a reference to the
		 * chain. This ensures that an empty chain does not disappear at
		 * the end of this function.
		 */
		tcf_chain_hold(chain);
		chain->explicitly_created = true;
	}
	mutex_unlock(&block->lock);

	switch (n->nlmsg_type) {
	case RTM_NEWCHAIN:
		err = tc_chain_tmplt_add(chain, net, tca, extack);
		if (err) {
			tcf_chain_put_explicitly_created(chain);
			goto errout;
		}

		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);
		break;
	case RTM_DELCHAIN:
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER, true);
		/* Flush the chain first as the user requested chain removal. */
		tcf_chain_flush(chain, true);
		/* In case the chain was successfully deleted, put a reference
		 * to the chain previously taken during addition.
		 */
		tcf_chain_put_explicitly_created(chain);
		break;
	case RTM_GETCHAIN:
		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
				      n->nlmsg_seq, n->nlmsg_type, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
		break;
	default:
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(extack, "Unsupported message type");
		goto errout;
	}

errout:
	tcf_chain_put(chain);
errout_block:
	tcf_block_release(q, block, true);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;

errout_block_locked:
	mutex_unlock(&block->lock);
	goto errout_block;
}

/* called with RTNL */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_chain *chain;
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	mutex_lock(&block->lock);
	list_for_each_entry(chain, &block->chain_list, list) {
		if ((tca[TCA_CHAIN] &&
		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
			continue;
		if (index < index_start) {
			index++;
			continue;
		}
		if (tcf_chain_held_by_acts_only(chain))
			continue;
		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
					 chain->index, net, skb, block,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 RTM_NEWCHAIN);
		if (err <= 0)
			break;
		index++;
	}
	mutex_unlock(&block->lock);

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      bool rtnl_held, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;
		size_t attr_size = 0;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND, rtnl_held,
						extack);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			int err;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      exts->actions, &attr_size,
					      rtnl_held, extack);
			if (err < 0)
				return err;
			exts->nr_actions = err;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police])) {
		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
		return -EOPNOTSUPP;
	}
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2  was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			nest = nla_nest_start_noflag(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start_noflag(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);


int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);

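/* Run every offload callback registered on the block. Returns the number
 * of callbacks that succeeded, or an error as soon as one fails when
 * err_stop is set.
 */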
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		     void *type_data, bool err_stop)
{
	struct flow_block_cb *block_cb;
	int ok_count = 0;
	int err;

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop)
		return -EOPNOTSUPP;

	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
		err = block_cb->cb(type, type_data, block_cb->cb_priv);
		if (err) {
			if (err_stop)
				return err;
		} else {
			ok_count++;
		}
	}
	return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);

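/* Translate the actions attached to a filter (tcf_exts) into the
 * flow_action intermediate representation consumed by drivers for
 * hardware offload. Each pedit key expands into its own entry.
 */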
int tc_setup_flow_action(struct flow_action *flow_action,
			 const struct tcf_exts *exts)
{
	const struct tc_action *act;
	int i, j, k;

	if (!exts)
		return 0;

	j = 0;
	tcf_exts_for_each_action(i, act, exts) {
		struct flow_action_entry *entry;

		entry = &flow_action->entries[j];
		if (is_tcf_gact_ok(act)) {
			entry->id = FLOW_ACTION_ACCEPT;
		} else if (is_tcf_gact_shot(act)) {
			entry->id = FLOW_ACTION_DROP;
		} else if (is_tcf_gact_trap(act)) {
			entry->id = FLOW_ACTION_TRAP;
		} else if (is_tcf_gact_goto_chain(act)) {
			entry->id = FLOW_ACTION_GOTO;
			entry->chain_index = tcf_gact_goto_chain_index(act);
		} else if (is_tcf_mirred_egress_redirect(act)) {
			entry->id = FLOW_ACTION_REDIRECT;
			entry->dev = tcf_mirred_dev(act);
		} else if (is_tcf_mirred_egress_mirror(act)) {
			entry->id = FLOW_ACTION_MIRRED;
			entry->dev = tcf_mirred_dev(act);
		} else if (is_tcf_vlan(act)) {
			switch (tcf_vlan_action(act)) {
			case TCA_VLAN_ACT_PUSH:
				entry->id = FLOW_ACTION_VLAN_PUSH;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			case TCA_VLAN_ACT_POP:
				entry->id = FLOW_ACTION_VLAN_POP;
				break;
			case TCA_VLAN_ACT_MODIFY:
				entry->id = FLOW_ACTION_VLAN_MANGLE;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			default:
				goto err_out;
			}
		} else if (is_tcf_tunnel_set(act)) {
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tcf_tunnel_info(act);
		} else if (is_tcf_tunnel_release(act)) {
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		} else if (is_tcf_pedit(act)) {
			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
				switch (tcf_pedit_cmd(act, k)) {
				case TCA_PEDIT_KEY_EX_CMD_SET:
					entry->id = FLOW_ACTION_MANGLE;
					break;
				case TCA_PEDIT_KEY_EX_CMD_ADD:
					entry->id = FLOW_ACTION_ADD;
					break;
				default:
					goto err_out;
				}
				entry->mangle.htype = tcf_pedit_htype(act, k);
				entry->mangle.mask = tcf_pedit_mask(act, k);
				entry->mangle.val = tcf_pedit_val(act, k);
				entry->mangle.offset = tcf_pedit_offset(act, k);
				entry = &flow_action->entries[++j];
			}
		} else if (is_tcf_csum(act)) {
			entry->id = FLOW_ACTION_CSUM;
			entry->csum_flags = tcf_csum_update_flags(act);
		} else if (is_tcf_skbedit_mark(act)) {
			entry->id = FLOW_ACTION_MARK;
			entry->mark = tcf_skbedit_mark(act);
		} else if (is_tcf_sample(act)) {
			entry->id = FLOW_ACTION_SAMPLE;
			entry->sample.psample_group =
				tcf_sample_psample_group(act);
			entry->sample.trunc_size = tcf_sample_trunc_size(act);
			entry->sample.truncate = tcf_sample_truncate(act);
			entry->sample.rate = tcf_sample_rate(act);
		} else if (is_tcf_police(act)) {
			entry->id = FLOW_ACTION_POLICE;
			entry->police.burst = tcf_police_tcfp_burst(act);
			entry->police.rate_bytes_ps =
				tcf_police_rate_bytes_ps(act);
		} else if (is_tcf_ct(act)) {
			entry->id = FLOW_ACTION_CT;
			entry->ct.action = tcf_ct_action(act);
			entry->ct.zone = tcf_ct_zone(act);
		} else {
			goto err_out;
		}

		if (!is_tcf_pedit(act))
			j++;
	}
	return 0;
err_out:
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(tc_setup_flow_action);

unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
{
	unsigned int num_acts = 0;
	struct tc_action *act;
	int i;

	tcf_exts_for_each_action(i, act, exts) {
		if (is_tcf_pedit(act))
			num_acts += tcf_pedit_nkeys(act);
		else
			num_acts++;
	}
	return num_acts;
}
EXPORT_SYMBOL(tcf_exts_num_actions);

static __net_init int tcf_net_init(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock_init(&tn->idr_lock);
	idr_init(&tn->idr);
	return 0;
}

static void __net_exit tcf_net_exit(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_destroy(&tn->idr);
}

static struct pernet_operations tcf_net_ops = {
	.init = tcf_net_init,
	.exit = tcf_net_exit,
	.id   = &tcf_net_id,
	.size = sizeof(struct tcf_net),
};

static int __init tc_filter_init(void)
{
	int err;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&tcf_net_ops);
	if (err)
		goto err_register_pernet_subsys;

	err = rhashtable_init(&indr_setup_block_ht,
			      &tc_indr_setup_block_ht_params);
	if (err)
		goto err_rhash_setup_block_ht;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
		      tc_dump_chain, 0);

	return 0;

err_rhash_setup_block_ht:
	unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys:
	destroy_workqueue(tc_filter_wq);
	return err;
}

subsys_initcall(tc_filter_init);