// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_police.h>
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

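/* Classifiers undergoing destruction are tracked in the block's
 * proto_destroy_ht, hashed by (chain index, prio, protocol), so that a
 * concurrent insertion of an identical triple can detect the race and
 * fail with -EAGAIN instead of attaching alongside a dying instance.
 */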
static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
	return jhash_3words(tp->chain->index, tp->prio,
			    (__force __u32)tp->protocol, 0);
}

static void tcf_proto_signal_destroying(struct tcf_chain *chain,
					struct tcf_proto *tp)
{
	struct tcf_block *block = chain->block;

	mutex_lock(&block->proto_destroy_lock);
	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
		     destroy_obj_hashfn(tp));
	mutex_unlock(&block->proto_destroy_lock);
}

static bool tcf_proto_cmp(const struct tcf_proto *tp1,
			  const struct tcf_proto *tp2)
{
	return tp1->chain->index == tp2->chain->index &&
	       tp1->prio == tp2->prio &&
	       tp1->protocol == tp2->protocol;
}

static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
					struct tcf_proto *tp)
{
	u32 hash = destroy_obj_hashfn(tp);
	struct tcf_proto *iter;
	bool found = false;

	rcu_read_lock();
	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
				   destroy_ht_node, hash) {
		if (tcf_proto_cmp(tp, iter)) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}

static void
tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
{
	struct tcf_block *block = chain->block;

	mutex_lock(&block->proto_destroy_lock);
	if (hash_hashed(&tp->destroy_ht_node))
		hash_del_rcu(&tp->destroy_ht_node);
	mutex_unlock(&block->proto_destroy_lock);
}

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
		     struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;

	ops = __tcf_proto_lookup_ops(kind);
	if (ops)
		return ops;
#ifdef CONFIG_MODULES
	if (rtnl_held)
		rtnl_unlock();
	request_module("cls_%s", kind);
	if (rtnl_held)
		rtnl_lock();
	ops = __tcf_proto_lookup_ops(kind);
	/* We dropped the RTNL semaphore in order to perform
	 * the module load. So, even if we succeeded in loading
	 * the module we have to replay the request. We indicate
	 * this using -EAGAIN.
	 */
	if (ops) {
		module_put(ops->owner);
		return ERR_PTR(-EAGAIN);
	}
#endif
	NL_SET_ERR_MSG(extack, "TC classifier not found");
	return ERR_PTR(-ENOENT);
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

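/* Queue work on the tc filter workqueue once an RCU grace period has
 * elapsed; classifiers use this to free per-filter state only after all
 * concurrent readers are done with it.
 */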
bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
	INIT_RCU_WORK(rwork, func);
	return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);

/* Select new prio value from the range, managed by kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}

static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
{
	if (kind)
		return nla_strscpy(name, kind, IFNAMSIZ) < 0;
	memset(name, 0, IFNAMSIZ);
	return false;
}

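/* Returns true when the classifier named by @kind advertises
 * TCF_PROTO_OPS_DOIT_UNLOCKED, i.e. its update paths may run without
 * taking the rtnl lock.
 */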
static bool tcf_proto_is_unlocked(const char *kind)
{
	const struct tcf_proto_ops *ops;
	bool ret;

	if (strlen(kind) == 0)
		return false;

	ops = tcf_proto_lookup_ops(kind, false, NULL);
	/* On error return false to take rtnl lock. Proto lookup/create
	 * functions will perform lookup again and properly handle errors.
	 */
	if (IS_ERR(ops))
		return false;

	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
	module_put(ops->owner);
	return ret;
}

static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
					  bool rtnl_held,
					  struct netlink_ext_ack *extack)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
	if (IS_ERR(tp->ops)) {
		err = PTR_ERR(tp->ops);
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->chain = chain;
	spin_lock_init(&tp->lock);
	refcount_set(&tp->refcnt, 1);

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

static void tcf_proto_get(struct tcf_proto *tp)
{
	refcount_inc(&tp->refcnt);
}

static void tcf_chain_put(struct tcf_chain *chain);

static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
			      bool sig_destroy, struct netlink_ext_ack *extack)
{
	tp->ops->destroy(tp, rtnl_held, extack);
	if (sig_destroy)
		tcf_proto_signal_destroyed(tp->chain, tp);
	tcf_chain_put(tp->chain);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}

static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
			  struct netlink_ext_ack *extack)
{
	if (refcount_dec_and_test(&tp->refcnt))
		tcf_proto_destroy(tp, rtnl_held, true, extack);
}

static bool tcf_proto_check_delete(struct tcf_proto *tp)
{
	if (tp->ops->delete_empty)
		return tp->ops->delete_empty(tp);

	tp->deleting = true;
	return tp->deleting;
}

static void tcf_proto_mark_delete(struct tcf_proto *tp)
{
	spin_lock(&tp->lock);
	tp->deleting = true;
	spin_unlock(&tp->lock);
}

static bool tcf_proto_is_deleting(struct tcf_proto *tp)
{
	bool deleting;

	spin_lock(&tp->lock);
	deleting = tp->deleting;
	spin_unlock(&tp->lock);

	return deleting;
}

#define ASSERT_BLOCK_LOCKED(block)					\
	lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
	struct list_head list;
	tcf_chain_head_change_t *chain_head_change;
	void *chain_head_change_priv;
};

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail_rcu(&chain->list, &block->chain_list);
	mutex_init(&chain->filter_chain_lock);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	if (!chain->index)
		block->chain0.chain = chain;
	return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
				       struct tcf_proto *tp_head)
{
	if (item->chain_head_change)
		item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
				   struct tcf_proto *tp_head)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_block *block = chain->block;

	if (chain->index)
		return;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
		tcf_chain_head_change_item(item, tp_head);
	mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	ASSERT_BLOCK_LOCKED(block);

	list_del_rcu(&chain->list);
	if (!chain->index)
		block->chain0.chain = NULL;

	if (list_empty(&block->chain_list) &&
	    refcount_read(&block->refcnt) == 0)
		return true;

	return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
	mutex_destroy(&block->lock);
	mutex_destroy(&block->proto_destroy_lock);
	kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
	struct tcf_block *block = chain->block;

	mutex_destroy(&chain->filter_chain_lock);
	kfree_rcu(chain, rcu);
	if (free_block)
		tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	/* In case all the references are action references, this
	 * chain should not be shown to the user.
	 */
	return chain->refcnt == chain->action_refcnt;
}

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index)
			return chain;
	}
	return NULL;
}

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
					      u32 chain_index)
{
	struct tcf_chain *chain;

	list_for_each_entry_rcu(chain, &block->chain_list, list) {
		if (chain->index == chain_index)
			return chain;
	}
	return NULL;
}
#endif

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast);

static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
					 u32 chain_index, bool create,
					 bool by_act)
{
	struct tcf_chain *chain = NULL;
	bool is_first_reference;

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (chain) {
		tcf_chain_hold(chain);
	} else {
		if (!create)
			goto errout;
		chain = tcf_chain_create(block, chain_index);
		if (!chain)
			goto errout;
	}

	if (by_act)
		++chain->action_refcnt;
	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
	mutex_unlock(&block->lock);

	/* Send notification only in case we got the first
	 * non-action reference. Until then, the chain acts only as
	 * a placeholder for actions pointing to it and user ought
	 * not know about them.
	 */
	if (is_first_reference && !by_act)
		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);

	return chain;

errout:
	mutex_unlock(&block->lock);
	return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				       bool create)
{
	return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
	return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
			    bool explicitly_created)
{
	struct tcf_block *block = chain->block;
	const struct tcf_proto_ops *tmplt_ops;
	bool free_block = false;
	unsigned int refcnt;
	void *tmplt_priv;

	mutex_lock(&block->lock);
	if (explicitly_created) {
		if (!chain->explicitly_created) {
			mutex_unlock(&block->lock);
			return;
		}
		chain->explicitly_created = false;
	}

	if (by_act)
		chain->action_refcnt--;

	/* tc_chain_notify_delete can't be called while holding block lock.
	 * However, when block is unlocked chain can be changed concurrently, so
	 * save these to temporary variables.
	 */
	refcnt = --chain->refcnt;
	tmplt_ops = chain->tmplt_ops;
	tmplt_priv = chain->tmplt_priv;

	/* The last dropped non-action reference will trigger notification. */
	if (refcnt - chain->action_refcnt == 0 && !by_act) {
		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
				       block, NULL, 0, 0, false);
		/* Last reference to chain, no need to lock. */
		chain->flushing = false;
	}

	if (refcnt == 0)
		free_block = tcf_chain_detach(chain);
	mutex_unlock(&block->lock);

	if (refcnt == 0) {
		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
		tcf_chain_destroy(chain, free_block);
	}
}

static void tcf_chain_put(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, true);
}

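/* Flushing is done in two steps: under filter_chain_lock every proto on
 * the chain is marked as being destroyed and the chain head is cleared,
 * then the references are dropped outside the lock.
 */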
static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
	struct tcf_proto *tp, *tp_next;

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_dereference(chain->filter_chain, chain);
	while (tp) {
		tp_next = rcu_dereference_protected(tp->next, 1);
		tcf_proto_signal_destroying(chain, tp);
		tp = tp_next;
	}
	tp = tcf_chain_dereference(chain->filter_chain, chain);
	RCU_INIT_POINTER(chain->filter_chain, NULL);
	tcf_chain0_head_change(chain, NULL);
	chain->flushing = true;
	mutex_unlock(&chain->filter_chain_lock);

	while (tp) {
		tp_next = rcu_dereference_protected(tp->next, 1);
		tcf_proto_put(tp, rtnl_held, NULL);
		tp = tp_next;
	}
}

static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo);

static void tcf_block_offload_init(struct flow_block_offload *bo,
				   struct net_device *dev, struct Qdisc *sch,
				   enum flow_block_command command,
				   enum flow_block_binder_type binder_type,
				   struct flow_block *flow_block,
				   bool shared, struct netlink_ext_ack *extack)
{
	bo->net = dev_net(dev);
	bo->command = command;
	bo->binder_type = binder_type;
	bo->block = flow_block;
	bo->block_shared = shared;
	bo->extack = extack;
	bo->sch = sch;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void tcf_block_unbind(struct tcf_block *block,
			     struct flow_block_offload *bo);

static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct tcf_block *block = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;
	struct Qdisc *sch = block_cb->indr.sch;
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;

	tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
			       block_cb->indr.binder_type,
			       &block->flow_block, tcf_block_shared(block),
			       &extack);
	rtnl_lock();
	down_write(&block->cb_lock);
	list_del(&block_cb->driver_list);
	list_move(&block_cb->list, &bo.cb_list);
	tcf_block_unbind(block, &bo);
	up_write(&block->cb_lock);
	rtnl_unlock();
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
	return atomic_read(&block->offloadcnt);
}

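/* Send a block bind/unbind command to the device. Devices that implement
 * ndo_setup_tc are offloaded directly; otherwise the command is passed to
 * indirect block offload registrations and -EOPNOTSUPP is returned so the
 * caller can account for the non-offloaded device.
 */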
static int tcf_block_offload_cmd(struct tcf_block *block,
				 struct net_device *dev, struct Qdisc *sch,
				 struct tcf_block_ext_info *ei,
				 enum flow_block_command command,
				 struct netlink_ext_ack *extack)
{
	struct flow_block_offload bo = {};

	tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
			       &block->flow_block, tcf_block_shared(block),
			       extack);

	if (dev->netdev_ops->ndo_setup_tc) {
		int err;

		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
		if (err < 0) {
			if (err != -EOPNOTSUPP)
				NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
			return err;
		}

		return tcf_block_setup(block, &bo);
	}

	flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
				    tc_block_indr_cleanup);
	tcf_block_setup(block, &bo);

	return -EOPNOTSUPP;
}

static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
				  struct tcf_block_ext_info *ei,
				  struct netlink_ext_ack *extack)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	down_write(&block->cb_lock);

	/* If tc offload feature is disabled and the block we try to bind
	 * to already has some offloaded filters, refuse the bind.
	 */
	if (dev->netdev_ops->ndo_setup_tc &&
	    !tc_can_offload(dev) &&
	    tcf_block_offload_in_use(block)) {
		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
		err = -EOPNOTSUPP;
		goto err_unlock;
	}

	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_inc;
	if (err)
		goto err_unlock;

	up_write(&block->cb_lock);
	return 0;

no_offload_dev_inc:
	if (tcf_block_offload_in_use(block))
		goto err_unlock;

	err = 0;
	block->nooffloaddevcnt++;
err_unlock:
	up_write(&block->cb_lock);
	return err;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
				     struct tcf_block_ext_info *ei)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	down_write(&block->cb_lock);
	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_dec;
	up_write(&block->cb_lock);
	return;

no_offload_dev_dec:
	WARN_ON(block->nooffloaddevcnt-- == 0);
	up_write(&block->cb_lock);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
			      struct tcf_block_ext_info *ei,
			      struct netlink_ext_ack *extack)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_chain *chain0;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
		return -ENOMEM;
	}
	item->chain_head_change = ei->chain_head_change;
	item->chain_head_change_priv = ei->chain_head_change_priv;

	mutex_lock(&block->lock);
	chain0 = block->chain0.chain;
	if (chain0)
		tcf_chain_hold(chain0);
	else
		list_add(&item->list, &block->chain0.filter_chain_list);
	mutex_unlock(&block->lock);

	if (chain0) {
		struct tcf_proto *tp_head;

		mutex_lock(&chain0->filter_chain_lock);

		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
		if (tp_head)
			tcf_chain_head_change_item(item, tp_head);

		mutex_lock(&block->lock);
		list_add(&item->list, &block->chain0.filter_chain_list);
		mutex_unlock(&block->lock);

		mutex_unlock(&chain0->filter_chain_lock);
		tcf_chain_put(chain0);
	}

	return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
			      struct tcf_block_ext_info *ei)
{
	struct tcf_filter_chain_list_item *item;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
		    (item->chain_head_change == ei->chain_head_change &&
		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
			if (block->chain0.chain)
				tcf_chain_head_change_item(item, NULL);
			list_del(&item->list);
			mutex_unlock(&block->lock);

			kfree(item);
			return;
		}
	}
	mutex_unlock(&block->lock);
	WARN_ON(1);
}

struct tcf_net {
	spinlock_t idr_lock; /* Protects idr */
	struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
			    struct netlink_ext_ack *extack)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);
	int err;

	idr_preload(GFP_KERNEL);
	spin_lock(&tn->idr_lock);
	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
			    GFP_NOWAIT);
	spin_unlock(&tn->idr_lock);
	idr_preload_end();

	return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock(&tn->idr_lock);
	idr_remove(&tn->idr, block->index);
	spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	block = kzalloc(sizeof(*block), GFP_KERNEL);
	if (!block) {
		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
		return ERR_PTR(-ENOMEM);
	}
	mutex_init(&block->lock);
	mutex_init(&block->proto_destroy_lock);
	init_rwsem(&block->cb_lock);
	flow_block_init(&block->flow_block);
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->owner_list);
	INIT_LIST_HEAD(&block->chain0.filter_chain_list);

	refcount_set(&block->refcnt, 1);
	block->net = net;
	block->index = block_index;

	/* Don't store q pointer for blocks which are shared */
	if (!tcf_block_shared(block))
		block->q = q;
	return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	return idr_find(&tn->idr, block_index);
}

static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
	struct tcf_block *block;

	rcu_read_lock();
	block = tcf_block_lookup(net, block_index);
	if (block && !refcount_inc_not_zero(&block->refcnt))
		block = NULL;
	rcu_read_unlock();

	return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	mutex_lock(&block->lock);
	if (chain)
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);
	else
		chain = list_first_entry_or_null(&block->chain_list,
						 struct tcf_chain, list);

	/* skip all action-only chains */
	while (chain && tcf_chain_held_by_acts_only(chain))
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);

	if (chain)
		tcf_chain_hold(chain);
	mutex_unlock(&block->lock);

	return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * block. It properly obtains block->lock and takes reference to chain before
 * returning it. Users of this function must be tolerant to concurrent chain
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

	if (chain)
		tcf_chain_put(chain);

	return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);

static struct tcf_proto *
__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
	u32 prio = 0;

	ASSERT_RTNL();
	mutex_lock(&chain->filter_chain_lock);

	if (!tp) {
		tp = tcf_chain_dereference(chain->filter_chain, chain);
	} else if (tcf_proto_is_deleting(tp)) {
		/* 'deleting' flag is set and chain->filter_chain_lock was
		 * unlocked, which means next pointer could be invalid. Restart
		 * search.
		 */
		prio = tp->prio + 1;
		tp = tcf_chain_dereference(chain->filter_chain, chain);

		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
			if (!tp->deleting && tp->prio >= prio)
				break;
	} else {
		tp = tcf_chain_dereference(tp->next, chain);
	}

	if (tp)
		tcf_proto_get(tp);

	mutex_unlock(&chain->filter_chain_lock);

	return tp;
}

/* Function to be used by all clients that want to iterate over all tp's on
 * chain. Users of this function must be tolerant to concurrent tp
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_proto *
tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);

	if (tp)
		tcf_proto_put(tp, true, NULL);

	return tp_next;
}
EXPORT_SYMBOL(tcf_get_next_proto);

static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
{
	struct tcf_chain *chain;

	/* Last reference to block. At this point chains cannot be added or
	 * removed concurrently.
	 */
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		tcf_chain_put_explicitly_created(chain);
		tcf_chain_flush(chain, rtnl_held);
	}
}

/* Looks up the Qdisc and increments its reference counter.
 * Set parent, if necessary.
 */

static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
			    u32 *parent, int ifindex, bool rtnl_held,
			    struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *cops;
	struct net_device *dev;
	int err = 0;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	rcu_read_lock();

	/* Find link */
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	/* Find qdisc */
	if (!*parent) {
		*q = dev->qdisc;
		*parent = (*q)->handle;
	} else {
		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
		if (!*q) {
			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
			err = -EINVAL;
			goto errout_rcu;
		}
	}

	*q = qdisc_refcount_inc_nz(*q);
	if (!*q) {
		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
		err = -EINVAL;
		goto errout_rcu;
	}

	/* Is it classful? */
	cops = (*q)->ops->cl_ops;
	if (!cops) {
		NL_SET_ERR_MSG(extack, "Qdisc not classful");
		err = -EINVAL;
		goto errout_qdisc;
	}

	if (!cops->tcf_block) {
		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
		err = -EOPNOTSUPP;
		goto errout_qdisc;
	}

errout_rcu:
	/* At this point we know that qdisc is not noop_qdisc,
	 * which means that qdisc holds a reference to net_device
	 * and we hold a reference to qdisc, so it is safe to release
	 * rcu read lock.
	 */
	rcu_read_unlock();
	return err;

errout_qdisc:
	rcu_read_unlock();

	if (rtnl_held)
		qdisc_put(*q);
	else
		qdisc_put_unlocked(*q);
	*q = NULL;

	return err;
}

static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
			       int ifindex, struct netlink_ext_ack *extack)
{
	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		*cl = cops->find(q, parent);
		if (*cl == 0) {
			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
			return -ENOENT;
		}
	}

	return 0;
}

static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
					  unsigned long cl, int ifindex,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, block_index);
		if (!block) {
			NL_SET_ERR_MSG(extack, "Block of given index was not found");
			return ERR_PTR(-EINVAL);
		}
	} else {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		block = cops->tcf_block(q, cl, extack);
		if (!block)
			return ERR_PTR(-EINVAL);

		if (tcf_block_shared(block)) {
			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
			return ERR_PTR(-EOPNOTSUPP);
		}

		/* Always take reference to block in order to support execution
		 * of rules update path of cls API without rtnl lock. Caller
		 * must release block when it is finished using it. 'if' block
		 * of this conditional obtains a reference to the block by calling
		 * tcf_block_refcnt_get().
		 */
		refcount_inc(&block->refcnt);
	}

	return block;
}

static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
			    struct tcf_block_ext_info *ei, bool rtnl_held)
{
	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
		/* Flushing/putting all chains will cause the block to be
		 * deallocated when last chain is freed. However, if chain_list
		 * is empty, block has to be manually deallocated. After block
		 * reference counter reached 0, it is no longer possible to
		 * increment it or add new chains to block.
		 */
		bool free_block = list_empty(&block->chain_list);

		mutex_unlock(&block->lock);
		if (tcf_block_shared(block))
			tcf_block_remove(block, block->net);

		if (q)
			tcf_block_offload_unbind(block, q, ei);

		if (free_block)
			tcf_block_destroy(block);
		else
			tcf_block_flush_all_chains(block, rtnl_held);
	} else if (q) {
		tcf_block_offload_unbind(block, q, ei);
	}
}

static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
	__tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
					u32 *parent, unsigned long *cl,
					int ifindex, u32 block_index,
					struct netlink_ext_ack *extack)
{
	struct tcf_block *block;
	int err = 0;

	ASSERT_RTNL();

	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
	if (err)
		goto errout;

	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
	if (err)
		goto errout_qdisc;

	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout_qdisc;
	}

	return block;

errout_qdisc:
	if (*q)
		qdisc_put(*q);
errout:
	*q = NULL;
	return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
			      bool rtnl_held)
{
	if (!IS_ERR_OR_NULL(block))
		tcf_block_refcnt_put(block, rtnl_held);

	if (q) {
		if (rtnl_held)
			qdisc_put(q);
		else
			qdisc_put_unlocked(q);
	}
}

struct tcf_block_owner_item {
	struct list_head list;
	struct Qdisc *q;
	enum flow_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	if (block->keep_dst &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
	struct tcf_block_owner_item *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->owner_list, list)
		tcf_block_owner_netif_keep_dst(block, item->q,
					       item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	item->q = q;
	item->binder_type = binder_type;
	list_add(&item->list, &block->owner_list);
	return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
				enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	list_for_each_entry(item, &block->owner_list, list) {
		if (item->q == q && item->binder_type == binder_type) {
			list_del(&item->list);
			kfree(item);
			return;
		}
	}
	WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei,
		      struct netlink_ext_ack *extack)
{
	struct net *net = qdisc_net(q);
	struct tcf_block *block = NULL;
	int err;

	if (ei->block_index)
		/* block_index not 0 means the shared block is requested */
		block = tcf_block_refcnt_get(net, ei->block_index);

	if (!block) {
		block = tcf_block_create(net, q, ei->block_index, extack);
		if (IS_ERR(block))
			return PTR_ERR(block);
		if (tcf_block_shared(block)) {
			err = tcf_block_insert(block, net, extack);
			if (err)
				goto err_block_insert;
		}
	}

	err = tcf_block_owner_add(block, q, ei->binder_type);
	if (err)
		goto err_block_owner_add;

	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

	err = tcf_chain0_head_change_cb_add(block, ei, extack);
	if (err)
		goto err_chain0_head_change_cb_add;

	err = tcf_block_offload_bind(block, q, ei, extack);
	if (err)
		goto err_block_offload_bind;

	*p_block = block;
	return 0;

err_block_offload_bind:
	tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
	tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
	tcf_block_refcnt_put(block, true);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
		  struct netlink_ext_ack *extack)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	if (!block)
		return;
	tcf_chain0_head_change_cb_del(block, ei);
	tcf_block_owner_del(block, q, ei->binder_type);

	__tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	if (!block)
		return;
	tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);

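/* Replay all filters of all chains on the block to a single callback via
 * each classifier's ->reoffload() op. When adding, a failure removes the
 * filters that were already replayed to that callback.
 */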
static int
tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
{
	struct tcf_chain *chain, *chain_prev;
	struct tcf_proto *tp, *tp_prev;
	int err;

	lockdep_assert_held(&block->cb_lock);

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		for (tp = __tcf_get_next_proto(chain, NULL); tp;
		     tp_prev = tp,
			     tp = __tcf_get_next_proto(chain, tp),
			     tcf_proto_put(tp_prev, true, NULL)) {
			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
				if (err && add)
					goto err_playback_remove;
			} else if (add && offload_in_use) {
				err = -EOPNOTSUPP;
				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
				goto err_playback_remove;
			}
		}
	}

	return 0;

err_playback_remove:
	tcf_proto_put(tp, true, NULL);
	tcf_chain_put(chain);
	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
	return err;
}

static int tcf_block_bind(struct tcf_block *block,
			  struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;
	int err, i = 0;

	lockdep_assert_held(&block->cb_lock);

	list_for_each_entry(block_cb, &bo->cb_list, list) {
		err = tcf_block_playback_offloads(block, block_cb->cb,
						  block_cb->cb_priv, true,
						  tcf_block_offload_in_use(block),
						  bo->extack);
		if (err)
			goto err_unroll;
		if (!bo->unlocked_driver_cb)
			block->lockeddevcnt++;

		i++;
	}
	list_splice(&bo->cb_list, &block->flow_block.cb_list);

	return 0;

err_unroll:
	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		if (i-- > 0) {
			list_del(&block_cb->list);
			tcf_block_playback_offloads(block, block_cb->cb,
						    block_cb->cb_priv, false,
						    tcf_block_offload_in_use(block),
						    NULL);
			if (!bo->unlocked_driver_cb)
				block->lockeddevcnt--;
		}
		flow_block_cb_free(block_cb);
	}

	return err;
}

static void tcf_block_unbind(struct tcf_block *block,
			     struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;

	lockdep_assert_held(&block->cb_lock);

	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		tcf_block_playback_offloads(block, block_cb->cb,
					    block_cb->cb_priv, false,
					    tcf_block_offload_in_use(block),
					    NULL);
		list_del(&block_cb->list);
		flow_block_cb_free(block_cb);
		if (!bo->unlocked_driver_cb)
			block->lockeddevcnt--;
	}
}

static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo)
{
	int err;

	switch (bo->command) {
	case FLOW_BLOCK_BIND:
		err = tcf_block_bind(block, bo);
		break;
	case FLOW_BLOCK_UNBIND:
		err = 0;
		tcf_block_unbind(block, bo);
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
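/* *last_executed_chain returns the index of the chain that classification
 * ended on; the ingress path records it in a TC_SKB_EXT on a chain miss.
 */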
static inline int __tcf_classify(struct sk_buff *skb,
				 const struct tcf_proto *tp,
				 const struct tcf_proto *orig_tp,
				 struct tcf_result *res,
				 bool compat_mode,
				 u32 *last_executed_chain)
{
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		__be16 protocol = skb_protocol(skb, false);
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			*last_executed_chain = first_tp->chain->index;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	goto reclassify;
#endif
}

int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	u32 last_executed_chain = 0;

	return __tcf_classify(skb, tp, tp, res, compat_mode,
			      &last_executed_chain);
}
EXPORT_SYMBOL(tcf_classify);

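/* With CONFIG_NET_TC_SKB_EXT an already present tc skb extension makes
 * classification resume from the recorded chain of the ingress block, and
 * a miss on a non-zero chain is written back into the extension.
 */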
int tcf_classify_ingress(struct sk_buff *skb,
			 const struct tcf_block *ingress_block,
			 const struct tcf_proto *tp,
			 struct tcf_result *res, bool compat_mode)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 last_executed_chain = 0;

	return __tcf_classify(skb, tp, tp, res, compat_mode,
			      &last_executed_chain);
#else
	u32 last_executed_chain = tp ? tp->chain->index : 0;
	const struct tcf_proto *orig_tp = tp;
	struct tc_skb_ext *ext;
	int ret;

	ext = skb_ext_find(skb, TC_SKB_EXT);

	if (ext && ext->chain) {
		struct tcf_chain *fchain;

		fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
		if (!fchain)
			return TC_ACT_SHOT;

		/* Consume, so cloned/redirect skbs won't inherit ext */
		skb_ext_del(skb, TC_SKB_EXT);

		tp = rcu_dereference_bh(fchain->filter_chain);
		last_executed_chain = fchain->index;
	}

	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
			     &last_executed_chain);

	/* If we missed on some chain */
	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
		ext = skb_ext_add(skb, TC_SKB_EXT);
		if (WARN_ON_ONCE(!ext))
			return TC_ACT_SHOT;
		ext->chain = last_executed_chain;
		ext->mru = qdisc_skb_cb(skb)->mru;
	}

	return ret;
#endif
}
EXPORT_SYMBOL(tcf_classify_ingress);

struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info)
{
	return tcf_chain_dereference(*chain_info->pprev, chain);
}

static int tcf_chain_tp_insert(struct tcf_chain *chain,
			       struct tcf_chain_info *chain_info,
			       struct tcf_proto *tp)
{
	if (chain->flushing)
		return -EAGAIN;

	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain0_head_change(chain, tp);
	tcf_proto_get(tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);

	return 0;
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);

	tcf_proto_mark_delete(tp);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate);

/* Try to insert new proto.
 * If proto with specified priority already exists, free new proto
 * and return existing one.
 */

static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
						    struct tcf_proto *tp_new,
						    u32 protocol, u32 prio,
						    bool rtnl_held)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp;
	int err = 0;

	mutex_lock(&chain->filter_chain_lock);

	if (tcf_proto_exists_destroying(chain, tp_new)) {
		mutex_unlock(&chain->filter_chain_lock);
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		return ERR_PTR(-EAGAIN);
	}

	tp = tcf_chain_tp_find(chain, &chain_info,
			       protocol, prio, false);
	if (!tp)
		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
	mutex_unlock(&chain->filter_chain_lock);

	if (tp) {
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		tp_new = tp;
	} else if (err) {
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		tp_new = ERR_PTR(err);
	}

	return tp_new;
}

static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
				      struct tcf_proto *tp, bool rtnl_held,
				      struct netlink_ext_ack *extack)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp_iter;
	struct tcf_proto **pprev;
	struct tcf_proto *next;

	mutex_lock(&chain->filter_chain_lock);

	/* Atomically find and remove tp from chain. */
	for (pprev = &chain->filter_chain;
	     (tp_iter = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp_iter->next) {
		if (tp_iter == tp) {
			chain_info.pprev = pprev;
			chain_info.next = tp_iter->next;
			WARN_ON(tp_iter->deleting);
			break;
		}
	}
	/* Verify that tp still exists and no new filters were inserted
	 * concurrently.
	 * Mark tp for deletion if it is empty.
	 */
	if (!tp_iter || !tcf_proto_check_delete(tp)) {
		mutex_unlock(&chain->filter_chain_lock);
		return;
	}

	tcf_proto_signal_destroying(chain, tp);
	next = tcf_chain_dereference(chain_info.next, chain);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info.pprev, next);
	mutex_unlock(&chain->filter_chain_lock);

	tcf_proto_put(tp, rtnl_held, extack);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	if (tp) {
		chain_info->next = tp->next;
		tcf_proto_get(tp);
	} else {
		chain_info->next = NULL;
	}
	return tp;
}

static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
			 u32 portid, u32 seq, u16 flags, int event,
			 bool terse_dump, bool rtnl_held)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	if (q) {
		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
		tcm->tcm_parent = parent;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else if (terse_dump) {
		if (tp->ops->terse_dump) {
			if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
						rtnl_held) < 0)
				goto nla_put_failure;
		} else {
			goto cls_op_not_supp;
		}
	} else {
		if (tp->ops->dump &&
		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
cls_op_not_supp:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct tcf_block *block, struct Qdisc *q,
			  u32 parent, void *fh, int event, bool unicast,
			  bool rtnl_held)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, event,
			  false, rtnl_held) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);

	if (err > 0)
		err = 0;
	return err;
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct tcf_block *block, struct Qdisc *q,
			      u32 parent, void *fh, bool unicast, bool *last,
			      bool rtnl_held, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
			  false, rtnl_held) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");

	if (err > 0)
		err = 0;
	return err;
}

static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct tcf_block *block, struct Qdisc *q,
				 u32 parent, struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp;

	for (tp = tcf_get_next_proto(chain, NULL);
	     tp; tp = tcf_get_next_proto(chain, tp))
		tfilter_notify(net, oskb, n, tp, block,
			       q, parent, NULL, event, false, true);
}

static void tfilter_put(struct tcf_proto *tp, void *fh)
{
	if (tp->ops->put && fh)
		tp->ops->put(tp, fh);
}

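/* Handle RTM_NEWTFILTER: create a new filter or change an existing one. The
 * rtnl mutex is taken only when the qdisc or classifier requires it, and the
 * whole request is replayed if a concurrent chain flush makes it fail with
 * -EAGAIN.
 */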
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	char name[IFNAMSIZ];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	tp = NULL;
	cl = 0;
	block = NULL;

	if (prio == 0) {
		/* If no priority is provided by the user,
		 * we allocate one.
		 */
		if (n->nlmsg_flags & NLM_F_CREATE) {
			prio = TC_H_MAKE(0x80000000U, 0U);
			prio_allocate = true;
		} else {
			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
		err = -EINVAL;
		goto errout;
	}

	/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
	 * block is shared (no qdisc found), qdisc is not unlocked, classifier
	 * type is not specified, classifier is not unlocked.
	 */
	if (rtnl_held ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tcf_proto_is_unlocked(name)) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}
	block->classid = parent;

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, true);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
		err = -ENOMEM;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = PTR_ERR(tp);
		goto errout_locked;
	}

	if (tp == NULL) {
		struct tcf_proto *tp_new = NULL;

		if (chain->flushing) {
			err = -EAGAIN;
			goto errout_locked;
		}

		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
			err = -EINVAL;
			goto errout_locked;
		}

		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout_locked;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
							       &chain_info));

		mutex_unlock(&chain->filter_chain_lock);
		tp_new = tcf_proto_create(name, protocol, prio, chain,
					  rtnl_held, extack);
		if (IS_ERR(tp_new)) {
			err = PTR_ERR(tp_new);
			goto errout_tp;
		}

		tp_created = 1;
		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
						rtnl_held);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout_tp;
		}
	} else {
		mutex_unlock(&chain->filter_chain_lock);
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout;
		}
	} else if (n->nlmsg_flags & NLM_F_EXCL) {
		tfilter_put(tp, fh);
		NL_SET_ERR_MSG(extack, "Filter already exists");
		err = -EEXIST;
		goto errout;
	}

	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
		err = -EINVAL;
		goto errout;
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
			      rtnl_held, extack);
	if (err == 0) {
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_NEWTFILTER, false, rtnl_held);
		tfilter_put(tp, fh);
		/* q pointer is NULL for shared blocks */
		if (q)
			q->flags &= ~TCQ_F_CAN_BYPASS;
	}

errout:
	if (err && tp_created)
		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
errout_tp:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		if (!tp_created)
			tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	if (err == -EAGAIN) {
		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
		 * of target chain.
		 */
		rtnl_held = true;
		/* Replay the request. */
		goto replay;
	}
	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

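/* Handle RTM_DELTFILTER: delete a single filter, drop a whole classifier
 * instance when the handle is zero, or flush the entire chain when no
 * priority is given.
 */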
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	char name[IFNAMSIZ];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
		err = -EINVAL;
		goto errout;
	}
	/* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
	 * found), qdisc is not unlocked, classifier type is not specified,
	 * classifier is not unlocked.
	 */
	if (!prio ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tcf_proto_is_unlocked(name)) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		/* User requested flush on non-existent chain. Nothing to do,
		 * so just return success.
		 */
		if (prio == 0) {
			err = 0;
			goto errout;
		}
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -ENOENT;
		goto errout;
	}

	if (prio == 0) {
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		tcf_chain_flush(chain, rtnl_held);
		err = 0;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout_locked;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout_locked;
	} else if (t->tcm_handle == 0) {
		tcf_proto_signal_destroying(chain, tp);
		tcf_chain_tp_remove(chain, &chain_info, tp);
		mutex_unlock(&chain->filter_chain_lock);

		tcf_proto_put(tp, rtnl_held, NULL);
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_DELTFILTER, false, rtnl_held);
		err = 0;
		goto errout;
	}
	mutex_unlock(&chain->filter_chain_lock);

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		bool last;

		err = tfilter_del_notify(net, skb, n, tp, block,
					 q, parent, fh, false, &last,
					 rtnl_held, extack);

		if (err)
			goto errout;
		if (last)
			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
	}

errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

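/* Handle RTM_GETTFILTER: look up a single filter and unicast it back to the
 * requesting socket.
 */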
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	char name[IFNAMSIZ];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0) {
		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
		err = -EINVAL;
		goto errout;
	}
	/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
	 * unlocked, classifier type is not specified, classifier is not
	 * unlocked.
	 */
	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tcf_proto_is_unlocked(name)) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -EINVAL;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	mutex_unlock(&chain->filter_chain_lock);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		err = tfilter_notify(net, skb, n, tp, block, q, parent,
				     fh, RTM_NEWTFILTER, true, rtnl_held);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
	}

	tfilter_put(tp, fh);
errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;
}

struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct tcf_block *block;
	struct Qdisc *q;
	u32 parent;
	bool terse_dump;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
			     n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER, a->terse_dump, true);
}

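/* Dump all filters on one chain, resuming from the position stored in
 * cb->args[]. Returns false once the dump skb runs out of space.
 */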
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index, bool terse)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_proto *tp, *tp_prev;
	struct tcf_dump_args arg;

	for (tp = __tcf_get_next_proto(chain, NULL);
	     tp;
	     tp_prev = tp,
		     tp = __tcf_get_next_proto(chain, tp),
		     tcf_proto_put(tp_prev, true, NULL),
		     (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER, false, true) <= 0)
				goto errout;
			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		arg.w.cookie = cb->args[2];
		arg.terse_dump = terse;
		tp->ops->walk(tp, &arg.w, true);
		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			goto errout;
	}
	return true;

errout:
	tcf_proto_put(tp, true, NULL);
	return false;
}

static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
	[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
};

/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcf_chain *chain, *chain_prev;
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	bool terse_dump = false;
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     tcf_tfilter_dump_policy, cb->extack);
	if (err)
		return err;

	if (tca[TCA_DUMP_FLAGS]) {
		struct nla_bitfield32 flags =
			nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);

		terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent)
			q = dev->qdisc;
		else
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		parent = block->classid;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index, terse_dump)) {
			tcf_chain_put(chain);
			err = -EMSGSIZE;
			break;
		}
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

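/* Fill an RTM_*CHAIN message for a chain, including the template kind and
 * the classifier's template dump when a template is set on the chain.
 */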
static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
			      void *tmplt_priv, u32 chain_index,
			      struct net *net, struct sk_buff *skb,
			      struct tcf_block *block,
			      u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *b = skb_tail_pointer(skb);
	const struct tcf_proto_ops *ops;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	void *priv;

	ops = tmplt_ops;
	priv = tmplt_priv;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_handle = 0;
	if (block->q) {
		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
		tcm->tcm_parent = block->q->handle;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}

	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
		goto nla_put_failure;

	if (ops) {
		if (nla_put_string(skb, TCA_KIND, ops->kind))
			goto nla_put_failure;
		if (ops->tmplt_dump(skb, net, priv) < 0)
			goto nla_put_failure;
	}

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct tcf_block *block = chain->block;
	struct net *net = block->net;
	struct sk_buff *skb;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
			       chain->index, net, skb, block, portid,
			       seq, flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     flags & NLM_F_ECHO);

	if (err > 0)
		err = 0;
	return err;
}

static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct net *net = block->net;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}

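/* Instantiate an optional chain template: if TCA_KIND is present, look up the
 * classifier ops and let them create the template from the remaining
 * attributes.
 */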
static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
			      struct nlattr **tca,
			      struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;
	char name[IFNAMSIZ];
	void *tmplt_priv;

	/* If kind is not set, user did not specify template. */
	if (!tca[TCA_KIND])
		return 0;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
		return -EINVAL;
	}

	ops = tcf_proto_lookup_ops(name, true, extack);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
		return -EOPNOTSUPP;
	}

	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
	if (IS_ERR(tmplt_priv)) {
		module_put(ops->owner);
		return PTR_ERR(tmplt_priv);
	}
	chain->tmplt_ops = ops;
	chain->tmplt_priv = tmplt_priv;
	return 0;
}

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv)
{
	/* If template ops are not set, there is no template to destroy. */
	if (!tmplt_ops)
		return;

	tmplt_ops->tmplt_destroy(tmplt_priv);
	module_put(tmplt_ops->owner);
}

/* Add/delete/get a chain */

static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	unsigned long cl;
	int err;

	if (n->nlmsg_type != RTM_GETCHAIN &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	parent = t->tcm_parent;
	cl = 0;

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block))
		return PTR_ERR(block);

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout_block;
	}

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (n->nlmsg_type == RTM_NEWCHAIN) {
		if (chain) {
			if (tcf_chain_held_by_acts_only(chain)) {
				/* The chain exists only because there is
				 * some action referencing it.
				 */
				tcf_chain_hold(chain);
			} else {
				NL_SET_ERR_MSG(extack, "Filter chain already exists");
				err = -EEXIST;
				goto errout_block_locked;
			}
		} else {
			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
				err = -ENOENT;
				goto errout_block_locked;
			}
			chain = tcf_chain_create(block, chain_index);
			if (!chain) {
				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
				err = -ENOMEM;
				goto errout_block_locked;
			}
		}
	} else {
		if (!chain || tcf_chain_held_by_acts_only(chain)) {
			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
			err = -EINVAL;
			goto errout_block_locked;
		}
		tcf_chain_hold(chain);
	}

	if (n->nlmsg_type == RTM_NEWCHAIN) {
		/* Modifying chain requires holding parent block lock. In case
		 * the chain was successfully added, take a reference to the
		 * chain. This ensures that an empty chain does not disappear at
		 * the end of this function.
		 */
		tcf_chain_hold(chain);
		chain->explicitly_created = true;
	}
	mutex_unlock(&block->lock);

	switch (n->nlmsg_type) {
	case RTM_NEWCHAIN:
		err = tc_chain_tmplt_add(chain, net, tca, extack);
		if (err) {
			tcf_chain_put_explicitly_created(chain);
			goto errout;
		}

		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);
		break;
	case RTM_DELCHAIN:
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		/* Flush the chain first as the user requested chain removal. */
		tcf_chain_flush(chain, true);
		/* In case the chain was successfully deleted, put a reference
		 * to the chain previously taken during addition.
		 */
		tcf_chain_put_explicitly_created(chain);
		break;
	case RTM_GETCHAIN:
		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
				      n->nlmsg_seq, n->nlmsg_type, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
		break;
	default:
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(extack, "Unsupported message type");
		goto errout;
	}

errout:
	tcf_chain_put(chain);
errout_block:
	tcf_block_release(q, block, true);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;

errout_block_locked:
	mutex_unlock(&block->lock);
	goto errout_block;
}

/* called with RTNL */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_chain *chain;
	long index_start;
	long index;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		if (!tcm->tcm_parent)
			q = dev->qdisc;
		else
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));

		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	mutex_lock(&block->lock);
	list_for_each_entry(chain, &block->chain_list, list) {
		if ((tca[TCA_CHAIN] &&
		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
			continue;
		if (index < index_start) {
			index++;
			continue;
		}
		if (tcf_chain_held_by_acts_only(chain))
			continue;
		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
					 chain->index, net, skb, block,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 RTM_NEWCHAIN);
		if (err <= 0)
			break;
		index++;
	}
	mutex_unlock(&block->lock);

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	if (exts->actions) {
		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
		kfree(exts->actions);
	}
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

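/* Parse and bind the actions attached to a filter: either a single old-style
 * police action or a list of actions under the classifier's action attribute.
 */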
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      bool rtnl_held, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;
		size_t attr_size = 0;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND, rtnl_held,
						extack);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			int err;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      exts->actions, &attr_size,
					      rtnl_held, extack);
			if (err < 0)
				return err;
			exts->nr_actions = err;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police])) {
		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
		return -EOPNOTSUPP;
	}
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			nest = nla_nest_start_noflag(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			if (tcf_action_dump(skb, exts->actions, 0, 0, false)
			    < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start_noflag(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);

int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (!exts->action || !tcf_exts_has_actions(exts))
		return 0;

	nest = nla_nest_start_noflag(skb, exts->action);
	if (!nest)
		goto nla_put_failure;

	if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_terse_dump);

int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);

static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
{
	if (*flags & TCA_CLS_FLAGS_IN_HW)
		return;
	*flags |= TCA_CLS_FLAGS_IN_HW;
	atomic_inc(&block->offloadcnt);
}

static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
{
	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
		return;
	*flags &= ~TCA_CLS_FLAGS_IN_HW;
	atomic_dec(&block->offloadcnt);
}

static void tc_cls_offload_cnt_update(struct tcf_block *block,
				      struct tcf_proto *tp, u32 *cnt,
				      u32 *flags, u32 diff, bool add)
{
	lockdep_assert_held(&block->cb_lock);

	spin_lock(&tp->lock);
	if (add) {
		if (!*cnt)
			tcf_block_offload_inc(block, flags);
		*cnt += diff;
	} else {
		*cnt -= diff;
		if (!*cnt)
			tcf_block_offload_dec(block, flags);
	}
	spin_unlock(&tp->lock);
}

static void
tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
			 u32 *cnt, u32 *flags)
{
	lockdep_assert_held(&block->cb_lock);

	spin_lock(&tp->lock);
	tcf_block_offload_dec(block, flags);
	*cnt = 0;
	spin_unlock(&tp->lock);
}

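/* Invoke every callback registered on the block's flow_block. Returns the
 * number of callbacks that accepted the command, or the first error when
 * err_stop is set.
 */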
static int
__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		   void *type_data, bool err_stop)
{
	struct flow_block_cb *block_cb;
	int ok_count = 0;
	int err;

	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
		err = block_cb->cb(type, type_data, block_cb->cb_priv);
		if (err) {
			if (err_stop)
				return err;
		} else {
			ok_count++;
		}
	}
	return ok_count;
}

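/* Run block callbacks for a command that does not change per-filter offload
 * accounting. The rtnl mutex is taken first when the block is bound to a
 * device that requires it.
 */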
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		     void *type_data, bool err_stop, bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);

	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);

/* Non-destructive filter add. If filter that wasn't already in hardware is
 * successfully offloaded, increment block offloads counter. On failure,
 * previously offloaded filter is considered to be intact and offloads counter
 * is not decremented.
 */

int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
		    enum tc_setup_type type, void *type_data, bool err_stop,
		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop) {
		ok_count = -EOPNOTSUPP;
		goto err_unlock;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
	if (ok_count < 0)
		goto err_unlock;

	if (tp->ops->hw_add)
		tp->ops->hw_add(tp, type_data);
	if (ok_count > 0)
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
					  ok_count, true);
err_unlock:
	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count < 0 ? ok_count : 0;
}
EXPORT_SYMBOL(tc_setup_cb_add);

/* Destructive filter replace. If filter that wasn't already in hardware is
 * successfully offloaded, increment block offload counter. On failure,
 * previously offloaded filter is considered to be destroyed and offload counter
 * is decremented.
 */

int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
			enum tc_setup_type type, void *type_data, bool err_stop,
			u32 *old_flags, unsigned int *old_in_hw_count,
			u32 *new_flags, unsigned int *new_in_hw_count,
			bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop) {
		ok_count = -EOPNOTSUPP;
		goto err_unlock;
	}

	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
	if (tp->ops->hw_del)
		tp->ops->hw_del(tp, type_data);

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
	if (ok_count < 0)
		goto err_unlock;

	if (tp->ops->hw_add)
		tp->ops->hw_add(tp, type_data);
	if (ok_count > 0)
		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
					  new_flags, ok_count, true);
err_unlock:
	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count < 0 ? ok_count : 0;
}
EXPORT_SYMBOL(tc_setup_cb_replace);

/* Destroy filter and decrement block offload counter, if filter was previously
 * offloaded.
 */

int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
			enum tc_setup_type type, void *type_data, bool err_stop,
			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);

	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
	if (tp->ops->hw_del)
		tp->ops->hw_del(tp, type_data);

	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count < 0 ? ok_count : 0;
}
EXPORT_SYMBOL(tc_setup_cb_destroy);

int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
			  bool add, flow_setup_cb_t *cb,
			  enum tc_setup_type type, void *type_data,
			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
{
	int err = cb(type, type_data, cb_priv);

	if (err) {
		if (add && tc_skip_sw(*flags))
			return err;
	} else {
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
					  add);
	}

	return 0;
}
EXPORT_SYMBOL(tc_setup_cb_reoffload);

static int tcf_act_get_cookie(struct flow_action_entry *entry,
			      const struct tc_action *act)
{
	struct tc_cookie *cookie;
	int err = 0;

	rcu_read_lock();
	cookie = rcu_dereference(act->act_cookie);
	if (cookie) {
		entry->cookie = flow_action_cookie_create(cookie->data,
							  cookie->len,
							  GFP_ATOMIC);
		if (!entry->cookie)
			err = -ENOMEM;
	}
	rcu_read_unlock();
	return err;
}

static void tcf_act_put_cookie(struct flow_action_entry *entry)
{
	flow_action_cookie_destroy(entry->cookie);
}

void tc_cleanup_flow_action(struct flow_action *flow_action)
{
	struct flow_action_entry *entry;
	int i;

	flow_action_for_each(i, entry, flow_action) {
		tcf_act_put_cookie(entry);
		if (entry->destructor)
			entry->destructor(entry->destructor_priv);
	}
}
EXPORT_SYMBOL(tc_cleanup_flow_action);

static void tcf_mirred_get_dev(struct flow_action_entry *entry,
			       const struct tc_action *act)
{
#ifdef CONFIG_NET_CLS_ACT
	entry->dev = act->ops->get_dev(act, &entry->destructor);
	if (!entry->dev)
		return;
	entry->destructor_priv = entry->dev;
#endif
}

static void tcf_tunnel_encap_put_tunnel(void *priv)
{
	struct ip_tunnel_info *tunnel = priv;

	kfree(tunnel);
}

static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
				       const struct tc_action *act)
{
	entry->tunnel = tcf_tunnel_info_copy(act);
	if (!entry->tunnel)
		return -ENOMEM;
	entry->destructor = tcf_tunnel_encap_put_tunnel;
	entry->destructor_priv = entry->tunnel;
	return 0;
}

static void tcf_sample_get_group(struct flow_action_entry *entry,
				 const struct tc_action *act)
{
#ifdef CONFIG_NET_CLS_ACT
	entry->sample.psample_group =
		act->ops->get_psample_group(act, &entry->destructor);
	entry->destructor_priv = entry->sample.psample_group;
#endif
}

static void tcf_gate_entry_destructor(void *priv)
{
	struct action_gate_entry *oe = priv;

	kfree(oe);
}

static int tcf_gate_get_entries(struct flow_action_entry *entry,
				const struct tc_action *act)
{
	entry->gate.entries = tcf_gate_get_list(act);

	if (!entry->gate.entries)
		return -EINVAL;

	entry->destructor = tcf_gate_entry_destructor;
	entry->destructor_priv = entry->gate.entries;

	return 0;
}

static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
{
	if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
		return FLOW_ACTION_HW_STATS_DONT_CARE;
	else if (!hw_stats)
		return FLOW_ACTION_HW_STATS_DISABLED;

	return hw_stats;
}

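/* Translate the tc actions attached to a filter (tcf_exts) into generic
 * flow_action entries for drivers. Each action's tcfa_lock is held while its
 * state is copied; on error the entries built so far are cleaned up.
 */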
int tc_setup_flow_action(struct flow_action *flow_action,
			 const struct tcf_exts *exts)
{
	struct tc_action *act;
	int i, j, k, err = 0;

	BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
	BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
	BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);

	if (!exts)
		return 0;

	j = 0;
	tcf_exts_for_each_action(i, act, exts) {
		struct flow_action_entry *entry;

		entry = &flow_action->entries[j];
		spin_lock_bh(&act->tcfa_lock);
		err = tcf_act_get_cookie(entry, act);
		if (err)
			goto err_out_locked;

		entry->hw_stats = tc_act_hw_stats(act->hw_stats);

		if (is_tcf_gact_ok(act)) {
			entry->id = FLOW_ACTION_ACCEPT;
		} else if (is_tcf_gact_shot(act)) {
			entry->id = FLOW_ACTION_DROP;
		} else if (is_tcf_gact_trap(act)) {
			entry->id = FLOW_ACTION_TRAP;
		} else if (is_tcf_gact_goto_chain(act)) {
			entry->id = FLOW_ACTION_GOTO;
			entry->chain_index = tcf_gact_goto_chain_index(act);
		} else if (is_tcf_mirred_egress_redirect(act)) {
			entry->id = FLOW_ACTION_REDIRECT;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_mirred_egress_mirror(act)) {
			entry->id = FLOW_ACTION_MIRRED;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_mirred_ingress_redirect(act)) {
			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_mirred_ingress_mirror(act)) {
			entry->id = FLOW_ACTION_MIRRED_INGRESS;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_vlan(act)) {
			switch (tcf_vlan_action(act)) {
			case TCA_VLAN_ACT_PUSH:
				entry->id = FLOW_ACTION_VLAN_PUSH;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			case TCA_VLAN_ACT_POP:
				entry->id = FLOW_ACTION_VLAN_POP;
				break;
			case TCA_VLAN_ACT_MODIFY:
				entry->id = FLOW_ACTION_VLAN_MANGLE;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			default:
				err = -EOPNOTSUPP;
				goto err_out_locked;
			}
		} else if (is_tcf_tunnel_set(act)) {
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			err = tcf_tunnel_encap_get_tunnel(entry, act);
			if (err)
				goto err_out_locked;
		} else if (is_tcf_tunnel_release(act)) {
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		} else if (is_tcf_pedit(act)) {
			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
				switch (tcf_pedit_cmd(act, k)) {
				case TCA_PEDIT_KEY_EX_CMD_SET:
					entry->id = FLOW_ACTION_MANGLE;
					break;
				case TCA_PEDIT_KEY_EX_CMD_ADD:
					entry->id = FLOW_ACTION_ADD;
					break;
				default:
					err = -EOPNOTSUPP;
					goto err_out_locked;
				}
				entry->mangle.htype = tcf_pedit_htype(act, k);
				entry->mangle.mask = tcf_pedit_mask(act, k);
				entry->mangle.val = tcf_pedit_val(act, k);
				entry->mangle.offset = tcf_pedit_offset(act, k);
				entry->hw_stats = tc_act_hw_stats(act->hw_stats);
				entry = &flow_action->entries[++j];
			}
		} else if (is_tcf_csum(act)) {
			entry->id = FLOW_ACTION_CSUM;
			entry->csum_flags = tcf_csum_update_flags(act);
		} else if (is_tcf_skbedit_mark(act)) {
			entry->id = FLOW_ACTION_MARK;
			entry->mark = tcf_skbedit_mark(act);
		} else if (is_tcf_sample(act)) {
			entry->id = FLOW_ACTION_SAMPLE;
			entry->sample.trunc_size = tcf_sample_trunc_size(act);
			entry->sample.truncate = tcf_sample_truncate(act);
			entry->sample.rate = tcf_sample_rate(act);
			tcf_sample_get_group(entry, act);
		} else if (is_tcf_police(act)) {
			entry->id = FLOW_ACTION_POLICE;
			entry->police.burst = tcf_police_burst(act);
			entry->police.rate_bytes_ps =
				tcf_police_rate_bytes_ps(act);
			entry->police.mtu = tcf_police_tcfp_mtu(act);
			entry->police.index = act->tcfa_index;
		} else if (is_tcf_ct(act)) {
			entry->id = FLOW_ACTION_CT;
			entry->ct.action = tcf_ct_action(act);
			entry->ct.zone = tcf_ct_zone(act);
			entry->ct.flow_table = tcf_ct_ft(act);
		} else if (is_tcf_mpls(act)) {
			switch (tcf_mpls_action(act)) {
			case TCA_MPLS_ACT_PUSH:
				entry->id = FLOW_ACTION_MPLS_PUSH;
				entry->mpls_push.proto = tcf_mpls_proto(act);
				entry->mpls_push.label = tcf_mpls_label(act);
				entry->mpls_push.tc = tcf_mpls_tc(act);
				entry->mpls_push.bos = tcf_mpls_bos(act);
				entry->mpls_push.ttl = tcf_mpls_ttl(act);
				break;
			case TCA_MPLS_ACT_POP:
				entry->id = FLOW_ACTION_MPLS_POP;
				entry->mpls_pop.proto = tcf_mpls_proto(act);
				break;
			case TCA_MPLS_ACT_MODIFY:
				entry->id = FLOW_ACTION_MPLS_MANGLE;
				entry->mpls_mangle.label = tcf_mpls_label(act);
				entry->mpls_mangle.tc = tcf_mpls_tc(act);
				entry->mpls_mangle.bos = tcf_mpls_bos(act);
				entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
				break;
			default:
				err = -EOPNOTSUPP;
				goto err_out_locked;
			}
		} else if (is_tcf_skbedit_ptype(act)) {
			entry->id = FLOW_ACTION_PTYPE;
			entry->ptype = tcf_skbedit_ptype(act);
		} else if (is_tcf_skbedit_priority(act)) {
			entry->id = FLOW_ACTION_PRIORITY;
			entry->priority = tcf_skbedit_priority(act);
		} else if (is_tcf_gate(act)) {
			entry->id = FLOW_ACTION_GATE;
			entry->gate.index = tcf_gate_index(act);
			entry->gate.prio = tcf_gate_prio(act);
			entry->gate.basetime = tcf_gate_basetime(act);
			entry->gate.cycletime = tcf_gate_cycletime(act);
			entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
			entry->gate.num_entries = tcf_gate_num_entries(act);
			err = tcf_gate_get_entries(entry, act);
			if (err)
				goto err_out_locked;
		} else {
			err = -EOPNOTSUPP;
			goto err_out_locked;
		}
		spin_unlock_bh(&act->tcfa_lock);

		if (!is_tcf_pedit(act))
			j++;
	}

err_out:
	if (err)
		tc_cleanup_flow_action(flow_action);

	return err;
err_out_locked:
	spin_unlock_bh(&act->tcfa_lock);
	goto err_out;
}
EXPORT_SYMBOL(tc_setup_flow_action);

unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
{
	unsigned int num_acts = 0;
	struct tc_action *act;
	int i;

	tcf_exts_for_each_action(i, act, exts) {
		if (is_tcf_pedit(act))
			num_acts += tcf_pedit_nkeys(act);
		else
			num_acts++;
	}
	return num_acts;
}
EXPORT_SYMBOL(tcf_exts_num_actions);

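/* Illustrative sketch (not part of this file): a classifier's hardware
 * offload path is expected to size the rule with tcf_exts_num_actions()
 * and then let tc_setup_flow_action() translate the actions, roughly as
 * cls_flower does.  The 'cls_flower' and 'f' names below stand for the
 * caller's own state and are assumptions, not definitions from this file.
 *
 *	cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 *	if (!cls_flower.rule)
 *		return -ENOMEM;
 *
 *	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
 *	if (err) {
 *		kfree(cls_flower.rule);
 *		return err;
 *	}
 */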
#ifdef CONFIG_NET_CLS_ACT
static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
					u32 *p_block_index,
					struct netlink_ext_ack *extack)
{
	*p_block_index = nla_get_u32(block_index_attr);
	if (!*p_block_index) {
		NL_SET_ERR_MSG(extack, "Block number may not be zero");
		return -EINVAL;
	}

	return 0;
}

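/* Qevent blocks are optional: when the qdisc was configured without the
 * corresponding block-index attribute, tcf_qevent_init() leaves the qevent
 * disabled (qe->info.block_index remains zero) and the other tcf_qevent_*
 * helpers back off accordingly.
 */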
int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
		    enum flow_block_binder_type binder_type,
		    struct nlattr *block_index_attr,
		    struct netlink_ext_ack *extack)
{
	u32 block_index;
	int err;

	if (!block_index_attr)
		return 0;

	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
	if (err)
		return err;

	if (!block_index)
		return 0;

	qe->info.binder_type = binder_type;
	qe->info.chain_head_change = tcf_chain_head_change_dflt;
	qe->info.chain_head_change_priv = &qe->filter_chain;
	qe->info.block_index = block_index;

	return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
}
EXPORT_SYMBOL(tcf_qevent_init);

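/* Illustrative sketch (not part of this file): a qdisc exposing a qevent,
 * for example RED's early_drop event, is expected to wire these helpers up
 * roughly as follows.  The 'q->qe_early_drop' member, the
 * TCA_RED_EARLY_DROP_BLOCK attribute and the binder type are modelled on
 * sch_red and are assumptions as far as this file is concerned.
 *
 *	err = tcf_qevent_init(&q->qe_early_drop, sch,
 *			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
 *			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
 *	if (err)
 *		return err;
 *
 * with tcf_qevent_validate_change() called from the qdisc's .change
 * callback, tcf_qevent_destroy() from .destroy and tcf_qevent_dump() from
 * .dump.
 */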
void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
{
	if (qe->info.block_index)
		tcf_block_put_ext(qe->block, sch, &qe->info);
}
EXPORT_SYMBOL(tcf_qevent_destroy);

int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
			       struct netlink_ext_ack *extack)
{
	u32 block_index;
	int err;

	if (!block_index_attr)
		return 0;

	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
	if (err)
		return err;

	/* Bounce newly-configured block or change in block. */
	if (block_index != qe->info.block_index) {
		NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL(tcf_qevent_validate_change);

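/* Run the qevent's classifier chain on @skb.  Returns @skb when the packet
 * should continue down the qdisc's normal path.  Returns NULL when the
 * chain consumed the packet (drop, stolen/queued/trap or redirect); in that
 * case *ret holds the code the qdisc's enqueue should propagate.
 */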
struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
				  struct sk_buff **to_free, int *ret)
{
	struct tcf_result cl_res;
	struct tcf_proto *fl;

	if (!qe->info.block_index)
		return skb;

	fl = rcu_dereference_bh(qe->filter_chain);

	switch (tcf_classify(skb, fl, &cl_res, false)) {
	case TC_ACT_SHOT:
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		*ret = __NET_XMIT_BYPASS;
		return NULL;
	case TC_ACT_STOLEN:
	case TC_ACT_QUEUED:
	case TC_ACT_TRAP:
		__qdisc_drop(skb, to_free);
		*ret = __NET_XMIT_STOLEN;
		return NULL;
	case TC_ACT_REDIRECT:
		skb_do_redirect(skb);
		*ret = __NET_XMIT_STOLEN;
		return NULL;
	}

	return skb;
}
EXPORT_SYMBOL(tcf_qevent_handle);

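/* Illustrative sketch (not part of this file): the qdisc fires the qevent
 * at its point of interest in the enqueue path, e.g. just before an early
 * drop; the names are assumptions borrowed from sch_red.
 *
 *	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
 *	if (!skb)
 *		return NET_XMIT_CN | ret;
 *
 * A NULL return means the block's filters took ownership of the packet and
 * the qdisc must not touch it again.
 */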
int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
{
	if (!qe->info.block_index)
		return 0;
	return nla_put_u32(skb, attr_name, qe->info.block_index);
}
EXPORT_SYMBOL(tcf_qevent_dump);
#endif

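/* Per-netns state: tn->idr maps the indexes of shareable blocks to their
 * struct tcf_block; insertions and removals are serialized by tn->idr_lock
 * (see tcf_block_insert() and tcf_block_lookup() earlier in this file).
 */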
static __net_init int tcf_net_init(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock_init(&tn->idr_lock);
	idr_init(&tn->idr);
	return 0;
}

static void __net_exit tcf_net_exit(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_destroy(&tn->idr);
}

static struct pernet_operations tcf_net_ops = {
	.init = tcf_net_init,
	.exit = tcf_net_exit,
	.id   = &tcf_net_id,
	.size = sizeof(struct tcf_net),
};

static int __init tc_filter_init(void)
{
	int err;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&tcf_net_ops);
	if (err)
		goto err_register_pernet_subsys;

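	/* The filter doit handlers register as RTNL_FLAG_DOIT_UNLOCKED and
	 * take the rtnl lock themselves only when the qdisc or classifier
	 * involved does not support unlocked execution.
	 */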
	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
		      tc_dump_chain, 0);

	return 0;

err_register_pernet_subsys:
	destroy_workqueue(tc_filter_wq);
	return err;
}

subsys_initcall(tc_filter_init);