/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;

	ops = __tcf_proto_lookup_ops(kind);
	if (ops)
		return ops;
#ifdef CONFIG_MODULES
	rtnl_unlock();
	request_module("cls_%s", kind);
	rtnl_lock();
	ops = __tcf_proto_lookup_ops(kind);
	/* We dropped the RTNL semaphore in order to perform
	 * the module load. So, even if we succeeded in loading
	 * the module we have to replay the request. We indicate
	 * this using -EAGAIN.
	 */
	if (ops) {
		module_put(ops->owner);
		return ERR_PTR(-EAGAIN);
	}
#endif
	NL_SET_ERR_MSG(extack, "TC classifier not found");
	return ERR_PTR(-ENOENT);
}
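
/* Example (editor's note): if kind "u32" is requested while cls_u32 is
 * not yet loaded, the code above calls request_module("cls_u32") and
 * then returns -EAGAIN, and the caller replays the whole netlink
 * request (see the "replay:" label in tc_new_tfilter() below).
 */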

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);
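
/* Usage sketch (editor's illustration, not part of the kernel source):
 * a classifier module registers its ops on load and unregisters them on
 * unload.  All cls_example_* names below are hypothetical.
 */
#if 0
static struct tcf_proto_ops cls_example_ops __read_mostly = {
	.kind		= "example",
	.classify	= cls_example_classify,
	.init		= cls_example_init,
	.destroy	= cls_example_destroy,
	.get		= cls_example_get,
	.change		= cls_example_change,
	.delete		= cls_example_delete,
	.walk		= cls_example_walk,
	.owner		= THIS_MODULE,
};

static int __init cls_example_init_module(void)
{
	return register_tcf_proto_ops(&cls_example_ops);
}

static void __exit cls_example_exit_module(void)
{
	unregister_tcf_proto_ops(&cls_example_ops);
}
#endif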

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
	INIT_RCU_WORK(rwork, func);
	return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);
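
/* Usage sketch (editor's illustration): a classifier typically embeds a
 * struct rcu_work in its filter and lets tcf_queue_work() free it after
 * an RCU grace period.  The cls_example_* names are hypothetical.
 */
#if 0
struct cls_example_filter {
	struct rcu_work rwork;
	/* ... filter state ... */
};

static void cls_example_delete_filter_work(struct work_struct *work)
{
	struct cls_example_filter *f = container_of(to_rcu_work(work),
						    struct cls_example_filter,
						    rwork);
	kfree(f);
}

	/* in the classifier's delete/destroy path: */
	tcf_queue_work(&f->rwork, cls_example_delete_filter_work);
#endif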

/* Select new prio value from the range, managed by kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}
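
/* Worked example (editor's note): with an empty chain the first
 * auto-allocated prio is TC_H_MAJ(0xC0000000) == 0xC0000000; if the
 * current head already has prio 0xC0000000, the next filter gets
 * TC_H_MAJ(0xC0000000 - 1) == 0xBFFF0000, i.e. user-visible prio
 * 0xBFFF just below it.
 */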

static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
					  struct netlink_ext_ack *extack)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind, extack);
	if (IS_ERR(tp->ops)) {
		err = PTR_ERR(tp->ops);
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

static void tcf_proto_destroy(struct tcf_proto *tp,
			      struct netlink_ext_ack *extack)
{
	tp->ops->destroy(tp, extack);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}

#define ASSERT_BLOCK_LOCKED(block)					\
	lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
	struct list_head list;
	tcf_chain_head_change_t *chain_head_change;
	void *chain_head_change_priv;
};

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	mutex_init(&chain->filter_chain_lock);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	if (!chain->index)
		block->chain0.chain = chain;
	return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
				       struct tcf_proto *tp_head)
{
	if (item->chain_head_change)
		item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
				   struct tcf_proto *tp_head)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_block *block = chain->block;

	if (chain->index)
		return;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
		tcf_chain_head_change_item(item, tp_head);
	mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	ASSERT_BLOCK_LOCKED(block);

	list_del(&chain->list);
	if (!chain->index)
		block->chain0.chain = NULL;

	if (list_empty(&block->chain_list) &&
	    refcount_read(&block->refcnt) == 0)
		return true;

	return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
	mutex_destroy(&block->lock);
	kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
	struct tcf_block *block = chain->block;

	mutex_destroy(&chain->filter_chain_lock);
	kfree(chain);
	if (free_block)
		tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	/* In case all the references are action references, this
	 * chain should not be shown to the user.
	 */
	return chain->refcnt == chain->action_refcnt;
}
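
/* Example (editor's note): a chain created only because some action
 * jumps to it via TC_ACT_GOTO_CHAIN has refcnt == action_refcnt, so it
 * is skipped by dumps until a filter takes a non-action reference.
 */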

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index)
			return chain;
	}
	return NULL;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast);

static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
					 u32 chain_index, bool create,
					 bool by_act)
{
	struct tcf_chain *chain = NULL;
	bool is_first_reference;

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (chain) {
		tcf_chain_hold(chain);
	} else {
		if (!create)
			goto errout;
		chain = tcf_chain_create(block, chain_index);
		if (!chain)
			goto errout;
	}

	if (by_act)
		++chain->action_refcnt;
	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
	mutex_unlock(&block->lock);

	/* Send notification only in case we got the first
	 * non-action reference. Until then, the chain acts only as
	 * a placeholder for actions pointing to it and user ought
	 * not know about them.
	 */
	if (is_first_reference && !by_act)
		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);

	return chain;

errout:
	mutex_unlock(&block->lock);
	return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				       bool create)
{
	return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
	return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
			    bool explicitly_created)
{
	struct tcf_block *block = chain->block;
	const struct tcf_proto_ops *tmplt_ops;
	bool is_last, free_block = false;
	unsigned int refcnt;
	void *tmplt_priv;
	u32 chain_index;

	mutex_lock(&block->lock);
	if (explicitly_created) {
		if (!chain->explicitly_created) {
			mutex_unlock(&block->lock);
			return;
		}
		chain->explicitly_created = false;
	}

	if (by_act)
		chain->action_refcnt--;

	/* tc_chain_notify_delete can't be called while holding block lock.
	 * However, when block is unlocked chain can be changed concurrently, so
	 * save these to temporary variables.
	 */
	refcnt = --chain->refcnt;
	is_last = refcnt - chain->action_refcnt == 0;
	tmplt_ops = chain->tmplt_ops;
	tmplt_priv = chain->tmplt_priv;
	chain_index = chain->index;

	if (refcnt == 0)
		free_block = tcf_chain_detach(chain);
	mutex_unlock(&block->lock);

	/* The last dropped non-action reference will trigger notification. */
	if (is_last && !by_act)
		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
				       block, NULL, 0, 0, false);

	if (refcnt == 0) {
		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
		tcf_chain_destroy(chain, free_block);
	}
}

static void tcf_chain_put(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, true);
}

static void tcf_chain_flush(struct tcf_chain *chain)
{
	struct tcf_proto *tp;

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_dereference(chain->filter_chain, chain);
	tcf_chain0_head_change(chain, NULL);
	mutex_unlock(&chain->filter_chain_lock);

	while (tp) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
		tcf_proto_destroy(tp, NULL);
		tp = rtnl_dereference(chain->filter_chain);
		tcf_chain_put(chain);
	}
}

static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
{
	const struct Qdisc_class_ops *cops;
	struct Qdisc *qdisc;

	if (!dev_ingress_queue(dev))
		return NULL;

	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
	if (!qdisc)
		return NULL;

	cops = qdisc->ops->cl_ops;
	if (!cops)
		return NULL;

	if (!cops->tcf_block)
		return NULL;

	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
}

static struct rhashtable indr_setup_block_ht;

struct tc_indr_block_dev {
	struct rhash_head ht_node;
	struct net_device *dev;
	unsigned int refcnt;
	struct list_head cb_list;
	struct tcf_block *block;
};

struct tc_indr_block_cb {
	struct list_head list;
	void *cb_priv;
	tc_indr_block_bind_cb_t *cb;
	void *cb_ident;
};

static const struct rhashtable_params tc_indr_setup_block_ht_params = {
	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
	.key_len	= sizeof(struct net_device *),
};

static struct tc_indr_block_dev *
tc_indr_block_dev_lookup(struct net_device *dev)
{
	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
				      tc_indr_setup_block_ht_params);
}

static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
{
	struct tc_indr_block_dev *indr_dev;

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (indr_dev)
		goto inc_ref;

	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
	if (!indr_dev)
		return NULL;

	INIT_LIST_HEAD(&indr_dev->cb_list);
	indr_dev->dev = dev;
	indr_dev->block = tc_dev_ingress_block(dev);
	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
				   tc_indr_setup_block_ht_params)) {
		kfree(indr_dev);
		return NULL;
	}

inc_ref:
	indr_dev->refcnt++;
	return indr_dev;
}

static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
{
	if (--indr_dev->refcnt)
		return;

	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
			       tc_indr_setup_block_ht_params);
	kfree(indr_dev);
}

static struct tc_indr_block_cb *
tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
			tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;

	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
		if (indr_block_cb->cb == cb &&
		    indr_block_cb->cb_ident == cb_ident)
			return indr_block_cb;
	return NULL;
}

static struct tc_indr_block_cb *
tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
		     tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;

	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
	if (indr_block_cb)
		return ERR_PTR(-EEXIST);

	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
	if (!indr_block_cb)
		return ERR_PTR(-ENOMEM);

	indr_block_cb->cb_priv = cb_priv;
	indr_block_cb->cb = cb;
	indr_block_cb->cb_ident = cb_ident;
	list_add(&indr_block_cb->list, &indr_dev->cb_list);

	return indr_block_cb;
}

static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
{
	list_del(&indr_block_cb->list);
	kfree(indr_block_cb);
}

static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
				  struct tc_indr_block_cb *indr_block_cb,
				  enum tc_block_command command)
{
	struct tc_block_offload bo = {
		.command	= command,
		.binder_type	= TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
		.block		= indr_dev->block,
	};

	if (!indr_dev->block)
		return;

	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
			  &bo);
}

int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
				tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;
	int err;

	indr_dev = tc_indr_block_dev_get(dev);
	if (!indr_dev)
		return -ENOMEM;

	indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
	err = PTR_ERR_OR_ZERO(indr_block_cb);
	if (err)
		goto err_dev_put;

	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND);
	return 0;

err_dev_put:
	tc_indr_block_dev_put(indr_dev);
	return err;
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);

int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	int err;

	rtnl_lock();
	err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
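
/* Usage sketch (editor's illustration): a driver that offloads tunnel
 * traffic can watch the ingress block of a netdev it does not own.
 * example_indr_setup_tc_cb and priv are hypothetical driver names.
 */
#if 0
	err = tc_indr_block_cb_register(netdev, priv,
					example_indr_setup_tc_cb, priv);
	if (err)
		return err;
	/* ... later, on cleanup: */
	tc_indr_block_cb_unregister(netdev, example_indr_setup_tc_cb, priv);
#endif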

void __tc_indr_block_cb_unregister(struct net_device *dev,
				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (!indr_dev)
		return;

	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
	if (!indr_block_cb)
		return;

	/* Send unbind message if required to free any block cbs. */
	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
	tc_indr_block_cb_del(indr_block_cb);
	tc_indr_block_dev_put(indr_dev);
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);

void tc_indr_block_cb_unregister(struct net_device *dev,
				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
	rtnl_lock();
	__tc_indr_block_cb_unregister(dev, cb, cb_ident);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);

static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
			       struct tcf_block_ext_info *ei,
			       enum tc_block_command command,
			       struct netlink_ext_ack *extack)
{
	struct tc_indr_block_cb *indr_block_cb;
	struct tc_indr_block_dev *indr_dev;
	struct tc_block_offload bo = {
		.command	= command,
		.binder_type	= ei->binder_type,
		.block		= block,
		.extack		= extack,
	};

	indr_dev = tc_indr_block_dev_lookup(dev);
	if (!indr_dev)
		return;

	indr_dev->block = command == TC_BLOCK_BIND ? block : NULL;

	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
				  &bo);
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
	return block->offloadcnt;
}

static int tcf_block_offload_cmd(struct tcf_block *block,
				 struct net_device *dev,
				 struct tcf_block_ext_info *ei,
				 enum tc_block_command command,
				 struct netlink_ext_ack *extack)
{
	struct tc_block_offload bo = {};

	bo.command = command;
	bo.binder_type = ei->binder_type;
	bo.block = block;
	bo.extack = extack;
	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
}

static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
				  struct tcf_block_ext_info *ei,
				  struct netlink_ext_ack *extack)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_inc;

	/* If tc offload feature is disabled and the block we try to bind
	 * to already has some offloaded filters, refuse to bind.
	 */
	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
		return -EOPNOTSUPP;
	}

	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_inc;
	if (err)
		return err;

	tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
	return 0;

no_offload_dev_inc:
	if (tcf_block_offload_in_use(block))
		return -EOPNOTSUPP;
	block->nooffloaddevcnt++;
	tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
	return 0;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
				     struct tcf_block_ext_info *ei)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL);

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_dec;
	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_dec;
	return;

no_offload_dev_dec:
	WARN_ON(block->nooffloaddevcnt-- == 0);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
			      struct tcf_block_ext_info *ei,
			      struct netlink_ext_ack *extack)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_chain *chain0;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
		return -ENOMEM;
	}
	item->chain_head_change = ei->chain_head_change;
	item->chain_head_change_priv = ei->chain_head_change_priv;

	mutex_lock(&block->lock);
	chain0 = block->chain0.chain;
	if (chain0)
		tcf_chain_hold(chain0);
	else
		list_add(&item->list, &block->chain0.filter_chain_list);
	mutex_unlock(&block->lock);

	if (chain0) {
		struct tcf_proto *tp_head;

		mutex_lock(&chain0->filter_chain_lock);

		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
		if (tp_head)
			tcf_chain_head_change_item(item, tp_head);

		mutex_lock(&block->lock);
		list_add(&item->list, &block->chain0.filter_chain_list);
		mutex_unlock(&block->lock);

		mutex_unlock(&chain0->filter_chain_lock);
		tcf_chain_put(chain0);
	}

	return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
			      struct tcf_block_ext_info *ei)
{
	struct tcf_filter_chain_list_item *item;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
		    (item->chain_head_change == ei->chain_head_change &&
		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
			if (block->chain0.chain)
				tcf_chain_head_change_item(item, NULL);
			list_del(&item->list);
			mutex_unlock(&block->lock);

			kfree(item);
			return;
		}
	}
	mutex_unlock(&block->lock);
	WARN_ON(1);
}

struct tcf_net {
	spinlock_t idr_lock; /* Protects idr */
	struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
			    struct netlink_ext_ack *extack)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);
	int err;

	idr_preload(GFP_KERNEL);
	spin_lock(&tn->idr_lock);
	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
			    GFP_NOWAIT);
	spin_unlock(&tn->idr_lock);
	idr_preload_end();

	return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock(&tn->idr_lock);
	idr_remove(&tn->idr, block->index);
	spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	block = kzalloc(sizeof(*block), GFP_KERNEL);
	if (!block) {
		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
		return ERR_PTR(-ENOMEM);
	}
	mutex_init(&block->lock);
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->cb_list);
	INIT_LIST_HEAD(&block->owner_list);
	INIT_LIST_HEAD(&block->chain0.filter_chain_list);

	refcount_set(&block->refcnt, 1);
	block->net = net;
	block->index = block_index;

	/* Don't store q pointer for blocks which are shared */
	if (!tcf_block_shared(block))
		block->q = q;
	return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	return idr_find(&tn->idr, block_index);
}

static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
	struct tcf_block *block;

	rcu_read_lock();
	block = tcf_block_lookup(net, block_index);
	if (block && !refcount_inc_not_zero(&block->refcnt))
		block = NULL;
	rcu_read_unlock();

	return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	mutex_lock(&block->lock);
	if (chain)
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);
	else
		chain = list_first_entry_or_null(&block->chain_list,
						 struct tcf_chain, list);

	/* skip all action-only chains */
	while (chain && tcf_chain_held_by_acts_only(chain))
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);

	if (chain)
		tcf_chain_hold(chain);
	mutex_unlock(&block->lock);

	return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * block. It properly obtains block->lock and takes reference to chain before
 * returning it. Users of this function must be tolerant to concurrent chain
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that netlink dump callbacks cannot guarantee a consistent
 * dump because the rtnl lock is released each time an skb is filled with
 * data and sent to user-space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

	if (chain)
		tcf_chain_put(chain);

	return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);

static void tcf_block_flush_all_chains(struct tcf_block *block)
{
	struct tcf_chain *chain;

	/* Last reference to block. At this point chains cannot be added or
	 * removed concurrently.
	 */
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		tcf_chain_put_explicitly_created(chain);
		tcf_chain_flush(chain);
	}
}

static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
			    struct tcf_block_ext_info *ei)
{
	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
		/* Flushing/putting all chains will cause the block to be
		 * deallocated when last chain is freed. However, if chain_list
		 * is empty, block has to be manually deallocated. After block
		 * reference counter reached 0, it is no longer possible to
		 * increment it or add new chains to block.
		 */
		bool free_block = list_empty(&block->chain_list);

		mutex_unlock(&block->lock);
		if (tcf_block_shared(block))
			tcf_block_remove(block, block->net);

		if (q)
			tcf_block_offload_unbind(block, q, ei);

		if (free_block)
			tcf_block_destroy(block);
		else
			tcf_block_flush_all_chains(block);
	} else if (q) {
		tcf_block_offload_unbind(block, q, ei);
	}
}

static void tcf_block_refcnt_put(struct tcf_block *block)
{
	__tcf_block_put(block, NULL, NULL);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
					u32 *parent, unsigned long *cl,
					int ifindex, u32 block_index,
					struct netlink_ext_ack *extack)
{
	struct tcf_block *block;
	int err = 0;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, block_index);
		if (!block) {
			NL_SET_ERR_MSG(extack, "Block of given index was not found");
			return ERR_PTR(-EINVAL);
		}
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;

		rcu_read_lock();

		/* Find link */
		dev = dev_get_by_index_rcu(net, ifindex);
		if (!dev) {
			rcu_read_unlock();
			return ERR_PTR(-ENODEV);
		}

		/* Find qdisc */
		if (!*parent) {
			*q = dev->qdisc;
			*parent = (*q)->handle;
		} else {
			*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
			if (!*q) {
				NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
				err = -EINVAL;
				goto errout_rcu;
			}
		}

		*q = qdisc_refcount_inc_nz(*q);
		if (!*q) {
			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
			err = -EINVAL;
			goto errout_rcu;
		}

		/* Is it classful? */
		cops = (*q)->ops->cl_ops;
		if (!cops) {
			NL_SET_ERR_MSG(extack, "Qdisc not classful");
			err = -EINVAL;
			goto errout_rcu;
		}

		if (!cops->tcf_block) {
			NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
			err = -EOPNOTSUPP;
			goto errout_rcu;
		}

		/* At this point we know that qdisc is not noop_qdisc,
		 * which means that qdisc holds a reference to net_device
		 * and we hold a reference to qdisc, so it is safe to release
		 * rcu read lock.
		 */
		rcu_read_unlock();

		/* Do we search for filter, attached to class? */
		if (TC_H_MIN(*parent)) {
			*cl = cops->find(*q, *parent);
			if (*cl == 0) {
				NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
				err = -ENOENT;
				goto errout_qdisc;
			}
		}

		/* And the last stroke */
		block = cops->tcf_block(*q, *cl, extack);
		if (!block) {
			err = -EINVAL;
			goto errout_qdisc;
		}
		if (tcf_block_shared(block)) {
			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
			err = -EOPNOTSUPP;
			goto errout_qdisc;
		}

		/* Always take reference to block in order to support execution
		 * of rules update path of cls API without rtnl lock. Caller
		 * must release block when it is finished using it. The 'if'
		 * branch of this conditional obtains its reference by calling
		 * tcf_block_refcnt_get().
		 */
		refcount_inc(&block->refcnt);
	}

	return block;

errout_rcu:
	rcu_read_unlock();
errout_qdisc:
	if (*q) {
		qdisc_put(*q);
		*q = NULL;
	}
	return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
{
	if (!IS_ERR_OR_NULL(block))
		tcf_block_refcnt_put(block);

	if (q)
		qdisc_put(q);
}

struct tcf_block_owner_item {
	struct list_head list;
	struct Qdisc *q;
	enum tcf_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
			       enum tcf_block_binder_type binder_type)
{
	if (block->keep_dst &&
	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
	struct tcf_block_owner_item *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->owner_list, list)
		tcf_block_owner_netif_keep_dst(block, item->q,
					       item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
			       enum tcf_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	item->q = q;
	item->binder_type = binder_type;
	list_add(&item->list, &block->owner_list);
	return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
				enum tcf_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	list_for_each_entry(item, &block->owner_list, list) {
		if (item->q == q && item->binder_type == binder_type) {
			list_del(&item->list);
			kfree(item);
			return;
		}
	}
	WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei,
		      struct netlink_ext_ack *extack)
{
	struct net *net = qdisc_net(q);
	struct tcf_block *block = NULL;
	int err;

	if (ei->block_index)
		/* block_index not 0 means the shared block is requested */
		block = tcf_block_refcnt_get(net, ei->block_index);

	if (!block) {
		block = tcf_block_create(net, q, ei->block_index, extack);
		if (IS_ERR(block))
			return PTR_ERR(block);
		if (tcf_block_shared(block)) {
			err = tcf_block_insert(block, net, extack);
			if (err)
				goto err_block_insert;
		}
	}

	err = tcf_block_owner_add(block, q, ei->binder_type);
	if (err)
		goto err_block_owner_add;

	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

	err = tcf_chain0_head_change_cb_add(block, ei, extack);
	if (err)
		goto err_chain0_head_change_cb_add;

	err = tcf_block_offload_bind(block, q, ei, extack);
	if (err)
		goto err_block_offload_bind;

	*p_block = block;
	return 0;

err_block_offload_bind:
	tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
	tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
	tcf_block_refcnt_put(block);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
		  struct netlink_ext_ack *extack)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);
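
/* Usage sketch (editor's illustration): a simple classful qdisc obtains
 * its block in ->init() and releases it in ->destroy().  q->block and
 * q->filter_list stand for fields of a hypothetical qdisc private area.
 */
#if 0
	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;
	/* ... and in ->destroy(): */
	tcf_block_put(q->block);
#endif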

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	if (!block)
		return;
	tcf_chain0_head_change_cb_del(block, ei);
	tcf_block_owner_del(block, q, ei->binder_type);

	__tcf_block_put(block, q, ei);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	if (!block)
		return;
	tcf_block_put_ext(block, block->q, &ei);
}
EXPORT_SYMBOL(tcf_block_put);
1295

struct tcf_block_cb {
	struct list_head list;
	tc_setup_cb_t *cb;
	void *cb_ident;
	void *cb_priv;
	unsigned int refcnt;
};

void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
{
	return block_cb->cb_priv;
}
EXPORT_SYMBOL(tcf_block_cb_priv);

struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
					 tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *block_cb;

	list_for_each_entry(block_cb, &block->cb_list, list)
		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
			return block_cb;
	return NULL;
}
EXPORT_SYMBOL(tcf_block_cb_lookup);

void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
{
	block_cb->refcnt++;
}
EXPORT_SYMBOL(tcf_block_cb_incref);

unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
{
	return --block_cb->refcnt;
}
EXPORT_SYMBOL(tcf_block_cb_decref);

static int
tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
{
	struct tcf_chain *chain, *chain_prev;
	struct tcf_proto *tp;
	int err;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		for (tp = rtnl_dereference(chain->filter_chain); tp;
		     tp = rtnl_dereference(tp->next)) {
			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
				if (err && add)
					goto err_playback_remove;
			} else if (add && offload_in_use) {
				err = -EOPNOTSUPP;
				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
				goto err_playback_remove;
			}
		}
	}

	return 0;

err_playback_remove:
	tcf_chain_put(chain);
	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
	return err;
}

struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
					     tc_setup_cb_t *cb, void *cb_ident,
					     void *cb_priv,
					     struct netlink_ext_ack *extack)
{
	struct tcf_block_cb *block_cb;
	int err;

	/* Replay any already present rules */
	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
					  tcf_block_offload_in_use(block),
					  extack);
	if (err)
		return ERR_PTR(err);

	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
	if (!block_cb)
		return ERR_PTR(-ENOMEM);
	block_cb->cb = cb;
	block_cb->cb_ident = cb_ident;
	block_cb->cb_priv = cb_priv;
	list_add(&block_cb->list, &block->cb_list);
	return block_cb;
}
EXPORT_SYMBOL(__tcf_block_cb_register);

int tcf_block_cb_register(struct tcf_block *block,
			  tc_setup_cb_t *cb, void *cb_ident,
			  void *cb_priv, struct netlink_ext_ack *extack)
{
	struct tcf_block_cb *block_cb;

	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
					   extack);
	return PTR_ERR_OR_ZERO(block_cb);
}
EXPORT_SYMBOL(tcf_block_cb_register);
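
/* Usage sketch (editor's illustration): the typical driver response to
 * a TC_SETUP_BLOCK request.  All example_* names are hypothetical.
 */
#if 0
static int example_setup_tc_block(struct example_priv *priv,
				  struct tc_block_offload *f)
{
	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
		return -EOPNOTSUPP;

	switch (f->command) {
	case TC_BLOCK_BIND:
		return tcf_block_cb_register(f->block, example_setup_tc_cb,
					     priv, priv, f->extack);
	case TC_BLOCK_UNBIND:
		tcf_block_cb_unregister(f->block, example_setup_tc_cb, priv);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
#endif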

void __tcf_block_cb_unregister(struct tcf_block *block,
			       struct tcf_block_cb *block_cb)
{
	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
				    false, tcf_block_offload_in_use(block),
				    NULL);
	list_del(&block_cb->list);
	kfree(block_cb);
}
EXPORT_SYMBOL(__tcf_block_cb_unregister);

void tcf_block_cb_unregister(struct tcf_block *block,
			     tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *block_cb;

	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
	if (!block_cb)
		return;
	__tcf_block_cb_unregister(block, block_cb);
}
EXPORT_SYMBOL(tcf_block_cb_unregister);

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		__be16 protocol = tc_skb_protocol(skb);
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
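
/* Usage sketch (editor's illustration): a classful qdisc classifies in
 * its enqueue path roughly like this (simplified; q->filter_list is a
 * hypothetical field of the qdisc private area):
 */
#if 0
	struct tcf_result res;
	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
	int result = tcf_classify(skb, fl, &res, false);

	switch (result) {
	case TC_ACT_SHOT:
		/* drop the packet */
		break;
	case TC_ACT_UNSPEC:
		/* no filter matched; fall back to the default class */
		break;
	default:
		/* res.classid selects the target class */
		break;
	}
#endif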

struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info)
{
	return tcf_chain_dereference(*chain_info->pprev, chain);
}

static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain0_head_change(chain, tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
	tcf_chain_hold(chain);
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);

	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
	tcf_chain_put(chain);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	chain_info->next = tp ? tp->next : NULL;
	return tp;
}
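
/* Worked example (editor's note): with filters at prio 1 and prio 3
 * installed, tcf_chain_tp_find() for prio 2 stops at the prio 3 entry
 * and returns NULL, leaving chain_info->pprev at the insertion slot so
 * that tcf_chain_tp_insert() places the new filter between the two.
 */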

static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	if (q) {
		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
		tcm->tcm_parent = parent;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else {
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct tcf_block *block, struct Qdisc *q,
			  u32 parent, void *fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct tcf_block *block, struct Qdisc *q,
			      u32 parent, void *fh, bool unicast, bool *last,
			      struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
	return err;
}

static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct tcf_block *block, struct Qdisc *q,
				 u32 parent, struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next))
		tfilter_notify(net, oskb, n, tp, block,
			       q, parent, NULL, event, false);
}

static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
		/* If no priority is provided by the user,
		 * we allocate one.
		 */
		if (n->nlmsg_flags & NLM_F_CREATE) {
			prio = TC_H_MAKE(0x80000000U, 0U);
			prio_allocate = true;
		} else {
			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, true);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
		err = -ENOMEM;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = PTR_ERR(tp);
		goto errout_locked;
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
			err = -EINVAL;
			goto errout_locked;
		}

		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout_locked;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
							       &chain_info));

		mutex_unlock(&chain->filter_chain_lock);
		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
				      protocol, prio, chain, extack);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout;
		}

		mutex_lock(&chain->filter_chain_lock);
		tcf_chain_tp_insert(chain, &chain_info, tp);
		mutex_unlock(&chain->filter_chain_lock);
		tp_created = 1;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout_locked;
	} else {
		mutex_unlock(&chain->filter_chain_lock);
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout;
		}
	} else if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Filter already exists");
		err = -EEXIST;
		goto errout;
	}

	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
		err = -EINVAL;
		goto errout;
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
			      extack);
	if (err == 0)
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_NEWTFILTER, false);
	else if (tp_created)
		tcf_proto_destroy(tp, NULL);

errout:
	if (chain)
		tcf_chain_put(chain);
	tcf_block_release(q, block);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

1852
	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
1883 1884 1885 1886 1887 1888 1889
		/* User requested flush on non-existent chain. Nothing to do,
		 * so just return success.
		 */
		if (prio == 0) {
			err = 0;
			goto errout;
		}
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -ENOENT;
		goto errout;
	}

	if (prio == 0) {
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		tcf_chain_flush(chain);
		err = 0;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout_locked;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout_locked;
	} else if (t->tcm_handle == 0) {
		tcf_chain_tp_remove(chain, &chain_info, tp);
		mutex_unlock(&chain->filter_chain_lock);

		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_DELTFILTER, false);
		tcf_proto_destroy(tp, extack);
		err = 0;
		goto errout;
	}
	mutex_unlock(&chain->filter_chain_lock);

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		bool last;

		err = tfilter_del_notify(net, skb, n, tp, block,
					 q, parent, fh, false, &last,
					 extack);
		if (err)
			goto errout;
		if (last) {
			mutex_lock(&chain->filter_chain_lock);
			tcf_chain_tp_remove(chain, &chain_info, tp);
			mutex_unlock(&chain->filter_chain_lock);

			tcf_proto_destroy(tp, extack);
		}
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	tcf_block_release(q, block);
	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

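/* Get a single filter (RTM_GETTFILTER) and send it back to the
 * requesting socket via a unicast notification.
 */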
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0) {
		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -EINVAL;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	mutex_unlock(&chain->filter_chain_lock);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		err = tfilter_notify(net, skb, n, tp, block, q, parent,
				     fh, RTM_NEWTFILTER, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	tcf_block_release(q, block);
	return err;
}

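/* Context handed to tcf_node_dump() while walking all filters of one
 * tcf_proto during a netlink dump.
 */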
struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct tcf_block *block;
	struct Qdisc *q;
	u32 parent;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
			     n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
}

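/* Dump all filters of one chain. cb->args[1] tracks progress within the
 * current tcf_proto and cb->args[2] carries the walker cookie, so an
 * interrupted dump can resume where it stopped.
 */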
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
				return false;

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		arg.w.cookie = cb->args[2];
		tp->ops->walk(tp, &arg.w);
		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			return false;
	}
	return true;
}

/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcf_chain *chain, *chain_prev;
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL,
			  cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index)) {
			tcf_chain_put(chain);
			err = -EMSGSIZE;
			break;
		}
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

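/* Fill a netlink message describing a single chain: the chain index
 * plus, if a template is set, its kind and classifier-specific dump.
 */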
static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
			      void *tmplt_priv, u32 chain_index,
			      struct net *net, struct sk_buff *skb,
			      struct tcf_block *block,
			      u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *b = skb_tail_pointer(skb);
	const struct tcf_proto_ops *ops;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	void *priv;

	ops = tmplt_ops;
	priv = tmplt_priv;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_handle = 0;
	if (block->q) {
		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
		tcm->tcm_parent = block->q->handle;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}

	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
		goto nla_put_failure;

	if (ops) {
		if (nla_put_string(skb, TCA_KIND, ops->kind))
			goto nla_put_failure;
		if (ops->tmplt_dump(skb, net, priv) < 0)
			goto nla_put_failure;
	}

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct tcf_block *block = chain->block;
	struct net *net = block->net;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
			       chain->index, net, skb, block, portid,
			       seq, flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}

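/* Like tc_chain_notify(), but usable when the chain itself may already
 * be gone: the template ops/priv and chain index are passed explicitly
 * instead of being read from the chain.
 */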
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct net *net = block->net;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}

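/* Create a chain template from TCA_KIND and the classifier-specific
 * attributes. The classifier must implement the tmplt_create/destroy/dump
 * ops; the module reference taken here is kept until the template is
 * destroyed by tc_chain_tmplt_del().
 */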
static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
			      struct nlattr **tca,
			      struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;
	void *tmplt_priv;

	/* If kind is not set, user did not specify template. */
	if (!tca[TCA_KIND])
		return 0;

	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
		return -EOPNOTSUPP;
	}

	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
	if (IS_ERR(tmplt_priv)) {
		module_put(ops->owner);
		return PTR_ERR(tmplt_priv);
	}
	chain->tmplt_ops = ops;
	chain->tmplt_priv = tmplt_priv;
	return 0;
}

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv)
{
	/* If template ops are not set, there is nothing to do. */
	if (!tmplt_ops)
		return;

	tmplt_ops->tmplt_destroy(tmplt_priv);
	module_put(tmplt_ops->owner);
}

/* Add/delete/get a chain */

static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	unsigned long cl;
	int err;

	if (n->nlmsg_type != RTM_GETCHAIN &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	parent = t->tcm_parent;
	cl = 0;

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block))
		return PTR_ERR(block);

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout_block;
	}

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (n->nlmsg_type == RTM_NEWCHAIN) {
		if (chain) {
			if (tcf_chain_held_by_acts_only(chain)) {
				/* The chain exists only because there is
				 * some action referencing it.
				 */
				tcf_chain_hold(chain);
			} else {
				NL_SET_ERR_MSG(extack, "Filter chain already exists");
				err = -EEXIST;
				goto errout_block_locked;
			}
		} else {
			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
				err = -ENOENT;
				goto errout_block_locked;
			}
			chain = tcf_chain_create(block, chain_index);
			if (!chain) {
				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
				err = -ENOMEM;
				goto errout_block_locked;
			}
		}
	} else {
		if (!chain || tcf_chain_held_by_acts_only(chain)) {
			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
			err = -EINVAL;
			goto errout_block_locked;
		}
		tcf_chain_hold(chain);
	}

	if (n->nlmsg_type == RTM_NEWCHAIN) {
		/* Modifying chain requires holding parent block lock. In case
		 * the chain was successfully added, take a reference to the
		 * chain. This ensures that an empty chain does not disappear at
		 * the end of this function.
		 */
		tcf_chain_hold(chain);
		chain->explicitly_created = true;
	}
	mutex_unlock(&block->lock);

	switch (n->nlmsg_type) {
	case RTM_NEWCHAIN:
		err = tc_chain_tmplt_add(chain, net, tca, extack);
		if (err) {
			tcf_chain_put_explicitly_created(chain);
			goto errout;
		}

		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);
		break;
	case RTM_DELCHAIN:
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		/* Flush the chain first as the user requested chain removal. */
		tcf_chain_flush(chain);
		/* In case the chain was successfully deleted, put a reference
		 * to the chain previously taken during addition.
		 */
		tcf_chain_put_explicitly_created(chain);
		break;
	case RTM_GETCHAIN:
		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
				      n->nlmsg_seq, n->nlmsg_type, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
		break;
	default:
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(extack, "Unsupported message type");
		goto errout;
	}

errout:
	tcf_chain_put(chain);
errout_block:
	tcf_block_release(q, block);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;

errout_block_locked:
	mutex_unlock(&block->lock);
	goto errout_block;
}

/* called with RTNL */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcf_chain *chain, *chain_prev;
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		if ((tca[TCA_CHAIN] &&
		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
			continue;
		if (index < index_start) {
			index++;
			continue;
		}
		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
					 chain->index, net, skb, block,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 RTM_NEWCHAIN);
		if (err <= 0) {
			tcf_chain_put(chain);
			break;
		}
		index++;
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

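/* Parse and bind actions to exts. The legacy police attribute creates a
 * single TCA_OLD_COMPAT action; the action attribute may carry a whole
 * list of actions.
 */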
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;
		size_t attr_size = 0;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND, true, extack);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			int err;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      exts->actions, &attr_size, true,
					      extack);
			if (err < 0)
				return err;
			exts->nr_actions = err;
		}
		exts->net = net;
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police])) {
		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
		return -EOPNOTSUPP;
	}
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);

int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);

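/* Invoke an offload setup call on every callback registered on the
 * block. With err_stop the first error is returned; otherwise failing
 * callbacks are skipped and the number of successful ones is returned.
 */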
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		     void *type_data, bool err_stop)
{
	struct tcf_block_cb *block_cb;
	int ok_count = 0;
	int err;

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop)
		return -EOPNOTSUPP;

	list_for_each_entry(block_cb, &block->cb_list, list) {
		err = block_cb->cb(type, type_data, block_cb->cb_priv);
		if (err) {
			if (err_stop)
				return err;
		} else {
			ok_count++;
		}
	}
	return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);

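/* Translate the actions attached to exts into the driver-facing
 * flow_action representation. Each pedit key expands into a separate
 * flow_action entry, which is why the entry index advances inside the
 * pedit loop.
 */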
int tc_setup_flow_action(struct flow_action *flow_action,
			 const struct tcf_exts *exts)
{
	const struct tc_action *act;
	int i, j, k;

	if (!exts)
		return 0;

	j = 0;
	tcf_exts_for_each_action(i, act, exts) {
		struct flow_action_entry *entry;

		entry = &flow_action->entries[j];
		if (is_tcf_gact_ok(act)) {
			entry->id = FLOW_ACTION_ACCEPT;
		} else if (is_tcf_gact_shot(act)) {
			entry->id = FLOW_ACTION_DROP;
		} else if (is_tcf_gact_trap(act)) {
			entry->id = FLOW_ACTION_TRAP;
		} else if (is_tcf_gact_goto_chain(act)) {
			entry->id = FLOW_ACTION_GOTO;
			entry->chain_index = tcf_gact_goto_chain_index(act);
		} else if (is_tcf_mirred_egress_redirect(act)) {
			entry->id = FLOW_ACTION_REDIRECT;
			entry->dev = tcf_mirred_dev(act);
		} else if (is_tcf_mirred_egress_mirror(act)) {
			entry->id = FLOW_ACTION_MIRRED;
			entry->dev = tcf_mirred_dev(act);
		} else if (is_tcf_vlan(act)) {
			switch (tcf_vlan_action(act)) {
			case TCA_VLAN_ACT_PUSH:
				entry->id = FLOW_ACTION_VLAN_PUSH;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			case TCA_VLAN_ACT_POP:
				entry->id = FLOW_ACTION_VLAN_POP;
				break;
			case TCA_VLAN_ACT_MODIFY:
				entry->id = FLOW_ACTION_VLAN_MANGLE;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			default:
				goto err_out;
			}
		} else if (is_tcf_tunnel_set(act)) {
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tcf_tunnel_info(act);
		} else if (is_tcf_tunnel_release(act)) {
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
			entry->tunnel = tcf_tunnel_info(act);
		} else if (is_tcf_pedit(act)) {
			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
				switch (tcf_pedit_cmd(act, k)) {
				case TCA_PEDIT_KEY_EX_CMD_SET:
					entry->id = FLOW_ACTION_MANGLE;
					break;
				case TCA_PEDIT_KEY_EX_CMD_ADD:
					entry->id = FLOW_ACTION_ADD;
					break;
				default:
					goto err_out;
				}
				entry->mangle.htype = tcf_pedit_htype(act, k);
				entry->mangle.mask = tcf_pedit_mask(act, k);
				entry->mangle.val = tcf_pedit_val(act, k);
				entry->mangle.offset = tcf_pedit_offset(act, k);
				entry = &flow_action->entries[++j];
			}
		} else if (is_tcf_csum(act)) {
			entry->id = FLOW_ACTION_CSUM;
			entry->csum_flags = tcf_csum_update_flags(act);
		} else if (is_tcf_skbedit_mark(act)) {
			entry->id = FLOW_ACTION_MARK;
			entry->mark = tcf_skbedit_mark(act);
		} else {
			goto err_out;
		}

		if (!is_tcf_pedit(act))
			j++;
	}
	return 0;
err_out:
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(tc_setup_flow_action);

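/* Number of flow_action entries needed to offload exts: one per action,
 * except pedit, which contributes one entry per key.
 */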
unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
{
	unsigned int num_acts = 0;
	struct tc_action *act;
	int i;

	tcf_exts_for_each_action(i, act, exts) {
		if (is_tcf_pedit(act))
			num_acts += tcf_pedit_nkeys(act);
		else
			num_acts++;
	}
	return num_acts;
}
EXPORT_SYMBOL(tcf_exts_num_actions);

static __net_init int tcf_net_init(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock_init(&tn->idr_lock);
	idr_init(&tn->idr);
	return 0;
}

static void __net_exit tcf_net_exit(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_destroy(&tn->idr);
}

static struct pernet_operations tcf_net_ops = {
	.init = tcf_net_init,
	.exit = tcf_net_exit,
	.id   = &tcf_net_id,
	.size = sizeof(struct tcf_net),
};

static int __init tc_filter_init(void)
{
	int err;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&tcf_net_ops);
	if (err)
		goto err_register_pernet_subsys;

	err = rhashtable_init(&indr_setup_block_ht,
			      &tc_indr_setup_block_ht_params);
	if (err)
		goto err_rhash_setup_block_ht;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
		      tc_dump_tfilter, 0);
	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
		      tc_dump_chain, 0);

	return 0;

err_rhash_setup_block_ht:
	unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys:
	destroy_workqueue(tc_filter_wq);
	return err;
}

subsys_initcall(tc_filter_init);