cls_api.c 22.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
22
#include <linux/err.h>
L
Linus Torvalds 已提交
23 24 25
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
26
#include <linux/err.h>
27
#include <linux/slab.h>
28 29
#include <net/net_namespace.h>
#include <net/sock.h>
30
#include <net/netlink.h>
L
Linus Torvalds 已提交
31 32 33 34
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* The list of all installed classifier types (struct tcf_proto_ops),
 * linked through their ->head member.
 */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

42
static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
L
Linus Torvalds 已提交
43
{
44
	const struct tcf_proto_ops *t, *res = NULL;
L
Linus Torvalds 已提交
45 46 47

	if (kind) {
		read_lock(&cls_mod_lock);
48
		list_for_each_entry(t, &tcf_proto_base, head) {
49
			if (strcmp(kind, t->kind) == 0) {
50 51
				if (try_module_get(t->owner))
					res = t;
L
Linus Torvalds 已提交
52 53 54 55 56
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
57
	return res;
L
Linus Torvalds 已提交
58 59 60 61 62 63
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
64
	struct tcf_proto_ops *t;
L
Linus Torvalds 已提交
65 66 67
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
68
	list_for_each_entry(t, &tcf_proto_base, head)
L
Linus Torvalds 已提交
69 70 71
		if (!strcmp(ops->kind, t->kind))
			goto out;

72
	list_add_tail(&ops->head, &tcf_proto_base);
L
Linus Torvalds 已提交
73 74 75 76 77
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
78
EXPORT_SYMBOL(register_tcf_proto_ops);
L
Linus Torvalds 已提交
79 80 81

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
82
	struct tcf_proto_ops *t;
L
Linus Torvalds 已提交
83 84
	int rc = -ENOENT;

85 86 87 88 89
	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();

L
Linus Torvalds 已提交
90
	write_lock(&cls_mod_lock);
91 92 93 94
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
L
Linus Torvalds 已提交
95
			break;
96 97
		}
	}
L
Linus Torvalds 已提交
98 99 100
	write_unlock(&cls_mod_lock);
	return rc;
}
101
EXPORT_SYMBOL(unregister_tcf_proto_ops);
L
Linus Torvalds 已提交
102

103 104
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
105
			  unsigned long fh, int event, bool unicast);
L
Linus Torvalds 已提交
106

107 108
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct nlmsghdr *n,
109
				 struct tcf_chain *chain, int event)
110 111 112
{
	struct tcf_proto *tp;

113 114
	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next))
115
		tfilter_notify(net, oskb, n, tp, 0, event, false);
116
}
L
Linus Torvalds 已提交
117 118 119

/* Select new prio value from the range, managed by kernel. */

120
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
L
Linus Torvalds 已提交
121
{
122
	u32 first = TC_H_MAKE(0xC0000000U, 0U);
L
Linus Torvalds 已提交
123 124

	if (tp)
E
Eric Dumazet 已提交
125
		first = tp->prio - 1;
L
Linus Torvalds 已提交
126

127
	return TC_H_MAJ(first);
L
Linus Torvalds 已提交
128 129
}

130
/* Allocate and initialise a tcf_proto of classifier type @kind.
 *
 * On success the new proto is returned with its fields filled in from the
 * arguments and ops->init() already called; it holds the module reference
 * taken by tcf_proto_lookup_ops().
 *
 * On failure an ERR_PTR() is returned:
 *   -ENOBUFS  allocation failed
 *   -ENOENT   no classifier of that kind is registered (and, with
 *             CONFIG_MODULES, loading "cls_<kind>" did not provide one)
 *   -EAGAIN   the classifier became available after a module load; RTNL
 *             was dropped meanwhile, so the caller must replay the request
 *   other     whatever ops->init() returned
 */
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, u32 parent, struct Qdisc *q,
					  struct tcf_chain *chain)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind);
	if (!tp->ops) {
		err = -ENOENT;
#ifdef CONFIG_MODULES
		rtnl_unlock();
		request_module("cls_%s", kind);
		rtnl_lock();
		tp->ops = tcf_proto_lookup_ops(kind);
		/* We dropped the RTNL semaphore in order to perform
		 * the module load. So, even if we succeeded in loading
		 * the module we have to replay the request. We indicate
		 * this using -EAGAIN.
		 */
		if (tp->ops) {
			module_put(tp->ops->owner);
			err = -EAGAIN;
		}
#endif
		/* Must bail out in every configuration: without
		 * CONFIG_MODULES tp->ops is still NULL here and must not be
		 * dereferenced below.
		 */
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->classid = parent;
	tp->q = q;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

182
static void tcf_proto_destroy(struct tcf_proto *tp)
183
{
184 185 186
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
187 188
}

189 190
/* Allocate a zeroed chain with index @chain_index, link it at the tail of
 * @block's chain list and give it an initial reference count of 1.
 * Returns NULL when the allocation fails.
 */
static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	return chain;
}

J
Jiri Pirko 已提交
204
static void tcf_chain_flush(struct tcf_chain *chain)
205 206 207
{
	struct tcf_proto *tp;

J
Jiri Pirko 已提交
208 209
	if (*chain->p_filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
210 211
	while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
212
		tcf_proto_destroy(tp);
213
	}
J
Jiri Pirko 已提交
214 215 216 217 218 219
}

/* Unlink @chain from its block's chain list, flush all of its filters and
 * free it. The reference count is not consulted here.
 */
static void tcf_chain_destroy(struct tcf_chain *chain)
{
	list_del(&chain->list);
	tcf_chain_flush(chain);
	kfree(chain);
}

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
/* Look up the chain numbered @chain_index on @block and take a reference
 * on it. When no such chain exists yet, a new one is created (which starts
 * with one reference). Returns NULL only if that creation fails.
 */
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index)
{
	struct tcf_chain *pos;

	list_for_each_entry(pos, &block->chain_list, list) {
		if (pos->index != chain_index)
			continue;

		pos->refcnt++;
		return pos;
	}

	/* Not found: hand back a freshly created chain. */
	return tcf_chain_create(block, chain_index);
}
EXPORT_SYMBOL(tcf_chain_get);

/* Drop one reference on @chain.
 *
 * The chain is destroyed only when this was the last reference, it has no
 * filters linked, and it is not chain 0 -- the default chain, which has to
 * be always present.
 */
void tcf_chain_put(struct tcf_chain *chain)
{
	if (--chain->refcnt != 0)
		return;
	if (chain->filter_chain)
		return;
	if (chain->index == 0)
		return;

	tcf_chain_destroy(chain);
}
EXPORT_SYMBOL(tcf_chain_put);

247 248 249 250 251
/* Record @p_filter_chain as the external head pointer of @chain. Other
 * helpers in this file update *p_filter_chain whenever the first proto of
 * the chain changes.
 */
static void
tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
			       struct tcf_proto __rcu **p_filter_chain)
{
	chain->p_filter_chain = p_filter_chain;
}
253 254 255 256 257

/* Allocate a filter block together with its default chain 0.
 *
 * @p_block:        receives the new block on success
 * @p_filter_chain: external head pointer to associate with chain 0
 *
 * Returns 0 on success or -ENOMEM when either allocation fails; on failure
 * *p_block is left untouched.
 */
int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain)
{
	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
	struct tcf_chain *chain;
	int err;

	if (!block)
		return -ENOMEM;
	INIT_LIST_HEAD(&block->chain_list);
	/* Create chain 0 by default, it has to be always present. */
	chain = tcf_chain_create(block, 0);
	if (!chain) {
		err = -ENOMEM;
		goto err_chain_create;
	}
	tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
	*p_block = block;
	return 0;

err_chain_create:
	kfree(block);
	return err;
}
EXPORT_SYMBOL(tcf_block_get);

void tcf_block_put(struct tcf_block *block)
{
282 283
	struct tcf_chain *chain, *tmp;

284 285
	if (!block)
		return;
286 287 288

	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
		tcf_chain_destroy(chain);
289 290 291
	kfree(block);
}
EXPORT_SYMBOL(tcf_block_put);
292

293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Protos whose ->protocol is neither the skb's protocol nor ETH_P_ALL are
 * skipped. The first non-negative classify() result is returned;
 * TC_ACT_UNSPEC is returned when the list is exhausted.
 *
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY result (unless @compat_mode)
 * or a goto-chain result restarts the scan -- from the original head, or
 * from res->goto_tp for goto-chain. After max_reclassify_loop restarts the
 * packet is dropped with TC_ACT_SHOT and a rate-limited notice is logged.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *old_tp = tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			/* Later restarts begin at the goto target too. */
			old_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = old_tp;
	/* Re-read the protocol before rescanning. */
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);

345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
/* Position inside a chain's tcf_proto list, as filled in by
 * tcf_chain_tp_find() and consumed by the insert/remove helpers.
 */
struct tcf_chain_info {
	/* Address of the link (chain head or some tp->next) at which the
	 * search stopped.
	 */
	struct tcf_proto __rcu **pprev;
	/* ->next of the proto that was found, or NULL if none was found. */
	struct tcf_proto __rcu *next;
};

/* Return the proto currently stored in the link that @chain_info->pprev
 * points at (read with rtnl_dereference()).
 */
static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
	struct tcf_proto *at_link;

	at_link = rtnl_dereference(*chain_info->pprev);
	return at_link;
}

static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	if (chain->p_filter_chain &&
	    *chain_info->pprev == chain->filter_chain)
361
		rcu_assign_pointer(*chain->p_filter_chain, tp);
362 363 364 365 366 367 368 369 370 371 372
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
}

/* Unlink @tp from @chain using the position recorded in @chain_info: the
 * link that pointed at @tp is redirected to @chain_info->next.
 *
 * When the chain has an external head pointer and @tp is the chain head,
 * the external pointer is redirected too. @tp itself is not freed here.
 */
static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = rtnl_dereference(chain_info->next);

	if (chain->p_filter_chain && tp == chain->filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
}

/* Search @chain for a proto with priority @prio and record the position in
 * @chain_info.
 *
 * The scan stops at the first proto whose prio is >= @prio:
 *   - equal prio: that proto is the result, unless @prio_allocate is set
 *     or a non-zero @protocol differs from the proto's protocol, in which
 *     case ERR_PTR(-EINVAL) is returned and @chain_info is not written;
 *   - greater prio: the result is NULL, positioned before that proto.
 * If the end of the chain is reached the result is NULL, positioned at the
 * tail.
 *
 * On a non-error return, chain_info->pprev is the link where the scan
 * stopped and chain_info->next is the result's ->next (NULL for a NULL
 * result).
 */
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **slot;
	struct tcf_proto *cur;

	/* Check the chain for existence of proto-tcf with this priority */
	for (slot = &chain->filter_chain;
	     (cur = rtnl_dereference(*slot)); slot = &cur->next) {
		if (cur->prio < prio)
			continue;

		if (cur->prio > prio) {
			/* Passed the slot where @prio would live. */
			cur = NULL;
			break;
		}

		/* Exact priority match. */
		if (prio_allocate ||
		    (cur->protocol != protocol && protocol))
			return ERR_PTR(-EINVAL);
		break;
	}

	chain_info->pprev = slot;
	if (cur)
		chain_info->next = cur->next;
	else
		chain_info->next = NULL;
	return cur;
}

L
Linus Torvalds 已提交
404 405
/* Add/change/delete/get a filter node */

406 407
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
408
{
409
	struct net *net = sock_net(skb->sk);
410
	struct nlattr *tca[TCA_MAX + 1];
L
Linus Torvalds 已提交
411 412 413
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
414
	bool prio_allocate;
L
Linus Torvalds 已提交
415
	u32 parent;
416
	u32 chain_index;
L
Linus Torvalds 已提交
417 418
	struct net_device *dev;
	struct Qdisc  *q;
419
	struct tcf_chain_info chain_info;
420
	struct tcf_chain *chain = NULL;
421
	struct tcf_block *block;
L
Linus Torvalds 已提交
422
	struct tcf_proto *tp;
423
	const struct Qdisc_class_ops *cops;
L
Linus Torvalds 已提交
424 425 426
	unsigned long cl;
	unsigned long fh;
	int err;
427
	int tp_created;
L
Linus Torvalds 已提交
428

429
	if ((n->nlmsg_type != RTM_GETTFILTER) &&
430
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
431
		return -EPERM;
432

L
Linus Torvalds 已提交
433
replay:
434 435
	tp_created = 0;

436
	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
437 438 439
	if (err < 0)
		return err;

440
	t = nlmsg_data(n);
L
Linus Torvalds 已提交
441 442
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
443
	prio_allocate = false;
L
Linus Torvalds 已提交
444 445 446 447
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
448 449
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
450
			if (protocol || t->tcm_handle || tca[TCA_KIND])
451 452 453 454 455 456 457 458
				return -ENOENT;
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
459
				prio_allocate = true;
460 461 462 463
				break;
			}
			/* fall-through */
		default:
L
Linus Torvalds 已提交
464
			return -ENOENT;
465
		}
L
Linus Torvalds 已提交
466 467 468 469 470
	}

	/* Find head of filter chain. */

	/* Find link */
471
	dev = __dev_get_by_index(net, t->tcm_ifindex);
472
	if (dev == NULL)
L
Linus Torvalds 已提交
473 474 475 476
		return -ENODEV;

	/* Find qdisc */
	if (!parent) {
477
		q = dev->qdisc;
L
Linus Torvalds 已提交
478
		parent = q->handle;
479 480 481 482 483
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}
L
Linus Torvalds 已提交
484 485

	/* Is it classful? */
E
Eric Dumazet 已提交
486 487
	cops = q->ops->cl_ops;
	if (!cops)
L
Linus Torvalds 已提交
488 489
		return -EINVAL;

490
	if (!cops->tcf_block)
491 492
		return -EOPNOTSUPP;

L
Linus Torvalds 已提交
493 494 495 496 497 498 499 500
	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		cl = cops->get(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
501 502
	block = cops->tcf_block(q, cl);
	if (!block) {
503
		err = -EINVAL;
L
Linus Torvalds 已提交
504
		goto errout;
505
	}
506 507 508 509 510 511 512 513 514 515 516

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index);
	if (!chain) {
		err = -ENOMEM;
		goto errout;
	}
517

518 519
	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
J
Jiri Pirko 已提交
520
		tcf_chain_flush(chain);
521 522 523
		err = 0;
		goto errout;
	}
L
Linus Torvalds 已提交
524

525 526 527 528 529
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		err = PTR_ERR(tp);
		goto errout;
L
Linus Torvalds 已提交
530 531 532 533 534
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

535 536
		if (tca[TCA_KIND] == NULL || !protocol) {
			err = -EINVAL;
L
Linus Torvalds 已提交
537
			goto errout;
538
		}
L
Linus Torvalds 已提交
539

E
Eric Dumazet 已提交
540
		if (n->nlmsg_type != RTM_NEWTFILTER ||
541 542
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
L
Linus Torvalds 已提交
543
			goto errout;
544
		}
L
Linus Torvalds 已提交
545

546
		if (prio_allocate)
547
			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
L
Linus Torvalds 已提交
548

549
		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
550
				      protocol, prio, parent, q, chain);
551 552
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
L
Linus Torvalds 已提交
553 554
			goto errout;
		}
555
		tp_created = 1;
556 557
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		err = -EINVAL;
L
Linus Torvalds 已提交
558
		goto errout;
559
	}
L
Linus Torvalds 已提交
560 561 562 563 564

	fh = tp->ops->get(tp, t->tcm_handle);

	if (fh == 0) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
565
			tcf_chain_tp_remove(chain, &chain_info, tp);
566 567
			tfilter_notify(net, skb, n, tp, fh,
				       RTM_DELTFILTER, false);
568
			tcf_proto_destroy(tp);
L
Linus Torvalds 已提交
569 570 571 572
			err = 0;
			goto errout;
		}

573
		if (n->nlmsg_type != RTM_NEWTFILTER ||
574 575
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
L
Linus Torvalds 已提交
576
			goto errout;
577
		}
L
Linus Torvalds 已提交
578
	} else {
579 580
		bool last;

L
Linus Torvalds 已提交
581
		switch (n->nlmsg_type) {
582
		case RTM_NEWTFILTER:
583 584
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
585
					tcf_proto_destroy(tp);
586
				err = -EEXIST;
L
Linus Torvalds 已提交
587
				goto errout;
588
			}
L
Linus Torvalds 已提交
589 590
			break;
		case RTM_DELTFILTER:
591
			err = tp->ops->delete(tp, fh, &last);
592 593 594 595
			if (err)
				goto errout;
			tfilter_notify(net, skb, n, tp, t->tcm_handle,
				       RTM_DELTFILTER, false);
596
			if (last) {
597
				tcf_chain_tp_remove(chain, &chain_info, tp);
598 599
				tcf_proto_destroy(tp);
			}
600
			goto errout;
L
Linus Torvalds 已提交
601
		case RTM_GETTFILTER:
J
Jamal Hadi Salim 已提交
602
			err = tfilter_notify(net, skb, n, tp, fh,
603
					     RTM_NEWTFILTER, true);
L
Linus Torvalds 已提交
604 605 606 607 608 609 610
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

611 612
	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
613
	if (err == 0) {
614 615
		if (tp_created)
			tcf_chain_tp_insert(chain, &chain_info, tp);
616
		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
617 618
	} else {
		if (tp_created)
619
			tcf_proto_destroy(tp);
620
	}
L
Linus Torvalds 已提交
621 622

errout:
623 624
	if (chain)
		tcf_chain_put(chain);
L
Linus Torvalds 已提交
625 626 627 628 629 630 631 632
	if (cl)
		cops->put(q, cl);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}

633 634 635
/* Append one filter message describing (@tp, @fh) to @skb.
 *
 * Writes a netlink header of type @event with a struct tcmsg payload,
 * followed by TCA_KIND and TCA_CHAIN attributes. For every event other
 * than RTM_DELTFILTER the handle is reset to 0 and the classifier's dump()
 * hook, if present, is called to add its own data.
 *
 * Returns skb->len on success. On any failure the skb is trimmed back to
 * where it was on entry and -1 is returned.
 */
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, unsigned long fh, u32 portid,
			 u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
	tcm->tcm_parent = tp->classid;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	tcm->tcm_handle = fh;
	if (RTM_DELTFILTER != event) {
		tcm->tcm_handle = 0;
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	/* Message length = everything appended since entry. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

670 671
/* Build a filter notification for (@tp, @fh) and send it.
 *
 * @oskb:    request skb; its sender portid is used when non-NULL, else 0
 * @n:       request header supplying sequence number and flags
 * @unicast: reply only to the requester via netlink_unicast() instead of
 *           going through rtnetlink_send() for group RTNLGRP_TC
 *
 * Returns -ENOBUFS if no skb could be allocated, -EINVAL if the message
 * could not be built, otherwise the result of the send call.
 */
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  unsigned long fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

694
struct tcf_dump_args {
L
Linus Torvalds 已提交
695 696 697 698 699
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

700 701
static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
			 struct tcf_walker *arg)
L
Linus Torvalds 已提交
702
{
703
	struct tcf_dump_args *a = (void *)arg;
704
	struct net *net = sock_net(a->skb->sk);
L
Linus Torvalds 已提交
705

706
	return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
J
Jamal Hadi Salim 已提交
707 708
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
L
Linus Torvalds 已提交
709 710
}

711
static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
			   struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, 0,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
738
				return false;
739 740 741 742 743 744 745 746 747 748 749 750 751 752

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
753
			return false;
754
	}
755
	return true;
756 757
}

E
Eric Dumazet 已提交
758
/* called with RTNL */
L
Linus Torvalds 已提交
759 760
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
761
	struct net *net = sock_net(skb->sk);
762
	struct nlattr *tca[TCA_MAX + 1];
L
Linus Torvalds 已提交
763 764
	struct net_device *dev;
	struct Qdisc *q;
765
	struct tcf_block *block;
766
	struct tcf_chain *chain;
767
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
L
Linus Torvalds 已提交
768
	unsigned long cl = 0;
769
	const struct Qdisc_class_ops *cops;
770 771
	long index_start;
	long index;
772
	int err;
L
Linus Torvalds 已提交
773

774
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
L
Linus Torvalds 已提交
775
		return skb->len;
776 777 778 779 780

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err)
		return err;

E
Eric Dumazet 已提交
781 782
	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
L
Linus Torvalds 已提交
783 784 785
		return skb->len;

	if (!tcm->tcm_parent)
786
		q = dev->qdisc;
L
Linus Torvalds 已提交
787 788 789 790
	else
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	if (!q)
		goto out;
E
Eric Dumazet 已提交
791 792
	cops = q->ops->cl_ops;
	if (!cops)
L
Linus Torvalds 已提交
793
		goto errout;
794
	if (!cops->tcf_block)
795
		goto errout;
L
Linus Torvalds 已提交
796 797 798 799 800
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->get(q, tcm->tcm_parent);
		if (cl == 0)
			goto errout;
	}
801 802
	block = cops->tcf_block(q, cl);
	if (!block)
L
Linus Torvalds 已提交
803 804
		goto errout;

805 806
	index_start = cb->args[0];
	index = 0;
807 808 809 810 811 812 813 814 815

	list_for_each_entry(chain, &block->chain_list, list) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
			break;
	}

816
	cb->args[0] = index;
L
Linus Torvalds 已提交
817 818 819 820 821 822 823 824

errout:
	if (cl)
		cops->put(q, cl);
out:
	return skb->len;
}

825
/* Release the actions attached to @exts.
 *
 * With CONFIG_NET_CLS_ACT: the actions are collected on a temporary list,
 * destroyed with TCA_ACT_UNBIND, the actions array is freed and the count
 * reset to 0. Without it this is a no-op.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
L
Linus Torvalds 已提交
837

838
/* Parse the police/action attributes of a filter request into @exts.
 *
 * @tb is indexed with the attribute numbers stored in exts->police and
 * exts->action. With CONFIG_NET_CLS_ACT:
 *   - a police attribute creates a single "police" action, marks it and
 *     @exts as TCA_OLD_COMPAT and stores it as the only action;
 *   - otherwise an action attribute is initialised into a list whose
 *     entries are copied into exts->actions[].
 * Without CONFIG_NET_CLS_ACT the presence of either attribute yields
 * -EOPNOTSUPP.
 *
 * Returns 0 on success or the error from action initialisation.
 */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			LIST_HEAD(actions);
			int err, i = 0;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      &actions);
			if (err)
				return err;
			/* NOTE(review): no bound check on i here; presumably
			 * tcf_action_init() caps the list length to what
			 * exts->actions[] can hold -- confirm.
			 */
			list_for_each_entry(act, &actions, list)
				exts->actions[i++] = act;
			exts->nr_actions = i;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police]))
		return -EOPNOTSUPP;
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);
L
Linus Torvalds 已提交
878

879 880
/* Replace the actions of @dst with those of @src.
 *
 * With CONFIG_NET_CLS_ACT: the action count, actions pointer and type are
 * copied from @src to @dst between tcf_tree_lock()/tcf_tree_unlock() on
 * @tp; the previous contents of @dst are destroyed afterwards, outside the
 * lock. @dst takes over @src's actions array (the pointer is copied, not
 * the array). Without CONFIG_NET_CLS_ACT this is a no-op.
 */
void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
		     struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	tcf_tree_lock(tp);
	dst->nr_actions = src->nr_actions;
	dst->actions = src->actions;
	dst->type = src->type;
	tcf_tree_unlock(tp);

	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
L
Linus Torvalds 已提交
895

896 897 898 899 900 901 902 903 904
#ifdef CONFIG_NET_CLS_ACT
/* Return the first action stored in @exts, or NULL when it holds none. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	return exts->nr_actions ? exts->actions[0] : NULL;
}
#endif
905

906
/* Dump the actions of @exts into @skb as a nested attribute.
 *
 * Nothing is written unless exts->action is set and there is at least one
 * action. Extensions not marked TCA_OLD_COMPAT are dumped as a nest of
 * type exts->action; old-compat ones are dumped, if exts->police is set,
 * as a nest of type exts->police holding the first action only.
 *
 * Returns 0 on success (always 0 without CONFIG_NET_CLS_ACT), or -1 after
 * cancelling the partially written nest.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && exts->nr_actions) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2  was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	/* nest may be NULL here when nla_nest_start() itself failed. */
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
L
Linus Torvalds 已提交
948

949

950
/* Copy the statistics of the first action of @exts into @skb.
 *
 * Returns -1 when tcf_action_copy_stats() fails, 0 otherwise -- including
 * when there is no action or CONFIG_NET_CLS_ACT is disabled.
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
L
Linus Torvalds 已提交
960

961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
/* Ask the first action of @exts that implements get_dev() for its device
 * and store it through @hw_dev.
 *
 * Returns -EINVAL when @exts has no actions, 0 when *hw_dev is non-NULL
 * after the lookup, and -EOPNOTSUPP otherwise (always -EOPNOTSUPP without
 * CONFIG_NET_CLS_ACT).
 *
 * NOTE(review): *hw_dev is not written here when no action provides
 * get_dev(), so the final test reads whatever the caller put there --
 * presumably callers pre-initialise it to NULL; confirm.
 */
int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
		     struct net_device **hw_dev)
{
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *act;
	LIST_HEAD(actions);

	if (tc_no_actions(exts))
		return -EINVAL;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(act, &actions, list) {
		if (!act->ops->get_dev)
			continue;

		/* Only the first capable action is consulted. */
		act->ops->get_dev(act, dev_net(dev), hw_dev);
		break;
	}

	if (!*hw_dev)
		return -EOPNOTSUPP;
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}
EXPORT_SYMBOL(tcf_exts_get_dev);

L
Linus Torvalds 已提交
985 986
/* Register the rtnetlink handlers for filter messages: tc_ctl_tfilter()
 * handles new/delete/get requests, and tc_dump_tfilter() is the dump
 * callback for RTM_GETTFILTER. Run as a subsys initcall; always returns 0.
 */
static int __init tc_filter_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, NULL);

	return 0;
}

subsys_initcall(tc_filter_init);