cls_api.c 22.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
22
#include <linux/err.h>
L
Linus Torvalds 已提交
23 24 25
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
26
#include <linux/err.h>
27
#include <linux/slab.h>
28 29
#include <net/net_namespace.h>
#include <net/sock.h>
30
#include <net/netlink.h>
L
Linus Torvalds 已提交
31 32 33 34
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* The list of all installed classifier types */
35
static LIST_HEAD(tcf_proto_base);
L
Linus Torvalds 已提交
36 37 38 39 40 41

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

42
static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
L
Linus Torvalds 已提交
43
{
44
	const struct tcf_proto_ops *t, *res = NULL;
L
Linus Torvalds 已提交
45 46 47

	if (kind) {
		read_lock(&cls_mod_lock);
48
		list_for_each_entry(t, &tcf_proto_base, head) {
49
			if (strcmp(kind, t->kind) == 0) {
50 51
				if (try_module_get(t->owner))
					res = t;
L
Linus Torvalds 已提交
52 53 54 55 56
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
57
	return res;
L
Linus Torvalds 已提交
58 59 60 61 62 63
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
64
	struct tcf_proto_ops *t;
L
Linus Torvalds 已提交
65 66 67
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
68
	list_for_each_entry(t, &tcf_proto_base, head)
L
Linus Torvalds 已提交
69 70 71
		if (!strcmp(ops->kind, t->kind))
			goto out;

72
	list_add_tail(&ops->head, &tcf_proto_base);
L
Linus Torvalds 已提交
73 74 75 76 77
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
78
EXPORT_SYMBOL(register_tcf_proto_ops);
L
Linus Torvalds 已提交
79 80 81

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
82
	struct tcf_proto_ops *t;
L
Linus Torvalds 已提交
83 84
	int rc = -ENOENT;

85 86 87 88 89
	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();

L
Linus Torvalds 已提交
90
	write_lock(&cls_mod_lock);
91 92 93 94
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
L
Linus Torvalds 已提交
95
			break;
96 97
		}
	}
L
Linus Torvalds 已提交
98 99 100
	write_unlock(&cls_mod_lock);
	return rc;
}
101
EXPORT_SYMBOL(unregister_tcf_proto_ops);
L
Linus Torvalds 已提交
102

103 104
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
105
			  unsigned long fh, int event, bool unicast);
L
Linus Torvalds 已提交
106

107 108
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct nlmsghdr *n,
109
				 struct tcf_chain *chain, int event)
110 111 112
{
	struct tcf_proto *tp;

113 114
	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next))
115
		tfilter_notify(net, oskb, n, tp, 0, event, false);
116
}
L
Linus Torvalds 已提交
117 118 119

/* Select new prio value from the range, managed by kernel. */

120
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
L
Linus Torvalds 已提交
121
{
122
	u32 first = TC_H_MAKE(0xC0000000U, 0U);
L
Linus Torvalds 已提交
123 124

	if (tp)
E
Eric Dumazet 已提交
125
		first = tp->prio - 1;
L
Linus Torvalds 已提交
126

127
	return TC_H_MAJ(first);
L
Linus Torvalds 已提交
128 129
}

130
/*
 * Allocate and initialise a tcf_proto of classifier type @kind.
 *
 * The ops are looked up by name. If they are not registered and module
 * support is built in, the RTNL lock is dropped to request the
 * "cls_<kind>" module; since the lock was released the caller must replay
 * the whole request, which is signalled with -EAGAIN.
 *
 * Returns the new proto, or an ERR_PTR():
 *   -ENOBUFS  allocation failed
 *   -ENOENT   classifier type unknown
 *   -EAGAIN   module was loaded, replay the request
 *   other     whatever ->init() returned
 */
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, u32 parent, struct Qdisc *q,
					  struct tcf_chain *chain)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	err = -ENOENT;
	tp->ops = tcf_proto_lookup_ops(kind);
	if (!tp->ops) {
#ifdef CONFIG_MODULES
		rtnl_unlock();
		request_module("cls_%s", kind);
		rtnl_lock();
		tp->ops = tcf_proto_lookup_ops(kind);
		/* We dropped the RTNL semaphore in order to perform
		 * the module load. So, even if we succeeded in loading
		 * the module we have to replay the request. We indicate
		 * this using -EAGAIN.
		 */
		if (tp->ops) {
			module_put(tp->ops->owner);
			err = -EAGAIN;
		} else {
			err = -ENOENT;
		}
#endif
		/* Must bail out even without CONFIG_MODULES: tp->ops is
		 * NULL here and is dereferenced right below otherwise.
		 */
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->classid = parent;
	tp->q = q;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err) {
		/* Drop the reference taken by tcf_proto_lookup_ops(). */
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

182
static void tcf_proto_destroy(struct tcf_proto *tp)
183
{
184 185 186
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
187 188
}

189 190
/*
 * Allocate a zeroed chain with index @chain_index, link it at the tail of
 * @block's chain list and give it an initial reference count of 1.
 *
 * Returns the new chain, or NULL if the allocation fails.
 */
static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	return chain;
}

J
Jiri Pirko 已提交
204
static void tcf_chain_flush(struct tcf_chain *chain)
205 206 207
{
	struct tcf_proto *tp;

J
Jiri Pirko 已提交
208 209
	if (*chain->p_filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
210 211
	while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
212
		tcf_proto_destroy(tp);
213
	}
J
Jiri Pirko 已提交
214 215 216 217 218 219
}

/*
 * Unlink @chain from its block's chain list, flush all of its filters
 * and free the chain itself.
 */
static void tcf_chain_destroy(struct tcf_chain *chain)
{
	list_del(&chain->list);
	tcf_chain_flush(chain);
	kfree(chain);
}

223 224
/*
 * Get a reference on the chain with index @chain_index in @block.
 *
 * An existing chain has its refcnt incremented. If none exists, a new one
 * is created when @create is true; otherwise NULL is returned. NULL is
 * also returned if creation fails.
 */
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				bool create)
{
	struct tcf_chain *chain;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index) {
			chain->refcnt++;
			return chain;
		}
	}
	if (create)
		return tcf_chain_create(block, chain_index);
	else
		return NULL;
}
EXPORT_SYMBOL(tcf_chain_get);

/*
 * Drop one reference on @chain.
 *
 * The chain is destroyed only when this was the last reference, no
 * filters remain on it, and it is not chain 0 (the default chain, which
 * has to be always present).
 */
void tcf_chain_put(struct tcf_chain *chain)
{
	/* The count is decremented unconditionally. */
	if (--chain->refcnt != 0)
		return;
	if (chain->filter_chain)
		return;
	if (chain->index == 0)
		return;

	tcf_chain_destroy(chain);
}
EXPORT_SYMBOL(tcf_chain_put);

251 252 253 254 255
/*
 * Record @p_filter_chain as the external head pointer of @chain. The
 * insert/remove helpers update the pointed-to location whenever the
 * chain head changes.
 */
static void
tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
			       struct tcf_proto __rcu **p_filter_chain)
{
	chain->p_filter_chain = p_filter_chain;
}
257 258 259 260 261

/*
 * Allocate a filter block and its default chain 0, and bind
 * @p_filter_chain as chain 0's external head pointer.
 *
 * On success the block is stored in *@p_block and 0 is returned. Returns
 * -ENOMEM if either allocation fails; *@p_block is left untouched then.
 */
int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain)
{
	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
	struct tcf_chain *chain;
	int err;

	if (!block)
		return -ENOMEM;
	INIT_LIST_HEAD(&block->chain_list);
	/* Create chain 0 by default, it has to be always present. */
	chain = tcf_chain_create(block, 0);
	if (!chain) {
		err = -ENOMEM;
		goto err_chain_create;
	}
	tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
	*p_block = block;
	return 0;

err_chain_create:
	kfree(block);
	return err;
}
EXPORT_SYMBOL(tcf_block_get);

void tcf_block_put(struct tcf_block *block)
{
286 287
	struct tcf_chain *chain, *tmp;

288 289
	if (!block)
		return;
290 291 292

	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
		tcf_chain_destroy(chain);
293 294 295
	kfree(block);
}
EXPORT_SYMBOL(tcf_block_put);
296

297 298 299 300 301 302 303 304 305 306
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Protos whose protocol matches neither the skb's protocol nor ETH_P_ALL
 * are skipped. The first non-negative verdict from a classifier is
 * returned; TC_ACT_UNSPEC is returned when the chain is exhausted.
 *
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY verdict (unless
 * @compat_mode) restarts from the original head, and a goto-chain verdict
 * restarts from res->goto_tp. More than max_reclassify_loop restarts
 * drop the packet with TC_ACT_SHOT.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	/* Re-read the protocol before the next pass over the chain. */
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);

351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
/*
 * Position of a tcf_proto within a chain, as filled in by
 * tcf_chain_tp_find() and consumed by the insert/remove helpers.
 */
struct tcf_chain_info {
	/* Link slot (chain head or a predecessor's ->next) for the position. */
	struct tcf_proto __rcu **pprev;
	/* ->next of the proto that was found, or NULL if none was found. */
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
	return rtnl_dereference(*chain_info->pprev);
}

static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	if (chain->p_filter_chain &&
	    *chain_info->pprev == chain->filter_chain)
367
		rcu_assign_pointer(*chain->p_filter_chain, tp);
368 369 370 371 372 373 374 375 376 377 378
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
}

/*
 * Unlink @tp from @chain by storing its successor (@chain_info->next)
 * into the recorded link slot. When @tp is the chain head and the chain
 * has an external head pointer, that pointer is updated too.
 * @tp itself is not freed here.
 */
static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = rtnl_dereference(chain_info->next);

	if (chain->p_filter_chain && tp == chain->filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
}

/*
 * Search @chain for a proto with priority @prio.
 *
 * The walk stops at the first proto whose prio is >= @prio. @chain_info
 * is filled with the link slot at which the walk stopped and, if a proto
 * with exactly @prio was found, that proto's successor.
 *
 * Returns the matching proto, NULL if there is none, or ERR_PTR(-EINVAL)
 * when a proto with @prio exists but either @prio_allocate is set or a
 * non-zero @protocol differs from the proto's protocol. @chain_info is
 * not written in the error case.
 */
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **link = &chain->filter_chain;
	struct tcf_proto *cur;

	while ((cur = rtnl_dereference(*link)) != NULL) {
		if (cur->prio < prio) {
			/* Not there yet, step to the next link. */
			link = &cur->next;
			continue;
		}
		if (cur->prio > prio) {
			/* Walked past the slot: nothing with this prio. */
			cur = NULL;
			break;
		}
		/* Exact priority match. */
		if (prio_allocate)
			return ERR_PTR(-EINVAL);
		if (protocol && cur->protocol != protocol)
			return ERR_PTR(-EINVAL);
		break;
	}

	chain_info->pprev = link;
	if (cur)
		chain_info->next = cur->next;
	else
		chain_info->next = NULL;
	return cur;
}

L
Linus Torvalds 已提交
410 411
/* Add/change/delete/get a filter node */

412 413
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
414
{
415
	struct net *net = sock_net(skb->sk);
416
	struct nlattr *tca[TCA_MAX + 1];
L
Linus Torvalds 已提交
417 418 419
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
420
	bool prio_allocate;
L
Linus Torvalds 已提交
421
	u32 parent;
422
	u32 chain_index;
L
Linus Torvalds 已提交
423 424
	struct net_device *dev;
	struct Qdisc  *q;
425
	struct tcf_chain_info chain_info;
426
	struct tcf_chain *chain = NULL;
427
	struct tcf_block *block;
L
Linus Torvalds 已提交
428
	struct tcf_proto *tp;
429
	const struct Qdisc_class_ops *cops;
L
Linus Torvalds 已提交
430 431 432
	unsigned long cl;
	unsigned long fh;
	int err;
433
	int tp_created;
L
Linus Torvalds 已提交
434

435
	if ((n->nlmsg_type != RTM_GETTFILTER) &&
436
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
437
		return -EPERM;
438

L
Linus Torvalds 已提交
439
replay:
440 441
	tp_created = 0;

442
	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
443 444 445
	if (err < 0)
		return err;

446
	t = nlmsg_data(n);
L
Linus Torvalds 已提交
447 448
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
449
	prio_allocate = false;
L
Linus Torvalds 已提交
450 451 452 453
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
454 455
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
456
			if (protocol || t->tcm_handle || tca[TCA_KIND])
457 458 459 460 461 462 463 464
				return -ENOENT;
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
465
				prio_allocate = true;
466 467 468 469
				break;
			}
			/* fall-through */
		default:
L
Linus Torvalds 已提交
470
			return -ENOENT;
471
		}
L
Linus Torvalds 已提交
472 473 474 475 476
	}

	/* Find head of filter chain. */

	/* Find link */
477
	dev = __dev_get_by_index(net, t->tcm_ifindex);
478
	if (dev == NULL)
L
Linus Torvalds 已提交
479 480 481 482
		return -ENODEV;

	/* Find qdisc */
	if (!parent) {
483
		q = dev->qdisc;
L
Linus Torvalds 已提交
484
		parent = q->handle;
485 486 487 488 489
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}
L
Linus Torvalds 已提交
490 491

	/* Is it classful? */
E
Eric Dumazet 已提交
492 493
	cops = q->ops->cl_ops;
	if (!cops)
L
Linus Torvalds 已提交
494 495
		return -EINVAL;

496
	if (!cops->tcf_block)
497 498
		return -EOPNOTSUPP;

L
Linus Torvalds 已提交
499 500 501 502 503 504 505 506
	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		cl = cops->get(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
507 508
	block = cops->tcf_block(q, cl);
	if (!block) {
509
		err = -EINVAL;
L
Linus Torvalds 已提交
510
		goto errout;
511
	}
512 513 514 515 516 517

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		err = -EINVAL;
		goto errout;
	}
518 519
	chain = tcf_chain_get(block, chain_index,
			      n->nlmsg_type == RTM_NEWTFILTER);
520
	if (!chain) {
521
		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
522 523
		goto errout;
	}
524

525 526
	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
J
Jiri Pirko 已提交
527
		tcf_chain_flush(chain);
528 529 530
		err = 0;
		goto errout;
	}
L
Linus Torvalds 已提交
531

532 533 534 535 536
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		err = PTR_ERR(tp);
		goto errout;
L
Linus Torvalds 已提交
537 538 539 540 541
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

542 543
		if (tca[TCA_KIND] == NULL || !protocol) {
			err = -EINVAL;
L
Linus Torvalds 已提交
544
			goto errout;
545
		}
L
Linus Torvalds 已提交
546

E
Eric Dumazet 已提交
547
		if (n->nlmsg_type != RTM_NEWTFILTER ||
548 549
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
L
Linus Torvalds 已提交
550
			goto errout;
551
		}
L
Linus Torvalds 已提交
552

553
		if (prio_allocate)
554
			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
L
Linus Torvalds 已提交
555

556
		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
557
				      protocol, prio, parent, q, chain);
558 559
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
L
Linus Torvalds 已提交
560 561
			goto errout;
		}
562
		tp_created = 1;
563 564
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		err = -EINVAL;
L
Linus Torvalds 已提交
565
		goto errout;
566
	}
L
Linus Torvalds 已提交
567 568 569 570 571

	fh = tp->ops->get(tp, t->tcm_handle);

	if (fh == 0) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
572
			tcf_chain_tp_remove(chain, &chain_info, tp);
573 574
			tfilter_notify(net, skb, n, tp, fh,
				       RTM_DELTFILTER, false);
575
			tcf_proto_destroy(tp);
L
Linus Torvalds 已提交
576 577 578 579
			err = 0;
			goto errout;
		}

580
		if (n->nlmsg_type != RTM_NEWTFILTER ||
581 582
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
L
Linus Torvalds 已提交
583
			goto errout;
584
		}
L
Linus Torvalds 已提交
585
	} else {
586 587
		bool last;

L
Linus Torvalds 已提交
588
		switch (n->nlmsg_type) {
589
		case RTM_NEWTFILTER:
590 591
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
592
					tcf_proto_destroy(tp);
593
				err = -EEXIST;
L
Linus Torvalds 已提交
594
				goto errout;
595
			}
L
Linus Torvalds 已提交
596 597
			break;
		case RTM_DELTFILTER:
598
			err = tp->ops->delete(tp, fh, &last);
599 600 601 602
			if (err)
				goto errout;
			tfilter_notify(net, skb, n, tp, t->tcm_handle,
				       RTM_DELTFILTER, false);
603
			if (last) {
604
				tcf_chain_tp_remove(chain, &chain_info, tp);
605 606
				tcf_proto_destroy(tp);
			}
607
			goto errout;
L
Linus Torvalds 已提交
608
		case RTM_GETTFILTER:
J
Jamal Hadi Salim 已提交
609
			err = tfilter_notify(net, skb, n, tp, fh,
610
					     RTM_NEWTFILTER, true);
L
Linus Torvalds 已提交
611 612 613 614 615 616 617
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

618 619
	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
620
	if (err == 0) {
621 622
		if (tp_created)
			tcf_chain_tp_insert(chain, &chain_info, tp);
623
		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
624 625
	} else {
		if (tp_created)
626
			tcf_proto_destroy(tp);
627
	}
L
Linus Torvalds 已提交
628 629

errout:
630 631
	if (chain)
		tcf_chain_put(chain);
L
Linus Torvalds 已提交
632 633 634 635 636 637 638 639
	if (cl)
		cops->put(q, cl);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}

640 641 642
/*
 * Append one tc filter netlink message describing (@tp, @fh) to @skb.
 *
 * The message carries the tcmsg header, TCA_KIND and TCA_CHAIN. For any
 * event other than RTM_DELTFILTER the handle is first zeroed and the
 * classifier's ->dump() (if any) is called with the tcmsg.
 *
 * Returns skb->len on success. On failure the skb is trimmed back to its
 * length on entry and -1 is returned.
 */
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, unsigned long fh, u32 portid,
			 u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
	tcm->tcm_parent = tp->classid;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	tcm->tcm_handle = fh;
	if (RTM_DELTFILTER != event) {
		tcm->tcm_handle = 0;
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	/* Final message length: everything appended since entry. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

677 678
/*
 * Build a filter notification for (@tp, @fh) and send it.
 *
 * With @unicast set the message goes only to the requesting port;
 * otherwise it is sent through rtnetlink_send() to RTNLGRP_TC, echoing
 * to the requester when NLM_F_ECHO is set in @n.
 *
 * Returns -ENOBUFS if the skb cannot be allocated, -EINVAL if the
 * message cannot be built, otherwise the result of the send call.
 */
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  unsigned long fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

701
struct tcf_dump_args {
L
Linus Torvalds 已提交
702 703 704 705 706
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

707 708
static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
			 struct tcf_walker *arg)
L
Linus Torvalds 已提交
709
{
710
	struct tcf_dump_args *a = (void *)arg;
711
	struct net *net = sock_net(a->skb->sk);
L
Linus Torvalds 已提交
712

713
	return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
J
Jamal Hadi Salim 已提交
714 715
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
L
Linus Torvalds 已提交
716 717
}

718
static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744
			   struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, 0,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
745
				return false;
746 747 748 749 750 751 752 753 754 755 756 757 758 759

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
760
			return false;
761
	}
762
	return true;
763 764
}

E
Eric Dumazet 已提交
765
/* called with RTNL */
L
Linus Torvalds 已提交
766 767
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
768
	struct net *net = sock_net(skb->sk);
769
	struct nlattr *tca[TCA_MAX + 1];
L
Linus Torvalds 已提交
770 771
	struct net_device *dev;
	struct Qdisc *q;
772
	struct tcf_block *block;
773
	struct tcf_chain *chain;
774
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
L
Linus Torvalds 已提交
775
	unsigned long cl = 0;
776
	const struct Qdisc_class_ops *cops;
777 778
	long index_start;
	long index;
779
	int err;
L
Linus Torvalds 已提交
780

781
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
L
Linus Torvalds 已提交
782
		return skb->len;
783 784 785 786 787

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err)
		return err;

E
Eric Dumazet 已提交
788 789
	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
L
Linus Torvalds 已提交
790 791 792
		return skb->len;

	if (!tcm->tcm_parent)
793
		q = dev->qdisc;
L
Linus Torvalds 已提交
794 795 796 797
	else
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	if (!q)
		goto out;
E
Eric Dumazet 已提交
798 799
	cops = q->ops->cl_ops;
	if (!cops)
L
Linus Torvalds 已提交
800
		goto errout;
801
	if (!cops->tcf_block)
802
		goto errout;
L
Linus Torvalds 已提交
803 804 805 806 807
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->get(q, tcm->tcm_parent);
		if (cl == 0)
			goto errout;
	}
808 809
	block = cops->tcf_block(q, cl);
	if (!block)
L
Linus Torvalds 已提交
810 811
		goto errout;

812 813
	index_start = cb->args[0];
	index = 0;
814 815 816 817 818 819 820 821 822

	list_for_each_entry(chain, &block->chain_list, list) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
			break;
	}

823
	cb->args[0] = index;
L
Linus Torvalds 已提交
824 825 826 827 828 829 830 831

errout:
	if (cl)
		cops->put(q, cl);
out:
	return skb->len;
}

832
/*
 * Release the actions attached to @exts: destroy them with
 * TCA_ACT_UNBIND, free the actions array and reset the action count.
 * Does nothing when CONFIG_NET_CLS_ACT is disabled.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
L
Linus Torvalds 已提交
844

845
/*
 * Parse the police/action attributes in @tb and attach the resulting
 * actions to @exts.
 *
 * With CONFIG_NET_CLS_ACT: a police attribute (if @exts supports one and
 * it is present) yields a single old-compat action; otherwise an action
 * attribute yields the list built by tcf_action_init(). Errors from
 * action initialisation are returned as-is.
 *
 * Without CONFIG_NET_CLS_ACT: returns -EOPNOTSUPP if either attribute is
 * present.
 *
 * Returns 0 on success.
 */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			LIST_HEAD(actions);
			int err, i = 0;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      &actions);
			if (err)
				return err;
			/* NOTE(review): assumes exts->actions has room for
			 * every action tcf_action_init() can return — confirm.
			 */
			list_for_each_entry(act, &actions, list)
				exts->actions[i++] = act;
			exts->nr_actions = i;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police]))
		return -EOPNOTSUPP;
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);
L
Linus Torvalds 已提交
885

886
/*
 * Replace the contents of @dst with those of @src and destroy what @dst
 * held before. The old contents are copied aside first so the new ones
 * are already in place when the old actions are released.
 * Does nothing when CONFIG_NET_CLS_ACT is disabled.
 */
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
L
Linus Torvalds 已提交
896

897 898 899 900 901 902 903 904 905
#ifdef CONFIG_NET_CLS_ACT
/* Return the first action attached to @exts, or NULL if it has none. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	return exts->nr_actions ? exts->actions[0] : NULL;
}
#endif
906

907
/*
 * Dump the actions of @exts into @skb as a nested attribute.
 *
 * Exts that are not of type TCA_OLD_COMPAT are dumped as an action list
 * nested under exts->action; old-compat exts with a police attribute are
 * dumped as a single action nested under exts->police.
 *
 * Returns 0 on success (or when there is nothing to dump, or when
 * CONFIG_NET_CLS_ACT is disabled) and -1 on failure, after cancelling
 * the nest.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2  was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
L
Linus Torvalds 已提交
949

950

951
/*
 * Copy the statistics of the first action of @exts into @skb.
 *
 * Returns 0 on success or when there is no action (or when
 * CONFIG_NET_CLS_ACT is disabled), -1 if copying the stats fails.
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
L
Linus Torvalds 已提交
961

962 963 964 965 966 967 968
/*
 * Ask the first action of @exts that implements ->get_dev() for its
 * device, in the netns of @dev, storing the answer in *@hw_dev.
 *
 * Returns 0 if *@hw_dev is non-NULL afterwards, -EINVAL if @exts has no
 * actions, and -EOPNOTSUPP otherwise (including when CONFIG_NET_CLS_ACT
 * is disabled).
 *
 * NOTE(review): *@hw_dev is not initialised here; if no action provides
 * ->get_dev() the value tested is whatever the caller passed in —
 * callers presumably pre-initialise it, confirm.
 */
int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
		     struct net_device **hw_dev)
{
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *a;
	LIST_HEAD(actions);

	if (!tcf_exts_has_actions(exts))
		return -EINVAL;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (a->ops->get_dev) {
			a->ops->get_dev(a, dev_net(dev), hw_dev);
			break;
		}
	}
	if (*hw_dev)
		return 0;
#endif
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(tcf_exts_get_dev);

L
Linus Torvalds 已提交
986 987
/*
 * Register the rtnetlink handlers for tc filter messages: new, delete
 * and get all go through tc_ctl_tfilter(); get additionally has
 * tc_dump_tfilter() as its dump handler.
 */
static int __init tc_filter_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, NULL);

	return 0;
}

subsys_initcall(tc_filter_init);