cls_api.c 18.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
22
#include <linux/err.h>
L
Linus Torvalds 已提交
23 24 25
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
26
#include <linux/err.h>
27
#include <linux/slab.h>
28 29
#include <net/net_namespace.h>
#include <net/sock.h>
30
#include <net/netlink.h>
L
Linus Torvalds 已提交
31 32 33 34
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* The list of all installed classifier types */
35
static LIST_HEAD(tcf_proto_base);
L
Linus Torvalds 已提交
36 37 38 39 40 41

/* Protects the list of registered TC classifier modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

42
static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
L
Linus Torvalds 已提交
43
{
44
	const struct tcf_proto_ops *t, *res = NULL;
L
Linus Torvalds 已提交
45 46 47

	if (kind) {
		read_lock(&cls_mod_lock);
48
		list_for_each_entry(t, &tcf_proto_base, head) {
49
			if (strcmp(kind, t->kind) == 0) {
50 51
				if (try_module_get(t->owner))
					res = t;
L
Linus Torvalds 已提交
52 53 54 55 56
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
57
	return res;
L
Linus Torvalds 已提交
58 59 60 61 62 63
}

/* Register/unregister a classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
64
	struct tcf_proto_ops *t;
L
Linus Torvalds 已提交
65 66 67
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
68
	list_for_each_entry(t, &tcf_proto_base, head)
L
Linus Torvalds 已提交
69 70 71
		if (!strcmp(ops->kind, t->kind))
			goto out;

72
	list_add_tail(&ops->head, &tcf_proto_base);
L
Linus Torvalds 已提交
73 74 75 76 77
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
78
EXPORT_SYMBOL(register_tcf_proto_ops);
L
Linus Torvalds 已提交
79 80 81

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
82
	struct tcf_proto_ops *t;
L
Linus Torvalds 已提交
83 84
	int rc = -ENOENT;

85 86 87 88 89
	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();

L
Linus Torvalds 已提交
90
	write_lock(&cls_mod_lock);
91 92 93 94
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
L
Linus Torvalds 已提交
95
			break;
96 97
		}
	}
L
Linus Torvalds 已提交
98 99 100
	write_unlock(&cls_mod_lock);
	return rc;
}
101
EXPORT_SYMBOL(unregister_tcf_proto_ops);
L
Linus Torvalds 已提交
102

103 104
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
105
			  unsigned long fh, int event, bool unicast);
L
Linus Torvalds 已提交
106

107 108 109 110 111 112 113 114 115
/* Send one @event notification (multicast, handle 0) for every tcf_proto
 * on @chain. The chain is walked with rtnl_dereference(); the return value
 * of each tfilter_notify() call is ignored.
 */
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct nlmsghdr *n,
				 struct tcf_proto __rcu **chain, int event)
{
	struct tcf_proto __rcu **link = chain;
	struct tcf_proto *tp;

	while ((tp = rtnl_dereference(*link)) != NULL) {
		tfilter_notify(net, oskb, n, tp, 0, event, false);
		link = &tp->next;
	}
}
L
Linus Torvalds 已提交
118 119 120

/* Select new prio value from the range, managed by kernel. */

121
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
L
Linus Torvalds 已提交
122
{
123
	u32 first = TC_H_MAKE(0xC0000000U, 0U);
L
Linus Torvalds 已提交
124 125

	if (tp)
E
Eric Dumazet 已提交
126
		first = tp->prio - 1;
L
Linus Torvalds 已提交
127 128 129 130

	return first;
}

131
/* Allocate and initialise a tcf_proto of classifier type @kind.
 *
 * @kind:     classifier type name, looked up via tcf_proto_lookup_ops()
 * @protocol: stored in tp->protocol
 * @prio:     stored in tp->prio
 * @parent:   stored in tp->classid
 * @q:        qdisc the proto is attached to
 * @block:    filter block the proto belongs to
 *
 * Returns the new proto, or an ERR_PTR():
 *   -ENOBUFS  allocation failed
 *   -ENOENT   no classifier of that kind is available
 *   -EAGAIN   the classifier module was loaded after dropping RTNL; the
 *             caller has to replay the whole request
 *   other     whatever the classifier's init() returned
 *
 * On success the reference on the classifier module taken by the lookup is
 * kept; on every failure path it is dropped again.
 */
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, u32 parent, struct Qdisc *q,
					  struct tcf_block *block)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind);
	if (!tp->ops) {
		err = -ENOENT;
#ifdef CONFIG_MODULES
		rtnl_unlock();
		request_module("cls_%s", kind);
		rtnl_lock();
		tp->ops = tcf_proto_lookup_ops(kind);
		/* We dropped the RTNL semaphore in order to perform
		 * the module load. So, even if we succeeded in loading
		 * the module we have to replay the request. We indicate
		 * this using -EAGAIN.
		 */
		if (tp->ops) {
			module_put(tp->ops->owner);
			err = -EAGAIN;
		}
#endif
		/* Must be outside the #ifdef: without module support an
		 * unknown kind would otherwise fall through and dereference
		 * the NULL tp->ops below.
		 */
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->classid = parent;
	tp->q = q;
	tp->block = block;

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

183
static void tcf_proto_destroy(struct tcf_proto *tp)
184
{
185 186 187
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
188 189
}

190
/* Destroy every tcf_proto on the chain whose head pointer is @fl.
 *
 * Each proto is unlinked from the head before it is destroyed, so *fl is
 * NULL when the function returns.
 */
static void tcf_chain_destroy(struct tcf_proto __rcu **fl)
{
	struct tcf_proto *head;

	for (;;) {
		head = rtnl_dereference(*fl);
		if (!head)
			break;
		RCU_INIT_POINTER(*fl, head->next);
		tcf_proto_destroy(head);
	}
}
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216

/* Allocate a filter block that refers to the chain head @p_filter_chain.
 *
 * On success the new block is stored in *@p_block and 0 is returned.
 * Returns -ENOMEM if the allocation fails, leaving *@p_block untouched.
 */
int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain)
{
	struct tcf_block *blk;

	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
	if (!blk)
		return -ENOMEM;

	blk->p_filter_chain = p_filter_chain;
	*p_block = blk;

	return 0;
}
EXPORT_SYMBOL(tcf_block_get);

void tcf_block_put(struct tcf_block *block)
{
	if (!block)
		return;
217
	tcf_chain_destroy(block->p_filter_chain);
218 219 220
	kfree(block);
}
EXPORT_SYMBOL(tcf_block_put);
221

222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
/* Main classifier routine: scans the classifier chain starting at @tp,
 * skips entries whose protocol matches neither the skb's protocol nor
 * ETH_P_ALL, and asks each remaining classifier in turn.
 *
 * Returns the first non-negative result of a classifier's classify()
 * hook, or TC_ACT_UNSPEC when the chain is exhausted.
 *
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY result (when @compat_mode
 * is false) restarts the scan from the original head of the chain. After
 * max_reclassify_loop restarts, the next reclassify request logs a
 * rate-limited notice and returns TC_ACT_SHOT.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *old_tp = tp;	/* chain head, for restarts */
	int limit = 0;				/* restarts performed so far */

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		/* Only consult classifiers bound to this protocol or to all. */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
			goto reset;
#endif
		/* A negative result means "try the next classifier". */
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	/* tp still points at the classifier that asked for the restart. */
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	/* Restart from the head; the protocol is re-read from the skb. */
	tp = old_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);

L
Linus Torvalds 已提交
270 271
/* Add/change/delete/get a filter node */

272 273
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
274
{
275
	struct net *net = sock_net(skb->sk);
276
	struct nlattr *tca[TCA_MAX + 1];
L
Linus Torvalds 已提交
277 278 279 280 281 282 283
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 nprio;
	u32 parent;
	struct net_device *dev;
	struct Qdisc  *q;
J
John Fastabend 已提交
284 285
	struct tcf_proto __rcu **back;
	struct tcf_proto __rcu **chain;
286
	struct tcf_block *block;
287
	struct tcf_proto *next;
L
Linus Torvalds 已提交
288
	struct tcf_proto *tp;
289
	const struct Qdisc_class_ops *cops;
L
Linus Torvalds 已提交
290 291 292
	unsigned long cl;
	unsigned long fh;
	int err;
293
	int tp_created;
L
Linus Torvalds 已提交
294

295
	if ((n->nlmsg_type != RTM_GETTFILTER) &&
296
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
297
		return -EPERM;
298

L
Linus Torvalds 已提交
299
replay:
300 301
	tp_created = 0;

302
	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
303 304 305
	if (err < 0)
		return err;

306
	t = nlmsg_data(n);
L
Linus Torvalds 已提交
307 308 309 310 311 312 313
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	nprio = prio;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
314 315
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
316
			if (protocol || t->tcm_handle || tca[TCA_KIND])
317 318 319 320 321 322 323 324 325 326 327 328
				return -ENOENT;
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
				break;
			}
			/* fall-through */
		default:
L
Linus Torvalds 已提交
329
			return -ENOENT;
330
		}
L
Linus Torvalds 已提交
331 332 333 334 335
	}

	/* Find head of filter chain. */

	/* Find link */
336
	dev = __dev_get_by_index(net, t->tcm_ifindex);
337
	if (dev == NULL)
L
Linus Torvalds 已提交
338 339 340 341
		return -ENODEV;

	/* Find qdisc */
	if (!parent) {
342
		q = dev->qdisc;
L
Linus Torvalds 已提交
343
		parent = q->handle;
344 345 346 347 348
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}
L
Linus Torvalds 已提交
349 350

	/* Is it classful? */
E
Eric Dumazet 已提交
351 352
	cops = q->ops->cl_ops;
	if (!cops)
L
Linus Torvalds 已提交
353 354
		return -EINVAL;

355
	if (!cops->tcf_block)
356 357
		return -EOPNOTSUPP;

L
Linus Torvalds 已提交
358 359 360 361 362 363 364 365
	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		cl = cops->get(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
366 367
	block = cops->tcf_block(q, cl);
	if (!block) {
368
		err = -EINVAL;
L
Linus Torvalds 已提交
369
		goto errout;
370
	}
371 372
	chain = block->p_filter_chain;

373 374
	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
375
		tcf_chain_destroy(chain);
376 377 378
		err = 0;
		goto errout;
	}
L
Linus Torvalds 已提交
379 380

	/* Check the chain for existence of proto-tcf with this priority */
J
John Fastabend 已提交
381 382 383
	for (back = chain;
	     (tp = rtnl_dereference(*back)) != NULL;
	     back = &tp->next) {
L
Linus Torvalds 已提交
384 385
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
E
Eric Dumazet 已提交
386
				if (!nprio ||
387 388
				    (tp->protocol != protocol && protocol)) {
					err = -EINVAL;
L
Linus Torvalds 已提交
389
					goto errout;
390
				}
391
			} else {
L
Linus Torvalds 已提交
392
				tp = NULL;
393
			}
L
Linus Torvalds 已提交
394 395 396 397 398 399 400
			break;
		}
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

401 402
		if (tca[TCA_KIND] == NULL || !protocol) {
			err = -EINVAL;
L
Linus Torvalds 已提交
403
			goto errout;
404
		}
L
Linus Torvalds 已提交
405

E
Eric Dumazet 已提交
406
		if (n->nlmsg_type != RTM_NEWTFILTER ||
407 408
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
L
Linus Torvalds 已提交
409
			goto errout;
410
		}
L
Linus Torvalds 已提交
411

412 413
		if (!nprio)
			nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
L
Linus Torvalds 已提交
414

415
		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
416
				      protocol, nprio, parent, q, block);
417 418
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
L
Linus Torvalds 已提交
419 420
			goto errout;
		}
421
		tp_created = 1;
422 423
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		err = -EINVAL;
L
Linus Torvalds 已提交
424
		goto errout;
425
	}
L
Linus Torvalds 已提交
426 427 428 429 430

	fh = tp->ops->get(tp, t->tcm_handle);

	if (fh == 0) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
431
			next = rtnl_dereference(tp->next);
J
John Fastabend 已提交
432
			RCU_INIT_POINTER(*back, next);
433 434
			tfilter_notify(net, skb, n, tp, fh,
				       RTM_DELTFILTER, false);
435
			tcf_proto_destroy(tp);
L
Linus Torvalds 已提交
436 437 438 439
			err = 0;
			goto errout;
		}

440
		if (n->nlmsg_type != RTM_NEWTFILTER ||
441 442
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
L
Linus Torvalds 已提交
443
			goto errout;
444
		}
L
Linus Torvalds 已提交
445
	} else {
446 447
		bool last;

L
Linus Torvalds 已提交
448
		switch (n->nlmsg_type) {
449
		case RTM_NEWTFILTER:
450 451
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
452
					tcf_proto_destroy(tp);
453
				err = -EEXIST;
L
Linus Torvalds 已提交
454
				goto errout;
455
			}
L
Linus Torvalds 已提交
456 457
			break;
		case RTM_DELTFILTER:
458
			err = tp->ops->delete(tp, fh, &last);
459 460 461 462 463
			if (err)
				goto errout;
			next = rtnl_dereference(tp->next);
			tfilter_notify(net, skb, n, tp, t->tcm_handle,
				       RTM_DELTFILTER, false);
464
			if (last) {
465
				RCU_INIT_POINTER(*back, next);
466 467
				tcf_proto_destroy(tp);
			}
468
			goto errout;
L
Linus Torvalds 已提交
469
		case RTM_GETTFILTER:
J
Jamal Hadi Salim 已提交
470
			err = tfilter_notify(net, skb, n, tp, fh,
471
					     RTM_NEWTFILTER, true);
L
Linus Torvalds 已提交
472 473 474 475 476 477 478
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

479 480
	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
481 482
	if (err == 0) {
		if (tp_created) {
J
John Fastabend 已提交
483 484
			RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
			rcu_assign_pointer(*back, tp);
485
		}
486
		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
487 488
	} else {
		if (tp_created)
489
			tcf_proto_destroy(tp);
490
	}
L
Linus Torvalds 已提交
491 492 493 494 495 496 497 498 499 500

errout:
	if (cl)
		cops->put(q, cl);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}

501 502 503
/* Append one filter netlink message describing @tp / @fh to @skb.
 *
 * The message carries a struct tcmsg header and a TCA_KIND string. For any
 * event other than RTM_DELTFILTER the handle is first cleared and the
 * classifier's dump() hook (if it has one) fills in the rest; for
 * RTM_DELTFILTER the handle is set to @fh and dump() is not called.
 *
 * Returns skb->len on success. On failure the skb is trimmed back to where
 * it was on entry and -1 is returned.
 */
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, unsigned long fh, u32 portid,
			 u32 seq, u16 flags, int event)
{
	unsigned char *start = skb_tail_pointer(skb);
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto cancel;

	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
	tcm->tcm_parent = tp->classid;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);

	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto cancel;

	if (event == RTM_DELTFILTER) {
		tcm->tcm_handle = fh;
	} else {
		tcm->tcm_handle = 0;
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto cancel;
	}

	nlh->nlmsg_len = skb_tail_pointer(skb) - start;
	return skb->len;

cancel:
	nlmsg_trim(skb, start);
	return -1;
}

536 537
/* Build a filter notification for @tp / @fh and send it.
 *
 * @oskb:    request skb; its netlink portid is used if non-NULL, else 0
 * @n:       request header; supplies sequence number and flags
 * @unicast: send only to the requesting portid instead of to RTNLGRP_TC
 *
 * Returns -ENOBUFS if no skb could be allocated, -EINVAL if the message
 * could not be built, otherwise the result of netlink_unicast() or
 * rtnetlink_send().
 */
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  unsigned long fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = 0;

	if (oskb)
		portid = NETLINK_CB(oskb).portid;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (!unicast)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

	return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
}

560
struct tcf_dump_args {
L
Linus Torvalds 已提交
561 562 563 564 565
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

566 567
static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
			 struct tcf_walker *arg)
L
Linus Torvalds 已提交
568
{
569
	struct tcf_dump_args *a = (void *)arg;
570
	struct net *net = sock_net(a->skb->sk);
L
Linus Torvalds 已提交
571

572
	return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
J
Jamal Hadi Salim 已提交
573 574
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
L
Linus Torvalds 已提交
575 576
}

E
Eric Dumazet 已提交
577
/* called with RTNL */
L
Linus Torvalds 已提交
578 579
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
580
	struct net *net = sock_net(skb->sk);
L
Linus Torvalds 已提交
581 582 583 584
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
585
	struct tcf_block *block;
J
John Fastabend 已提交
586
	struct tcf_proto *tp, __rcu **chain;
587
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
L
Linus Torvalds 已提交
588
	unsigned long cl = 0;
589
	const struct Qdisc_class_ops *cops;
L
Linus Torvalds 已提交
590 591
	struct tcf_dump_args arg;

592
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
L
Linus Torvalds 已提交
593
		return skb->len;
E
Eric Dumazet 已提交
594 595
	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
L
Linus Torvalds 已提交
596 597 598
		return skb->len;

	if (!tcm->tcm_parent)
599
		q = dev->qdisc;
L
Linus Torvalds 已提交
600 601 602 603
	else
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	if (!q)
		goto out;
E
Eric Dumazet 已提交
604 605
	cops = q->ops->cl_ops;
	if (!cops)
L
Linus Torvalds 已提交
606
		goto errout;
607
	if (!cops->tcf_block)
608
		goto errout;
L
Linus Torvalds 已提交
609 610 611 612 613
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->get(q, tcm->tcm_parent);
		if (cl == 0)
			goto errout;
	}
614 615
	block = cops->tcf_block(q, cl);
	if (!block)
L
Linus Torvalds 已提交
616
		goto errout;
617
	chain = block->p_filter_chain;
L
Linus Torvalds 已提交
618 619 620

	s_t = cb->args[0];

J
John Fastabend 已提交
621 622
	for (tp = rtnl_dereference(*chain), t = 0;
	     tp; tp = rtnl_dereference(tp->next), t++) {
E
Eric Dumazet 已提交
623 624
		if (t < s_t)
			continue;
L
Linus Torvalds 已提交
625 626 627 628 629 630 631
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (t > s_t)
632 633
			memset(&cb->args[1], 0,
			       sizeof(cb->args)-sizeof(cb->args[0]));
L
Linus Torvalds 已提交
634
		if (cb->args[1] == 0) {
635 636
			if (tcf_fill_node(net, skb, tp, 0,
					  NETLINK_CB(cb->skb).portid,
637 638
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
L
Linus Torvalds 已提交
639
				break;
640

L
Linus Torvalds 已提交
641 642 643 644 645 646 647 648
			cb->args[1] = 1;
		}
		if (tp->ops->walk == NULL)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
E
Eric Dumazet 已提交
649
		arg.w.skip = cb->args[1] - 1;
L
Linus Torvalds 已提交
650 651
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
E
Eric Dumazet 已提交
652
		cb->args[1] = arg.w.count + 1;
L
Linus Torvalds 已提交
653 654 655 656 657 658 659 660 661 662 663 664 665
		if (arg.w.stop)
			break;
	}

	cb->args[0] = t;

errout:
	if (cl)
		cops->put(q, cl);
out:
	return skb->len;
}

666
/* Release the actions attached to @exts.
 *
 * With CONFIG_NET_CLS_ACT the actions are gathered into a list, destroyed
 * with TCA_ACT_UNBIND, the actions array is freed and nr_actions is reset
 * to 0. Without CONFIG_NET_CLS_ACT this does nothing.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(act_list);

	tcf_exts_to_list(exts, &act_list);
	tcf_action_destroy(&act_list, TCA_ACT_UNBIND);

	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
677
EXPORT_SYMBOL(tcf_exts_destroy);
L
Linus Torvalds 已提交
678

679
/* Parse the police / action attributes of a filter into @exts.
 *
 * @tb is indexed with exts->police and exts->action. If the police
 * attribute is present it wins: a single "police" action is created and
 * @exts is marked TCA_OLD_COMPAT. Otherwise, if the action attribute is
 * present, all actions it describes are created and stored in order.
 *
 * Returns 0 on success or when neither attribute is present, else the
 * error from action initialisation. Without CONFIG_NET_CLS_ACT the
 * presence of either attribute yields -EOPNOTSUPP.
 */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *act;

	if (exts->police && tb[exts->police]) {
		act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
					"police", ovr, TCA_ACT_BIND);
		if (IS_ERR(act))
			return PTR_ERR(act);

		exts->type = TCA_OLD_COMPAT;
		act->type = TCA_OLD_COMPAT;
		exts->actions[0] = act;
		exts->nr_actions = 1;
		return 0;
	}

	if (exts->action && tb[exts->action]) {
		LIST_HEAD(act_list);
		int count = 0;
		int err;

		err = tcf_action_init(net, tb[exts->action], rate_tlv,
				      NULL, ovr, TCA_ACT_BIND,
				      &act_list);
		if (err)
			return err;

		list_for_each_entry(act, &act_list, list) {
			exts->actions[count] = act;
			count++;
		}
		exts->nr_actions = count;
	}

	return 0;
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police]))
		return -EOPNOTSUPP;

	return 0;
#endif
}
717
EXPORT_SYMBOL(tcf_exts_validate);
L
Linus Torvalds 已提交
718

719 720
/* Replace the actions of @dst with those of @src.
 *
 * The previous contents of @dst are copied first, the new values are
 * installed under the tree lock of @tp, and the old contents are destroyed
 * after the lock is released. Without CONFIG_NET_CLS_ACT this does nothing.
 */
void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
		     struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts previous = *dst;

	tcf_tree_lock(tp);
	dst->nr_actions = src->nr_actions;
	dst->actions = src->actions;
	dst->type = src->type;
	tcf_tree_unlock(tp);

	/* Destroy the replaced actions outside the lock. */
	tcf_exts_destroy(&previous);
#endif
}
734
EXPORT_SYMBOL(tcf_exts_change);
L
Linus Torvalds 已提交
735

736 737 738 739 740 741 742 743 744
#ifdef CONFIG_NET_CLS_ACT
/* Return the first action of @exts, or NULL if it has no actions. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	return exts->nr_actions ? exts->actions[0] : NULL;
}
#endif
745

746
/* Dump the actions of @exts into @skb as a nested attribute.
 *
 * Nothing is written when exts->action is 0 or there are no actions. For
 * the TCA_OLD_COMPAT type the first action is dumped in the old format
 * under exts->police (if set); otherwise all actions are dumped under
 * exts->action.
 *
 * Returns 0 on success, or -1 after cancelling the nest on failure.
 * Without CONFIG_NET_CLS_ACT it always returns 0.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (!exts->action || !exts->nr_actions)
		return 0;

	/*
	 * again for backward compatible mode - we want
	 * to work with both old and new modes of entering
	 * tc data even if iproute2  was newer - jhs
	 */
	if (exts->type != TCA_OLD_COMPAT) {
		LIST_HEAD(act_list);

		nest = nla_nest_start(skb, exts->action);
		if (!nest)
			goto nla_put_failure;

		tcf_exts_to_list(exts, &act_list);
		if (tcf_action_dump(skb, &act_list, 0, 0) < 0)
			goto nla_put_failure;
		nla_nest_end(skb, nest);
	} else if (exts->police) {
		struct tc_action *first = tcf_exts_first_act(exts);

		nest = nla_nest_start(skb, exts->police);
		if (!nest || !first)
			goto nla_put_failure;
		if (tcf_action_dump_old(skb, first, 0, 0) < 0)
			goto nla_put_failure;
		nla_nest_end(skb, nest);
	}

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
787
EXPORT_SYMBOL(tcf_exts_dump);
L
Linus Torvalds 已提交
788

789

790
/* Copy the statistics of the first action of @exts into @skb.
 *
 * Returns -1 if tcf_action_copy_stats() fails, 0 otherwise (including
 * when there is no action or CONFIG_NET_CLS_ACT is disabled).
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *first = tcf_exts_first_act(exts);

	if (!first)
		return 0;
	if (tcf_action_copy_stats(skb, first, 1) < 0)
		return -1;
#endif
	return 0;
}
799
EXPORT_SYMBOL(tcf_exts_dump_stats);
L
Linus Torvalds 已提交
800

801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
/* Ask the first action of @exts that implements get_dev() for its device.
 *
 * Returns -EINVAL if @exts has no actions, 0 if *@hw_dev is non-NULL after
 * the lookup, and -EOPNOTSUPP otherwise (always so without
 * CONFIG_NET_CLS_ACT).
 *
 * NOTE(review): *@hw_dev is tested even when no action provided get_dev(),
 * so the caller presumably has to initialise it - confirm at call sites.
 */
int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
		     struct net_device **hw_dev)
{
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *act;
	LIST_HEAD(act_list);

	if (tc_no_actions(exts))
		return -EINVAL;

	tcf_exts_to_list(exts, &act_list);
	list_for_each_entry(act, &act_list, list) {
		if (!act->ops->get_dev)
			continue;
		/* Only the first capable action is consulted. */
		act->ops->get_dev(act, dev_net(dev), hw_dev);
		break;
	}
	if (*hw_dev)
		return 0;
#endif
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(tcf_exts_get_dev);

L
Linus Torvalds 已提交
825 826
/* Register the rtnetlink handlers for filter messages.
 *
 * All three message types are handled by tc_ctl_tfilter(); only
 * RTM_GETTFILTER additionally gets a dump callback.
 */
static int __init tc_filter_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, NULL);

	return 0;
}

subsys_initcall(tc_filter_init);