cls_flow.c 16.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
22
#include <linux/if_vlan.h>
23
#include <linux/slab.h>
24
#include <linux/module.h>
25
#include <net/inet_sock.h>
26 27 28 29

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
30
#include <net/flow_dissector.h>
E
Eric Dumazet 已提交
31

32
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
33 34 35 36 37
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
	struct list_head	filters;
J
John Fastabend 已提交
38
	struct rcu_head		rcu;
39 40 41 42 43 44
};

struct flow_filter {
	struct list_head	list;
	struct tcf_exts		exts;
	struct tcf_ematch_tree	ematches;
J
John Fastabend 已提交
45
	struct tcf_proto	*tp;
46 47
	struct timer_list	perturb_timer;
	u32			perturb_period;
48 49 50 51 52 53 54 55 56 57 58
	u32			handle;

	u32			nkeys;
	u32			keymask;
	u32			mode;
	u32			mask;
	u32			xor;
	u32			rshift;
	u32			addend;
	u32			divisor;
	u32			baseclass;
59
	u32			hashrnd;
60 61 62 63
	union {
		struct work_struct	work;
		struct rcu_head		rcu;
	};
64 65 66 67 68 69 70 71 72
};

/*
 * Reduce a pointer to 32 bits: the low 32 bits are XORed with the upper
 * bits when longs are wider than 32 bits, and returned unchanged otherwise.
 */
static inline u32 addr_fold(void *addr)
{
	unsigned long bits = (unsigned long)addr;
	unsigned long low = bits & 0xFFFFFFFF;
	unsigned long high = BITS_PER_LONG > 32 ? bits >> 32 : 0;

	return low ^ high;
}

E
Eric Dumazet 已提交
73
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
74
{
75 76 77 78 79
	__be32 src = flow_get_u32_src(flow);

	if (src)
		return ntohl(src);

80
	return addr_fold(skb->sk);
81 82
}

E
Eric Dumazet 已提交
83
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
84
{
85 86 87 88 89
	__be32 dst = flow_get_u32_dst(flow);

	if (dst)
		return ntohl(dst);

90
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
91 92
}

J
Jamal Hadi Salim 已提交
93 94
static u32 flow_get_proto(const struct sk_buff *skb,
			  const struct flow_keys *flow)
95
{
96
	return flow->basic.ip_proto;
97 98
}

J
Jamal Hadi Salim 已提交
99 100
static u32 flow_get_proto_src(const struct sk_buff *skb,
			      const struct flow_keys *flow)
101
{
102
	if (flow->ports.ports)
103
		return ntohs(flow->ports.src);
104

105 106 107
	return addr_fold(skb->sk);
}

J
Jamal Hadi Salim 已提交
108 109
static u32 flow_get_proto_dst(const struct sk_buff *skb,
			      const struct flow_keys *flow)
110
{
111
	if (flow->ports.ports)
112
		return ntohs(flow->ports.dst);
113

114
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
115 116 117 118
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
119
	return skb->skb_iif;
120 121 122 123 124 125 126 127 128 129 130 131 132 133
}

/* Priority key: the packet's priority field. */
static u32 flow_get_priority(const struct sk_buff *skb)
{
	u32 prio = skb->priority;

	return prio;
}

/* Mark key: the packet's mark field. */
static u32 flow_get_mark(const struct sk_buff *skb)
{
	u32 mark = skb->mark;

	return mark;
}

/*
 * Conntrack key: the folded skb_nfct() value when conntrack support is
 * built, and 0 otherwise.
 */
static u32 flow_get_nfct(const struct sk_buff *skb)
{
	u32 key = 0;

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	key = addr_fold(skb_nfct(skb));
#endif
	return key;
}

141
/*
 * CTTUPLE(skb, member) - read one member of the conntrack tuple for the
 * direction the packet travels in.
 *
 * The macro expands to a statement expression that jumps to a label named
 * "fallback", which every user must provide, when the packet has no
 * conntrack entry.  Without conntrack support it always takes that jump.
 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info __ctinfo;				\
	const struct nf_conn *__ct = nf_ct_get(skb, &__ctinfo);	\
									\
	if (__ct == NULL)						\
		goto fallback;						\
	__ct->tuplehash[CTINFO2DIR(__ctinfo)].tuple.member;		\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif

J
Jamal Hadi Salim 已提交
158 159
static u32 flow_get_nfct_src(const struct sk_buff *skb,
			     const struct flow_keys *flow)
160
{
161
	switch (tc_skb_protocol(skb)) {
162
	case htons(ETH_P_IP):
163
		return ntohl(CTTUPLE(skb, src.u3.ip));
164
	case htons(ETH_P_IPV6):
165 166 167
		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
168
	return flow_get_src(skb, flow);
169 170
}

J
Jamal Hadi Salim 已提交
171 172
static u32 flow_get_nfct_dst(const struct sk_buff *skb,
			     const struct flow_keys *flow)
173
{
174
	switch (tc_skb_protocol(skb)) {
175
	case htons(ETH_P_IP):
176
		return ntohl(CTTUPLE(skb, dst.u3.ip));
177
	case htons(ETH_P_IPV6):
178 179 180
		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
181
	return flow_get_dst(skb, flow);
182 183
}

J
Jamal Hadi Salim 已提交
184 185
static u32 flow_get_nfct_proto_src(const struct sk_buff *skb,
				   const struct flow_keys *flow)
186 187 188
{
	return ntohs(CTTUPLE(skb, src.u.all));
fallback:
E
Eric Dumazet 已提交
189
	return flow_get_proto_src(skb, flow);
190 191
}

J
Jamal Hadi Salim 已提交
192 193
static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb,
				   const struct flow_keys *flow)
194 195 196
{
	return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
E
Eric Dumazet 已提交
197
	return flow_get_proto_dst(skb, flow);
198 199 200 201
}

/*
 * Route class-id key: the tclassid of the packet's dst entry when route
 * class ids are built in and a dst entry is attached; 0 otherwise.
 */
static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
	u32 classid = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	if (skb_dst(skb))
		classid = skb_dst(skb)->tclassid;
#endif
	return classid;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
211 212 213 214 215
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;

216 217
		return from_kuid(&init_user_ns, skuid);
	}
218 219 220 221 222
	return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
223 224 225 226 227
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;

228 229
		return from_kgid(&init_user_ns, skgid);
	}
230 231 232
	return 0;
}

233 234 235 236 237 238 239 240 241
/*
 * VLAN key: the tag reported by vlan_get_tag() masked with VLAN_VID_MASK,
 * or 0 when vlan_get_tag() fails.
 */
static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
	u16 uninitialized_var(tag);

	return vlan_get_tag(skb, &tag) < 0 ? 0 : tag & VLAN_VID_MASK;
}

242 243
/*
 * Hash key: whatever skb_get_hash() reports for the packet.  The skb is not
 * const here because it is passed to skb_get_hash() as a non-const pointer.
 */
static u32 flow_get_rxhash(struct sk_buff *skb)
{
	u32 hash = skb_get_hash(skb);

	return hash;
}

E
Eric Dumazet 已提交
247
static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
248 249 250
{
	switch (key) {
	case FLOW_KEY_SRC:
E
Eric Dumazet 已提交
251
		return flow_get_src(skb, flow);
252
	case FLOW_KEY_DST:
E
Eric Dumazet 已提交
253
		return flow_get_dst(skb, flow);
254
	case FLOW_KEY_PROTO:
E
Eric Dumazet 已提交
255
		return flow_get_proto(skb, flow);
256
	case FLOW_KEY_PROTO_SRC:
E
Eric Dumazet 已提交
257
		return flow_get_proto_src(skb, flow);
258
	case FLOW_KEY_PROTO_DST:
E
Eric Dumazet 已提交
259
		return flow_get_proto_dst(skb, flow);
260 261 262 263 264 265 266 267 268
	case FLOW_KEY_IIF:
		return flow_get_iif(skb);
	case FLOW_KEY_PRIORITY:
		return flow_get_priority(skb);
	case FLOW_KEY_MARK:
		return flow_get_mark(skb);
	case FLOW_KEY_NFCT:
		return flow_get_nfct(skb);
	case FLOW_KEY_NFCT_SRC:
E
Eric Dumazet 已提交
269
		return flow_get_nfct_src(skb, flow);
270
	case FLOW_KEY_NFCT_DST:
E
Eric Dumazet 已提交
271
		return flow_get_nfct_dst(skb, flow);
272
	case FLOW_KEY_NFCT_PROTO_SRC:
E
Eric Dumazet 已提交
273
		return flow_get_nfct_proto_src(skb, flow);
274
	case FLOW_KEY_NFCT_PROTO_DST:
E
Eric Dumazet 已提交
275
		return flow_get_nfct_proto_dst(skb, flow);
276 277 278 279 280 281
	case FLOW_KEY_RTCLASSID:
		return flow_get_rtclassid(skb);
	case FLOW_KEY_SKUID:
		return flow_get_skuid(skb);
	case FLOW_KEY_SKGID:
		return flow_get_skgid(skb);
282 283
	case FLOW_KEY_VLAN_TAG:
		return flow_get_vlan_tag(skb);
284 285
	case FLOW_KEY_RXHASH:
		return flow_get_rxhash(skb);
286 287 288 289 290 291
	default:
		WARN_ON(1);
		return 0;
	}
}

E
Eric Dumazet 已提交
292 293 294 295 296 297 298 299 300 301
/*
 * Keys whose extractors read the dissected struct flow_keys.  The classifier
 * only runs the flow dissector when a filter selects at least one of them.
 */
#define FLOW_KEYS_NEEDED						\
	((1 << FLOW_KEY_SRC) |						\
	 (1 << FLOW_KEY_DST) |						\
	 (1 << FLOW_KEY_PROTO) |					\
	 (1 << FLOW_KEY_PROTO_SRC) |					\
	 (1 << FLOW_KEY_PROTO_DST) |					\
	 (1 << FLOW_KEY_NFCT_SRC) |					\
	 (1 << FLOW_KEY_NFCT_DST) |					\
	 (1 << FLOW_KEY_NFCT_PROTO_SRC) |				\
	 (1 << FLOW_KEY_NFCT_PROTO_DST))

302
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
303 304
			 struct tcf_result *res)
{
J
John Fastabend 已提交
305
	struct flow_head *head = rcu_dereference_bh(tp->root);
306 307 308 309 310 311
	struct flow_filter *f;
	u32 keymask;
	u32 classid;
	unsigned int n, key;
	int r;

J
John Fastabend 已提交
312
	list_for_each_entry_rcu(f, &head->filters, list) {
E
Eric Dumazet 已提交
313
		u32 keys[FLOW_KEY_MAX + 1];
E
Eric Dumazet 已提交
314
		struct flow_keys flow_keys;
315 316 317 318 319

		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
			continue;

		keymask = f->keymask;
E
Eric Dumazet 已提交
320
		if (keymask & FLOW_KEYS_NEEDED)
321
			skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
322 323 324 325

		for (n = 0; n < f->nkeys; n++) {
			key = ffs(keymask) - 1;
			keymask &= ~(1 << key);
E
Eric Dumazet 已提交
326
			keys[n] = flow_key_get(skb, key, &flow_keys);
327 328 329
		}

		if (f->mode == FLOW_MODE_HASH)
330
			classid = jhash2(keys, f->nkeys, f->hashrnd);
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
		else {
			classid = keys[0];
			classid = (classid & f->mask) ^ f->xor;
			classid = (classid >> f->rshift) + f->addend;
		}

		if (f->divisor)
			classid %= f->divisor;

		res->class   = 0;
		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

		r = tcf_exts_exec(skb, &f->exts, res);
		if (r < 0)
			continue;
		return r;
	}
	return -1;
}

351
static void flow_perturbation(struct timer_list *t)
352
{
353
	struct flow_filter *f = from_timer(f, t, perturb_timer);
354 355 356 357 358 359

	get_random_bytes(&f->hashrnd, 4);
	if (f->perturb_period)
		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
}

360 361 362 363 364 365 366 367 368 369 370 371
/* Netlink attribute policy for the TCA_FLOW_* options parsed by flow_change(). */
static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
	/* Key selection and operating mode. */
	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },

	/* Class-id computation parameters. */
	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },

	/* Nested extension and ematch descriptions. */
	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
};

375
static void __flow_destroy_filter(struct flow_filter *f)
J
John Fastabend 已提交
376 377
{
	del_timer_sync(&f->perturb_timer);
378
	tcf_exts_destroy(&f->exts);
379
	tcf_em_tree_destroy(&f->ematches);
380
	tcf_exts_put_net(&f->exts);
J
John Fastabend 已提交
381
	kfree(f);
382 383 384 385 386 387 388 389
}

/*
 * Work handler queued by flow_destroy_filter(): tears the filter down while
 * holding the RTNL lock.
 */
static void flow_destroy_filter_work(struct work_struct *work)
{
	struct flow_filter *filter;

	filter = container_of(work, struct flow_filter, work);

	rtnl_lock();
	__flow_destroy_filter(filter);
	rtnl_unlock();
}

/*
 * RCU callback for a filter removed from the list: rather than freeing here,
 * it turns the filter's rcu/work union into a work item and queues it, so
 * that the actual teardown happens in flow_destroy_filter_work().
 */
static void flow_destroy_filter(struct rcu_head *head)
{
	struct flow_filter *filter;

	filter = container_of(head, struct flow_filter, rcu);

	INIT_WORK(&filter->work, flow_destroy_filter_work);
	tcf_queue_work(&filter->work);
}

401
static int flow_change(struct net *net, struct sk_buff *in_skb,
402
		       struct tcf_proto *tp, unsigned long base,
403
		       u32 handle, struct nlattr **tca,
404
		       void **arg, bool ovr, struct netlink_ext_ack *extack)
405
{
J
John Fastabend 已提交
406 407
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *fold, *fnew;
408 409 410
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_FLOW_MAX + 1];
	unsigned int nkeys = 0;
411
	unsigned int perturb_period = 0;
412 413 414 415 416 417 418 419
	u32 baseclass = 0;
	u32 keymask = 0;
	u32 mode;
	int err;

	if (opt == NULL)
		return -EINVAL;

420
	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL);
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
	if (err < 0)
		return err;

	if (tb[TCA_FLOW_BASECLASS]) {
		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
		if (TC_H_MIN(baseclass) == 0)
			return -EINVAL;
	}

	if (tb[TCA_FLOW_KEYS]) {
		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);

		nkeys = hweight32(keymask);
		if (nkeys == 0)
			return -EINVAL;
436 437 438

		if (fls(keymask) - 1 > FLOW_KEY_MAX)
			return -EOPNOTSUPP;
439 440

		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
441
		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
442
			return -EOPNOTSUPP;
443 444
	}

J
John Fastabend 已提交
445 446
	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew)
447
		return -ENOBUFS;
448 449 450

	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
	if (err < 0)
451
		goto err1;
J
John Fastabend 已提交
452

453 454
	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
	if (err < 0)
455 456 457 458 459
		goto err2;

	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr);
	if (err < 0)
		goto err2;
460

461
	fold = *arg;
J
John Fastabend 已提交
462
	if (fold) {
463
		err = -EINVAL;
J
John Fastabend 已提交
464
		if (fold->handle != handle && handle)
465
			goto err2;
466

J
John Fastabend 已提交
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481
		/* Copy fold into fnew */
		fnew->tp = fold->tp;
		fnew->handle = fold->handle;
		fnew->nkeys = fold->nkeys;
		fnew->keymask = fold->keymask;
		fnew->mode = fold->mode;
		fnew->mask = fold->mask;
		fnew->xor = fold->xor;
		fnew->rshift = fold->rshift;
		fnew->addend = fold->addend;
		fnew->divisor = fold->divisor;
		fnew->baseclass = fold->baseclass;
		fnew->hashrnd = fold->hashrnd;

		mode = fold->mode;
482 483 484
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
485
			goto err2;
486 487

		if (mode == FLOW_MODE_HASH)
J
John Fastabend 已提交
488
			perturb_period = fold->perturb_period;
489 490
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
491
				goto err2;
492 493
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}
494 495 496
	} else {
		err = -EINVAL;
		if (!handle)
497
			goto err2;
498
		if (!tb[TCA_FLOW_KEYS])
499
			goto err2;
500 501 502 503 504

		mode = FLOW_MODE_MAP;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
505
			goto err2;
506

507 508
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
509
				goto err2;
510 511 512
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}

513 514 515 516 517
		if (TC_H_MAJ(baseclass) == 0) {
			struct Qdisc *q = tcf_block_q(tp->chain->block);

			baseclass = TC_H_MAKE(q->handle, baseclass);
		}
518 519 520
		if (TC_H_MIN(baseclass) == 0)
			baseclass = TC_H_MAKE(baseclass, 1);

J
John Fastabend 已提交
521 522 523 524
		fnew->handle = handle;
		fnew->mask  = ~0U;
		fnew->tp = tp;
		get_random_bytes(&fnew->hashrnd, 4);
525 526
	}

527
	timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
528

529
	tcf_block_netif_keep_dst(tp->chain->block);
530

531
	if (tb[TCA_FLOW_KEYS]) {
J
John Fastabend 已提交
532 533
		fnew->keymask = keymask;
		fnew->nkeys   = nkeys;
534 535
	}

J
John Fastabend 已提交
536
	fnew->mode = mode;
537 538

	if (tb[TCA_FLOW_MASK])
J
John Fastabend 已提交
539
		fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
540
	if (tb[TCA_FLOW_XOR])
J
John Fastabend 已提交
541
		fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
542
	if (tb[TCA_FLOW_RSHIFT])
J
John Fastabend 已提交
543
		fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
544
	if (tb[TCA_FLOW_ADDEND])
J
John Fastabend 已提交
545
		fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
546 547

	if (tb[TCA_FLOW_DIVISOR])
J
John Fastabend 已提交
548
		fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
549
	if (baseclass)
J
John Fastabend 已提交
550
		fnew->baseclass = baseclass;
551

J
John Fastabend 已提交
552
	fnew->perturb_period = perturb_period;
553
	if (perturb_period)
J
John Fastabend 已提交
554
		mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
555

556
	if (!*arg)
J
John Fastabend 已提交
557 558
		list_add_tail_rcu(&fnew->list, &head->filters);
	else
559
		list_replace_rcu(&fold->list, &fnew->list);
560

561
	*arg = fnew;
562

563 564
	if (fold) {
		tcf_exts_get_net(&fold->exts);
J
John Fastabend 已提交
565
		call_rcu(&fold->rcu, flow_destroy_filter);
566
	}
567 568
	return 0;

569
err2:
570
	tcf_exts_destroy(&fnew->exts);
571
	tcf_em_tree_destroy(&fnew->ematches);
572
err1:
573
	kfree(fnew);
574 575 576
	return err;
}

577
/*
 * flow_delete - unlink one filter and schedule its destruction.
 * @tp:   classifier instance
 * @arg:  the struct flow_filter to remove
 * @last: set to true when the instance has no filters left afterwards
 *
 * Always returns 0.
 */
static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
{
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *victim = arg;

	list_del_rcu(&victim->list);

	/* Freed from the RCU callback, i.e. after a grace period. */
	tcf_exts_get_net(&victim->exts);
	call_rcu(&victim->rcu, flow_destroy_filter);

	*last = list_empty(&head->filters);

	return 0;
}

static int flow_init(struct tcf_proto *tp)
{
	struct flow_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;
	INIT_LIST_HEAD(&head->filters);
J
John Fastabend 已提交
597
	rcu_assign_pointer(tp->root, head);
598 599 600
	return 0;
}

601
static void flow_destroy(struct tcf_proto *tp)
602
{
J
John Fastabend 已提交
603
	struct flow_head *head = rtnl_dereference(tp->root);
604 605 606
	struct flow_filter *f, *next;

	list_for_each_entry_safe(f, next, &head->filters, list) {
J
John Fastabend 已提交
607
		list_del_rcu(&f->list);
608 609 610 611
		if (tcf_exts_get_net(&f->exts))
			call_rcu(&f->rcu, flow_destroy_filter);
		else
			__flow_destroy_filter(f);
612
	}
J
John Fastabend 已提交
613
	kfree_rcu(head, rcu);
614 615
}

616
/*
 * flow_get - look up a filter by handle.
 *
 * Returns the first filter on the list whose handle equals @handle, or NULL
 * when there is none.
 */
static void *flow_get(struct tcf_proto *tp, u32 handle)
{
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *cur;

	list_for_each_entry(cur, &head->filters, list) {
		if (cur->handle != handle)
			continue;

		return cur;
	}

	return NULL;
}

627
static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
628 629
		     struct sk_buff *skb, struct tcmsg *t)
{
630
	struct flow_filter *f = fh;
631 632 633 634 635 636 637 638 639 640 641
	struct nlattr *nest;

	if (f == NULL)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

642 643 644
	if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
	    nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
		goto nla_put_failure;
645 646

	if (f->mask != ~0 || f->xor != 0) {
647 648 649
		if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
		    nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
			goto nla_put_failure;
650
	}
651 652 653 654 655 656
	if (f->rshift &&
	    nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
		goto nla_put_failure;
	if (f->addend &&
	    nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
		goto nla_put_failure;
657

658 659 660 661 662 663
	if (f->divisor &&
	    nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
		goto nla_put_failure;
	if (f->baseclass &&
	    nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
		goto nla_put_failure;
664

665 666 667
	if (f->perturb_period &&
	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
		goto nla_put_failure;
668

669
	if (tcf_exts_dump(skb, &f->exts) < 0)
670
		goto nla_put_failure;
671
#ifdef CONFIG_NET_EMATCH
672 673 674
	if (f->ematches.hdr.nmatches &&
	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
		goto nla_put_failure;
675
#endif
676 677
	nla_nest_end(skb, nest);

678
	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
679 680 681 682 683
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
684
	nla_nest_cancel(skb, nest);
685 686 687 688 689
	return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
J
John Fastabend 已提交
690
	struct flow_head *head = rtnl_dereference(tp->root);
691 692
	struct flow_filter *f;

693
	list_for_each_entry(f, &head->filters, list) {
694 695
		if (arg->count < arg->skip)
			goto skip;
696
		if (arg->fn(tp, f, arg) < 0) {
697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

/* Operations table registered for the "flow" classifier kind. */
static struct tcf_proto_ops cls_flow_ops __read_mostly = {
	.kind		= "flow",
	.owner		= THIS_MODULE,

	/* Packet path. */
	.classify	= flow_classify,

	/* Instance lifetime. */
	.init		= flow_init,
	.destroy	= flow_destroy,

	/* Filter management and introspection. */
	.change		= flow_change,
	.delete		= flow_delete,
	.get		= flow_get,
	.dump		= flow_dump,
	.walk		= flow_walk,
};

/* Module entry point: register the classifier operations. */
static int __init cls_flow_init(void)
{
	int err = register_tcf_proto_ops(&cls_flow_ops);

	return err;
}

/* Module exit point: unregister the classifier operations. */
static void __exit cls_flow_exit(void)
{
	unregister_tcf_proto_ops(&cls_flow_ops);
}

/* Module load/unload hooks. */
module_init(cls_flow_init);
module_exit(cls_flow_exit);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");