cls_flow.c 16.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/inet_sock.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/flow_dissector.h>

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
	struct list_head	filters;
J
John Fastabend 已提交
38
	struct rcu_head		rcu;
39 40 41 42 43 44
};

struct flow_filter {
	struct list_head	list;
	struct tcf_exts		exts;
	struct tcf_ematch_tree	ematches;
J
John Fastabend 已提交
45
	struct tcf_proto	*tp;
46 47
	struct timer_list	perturb_timer;
	u32			perturb_period;
48 49 50 51 52 53 54 55 56 57 58
	u32			handle;

	u32			nkeys;
	u32			keymask;
	u32			mode;
	u32			mask;
	u32			xor;
	u32			rshift;
	u32			addend;
	u32			divisor;
	u32			baseclass;
59
	u32			hashrnd;
60 61 62 63
	union {
		struct work_struct	work;
		struct rcu_head		rcu;
	};
64 65 66 67 68 69 70 71 72
};

/* Fold a pointer value into 32 bits (XOR of high and low halves on 64-bit). */
static inline u32 addr_fold(void *addr)
{
	unsigned long v = (unsigned long)addr;

	return (v & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? v >> 32 : 0);
}

E
Eric Dumazet 已提交
73
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
74
{
75 76 77 78 79
	__be32 src = flow_get_u32_src(flow);

	if (src)
		return ntohl(src);

80
	return addr_fold(skb->sk);
81 82
}

E
Eric Dumazet 已提交
83
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
84
{
85 86 87 88 89
	__be32 dst = flow_get_u32_dst(flow);

	if (dst)
		return ntohl(dst);

90
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
91 92
}

J
Jamal Hadi Salim 已提交
93 94
static u32 flow_get_proto(const struct sk_buff *skb,
			  const struct flow_keys *flow)
95
{
96
	return flow->basic.ip_proto;
97 98
}

J
Jamal Hadi Salim 已提交
99 100
static u32 flow_get_proto_src(const struct sk_buff *skb,
			      const struct flow_keys *flow)
101
{
102
	if (flow->ports.ports)
103
		return ntohs(flow->ports.src);
104

105 106 107
	return addr_fold(skb->sk);
}

J
Jamal Hadi Salim 已提交
108 109
static u32 flow_get_proto_dst(const struct sk_buff *skb,
			      const struct flow_keys *flow)
110
{
111
	if (flow->ports.ports)
112
		return ntohs(flow->ports.dst);
113

114
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
115 116 117 118
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
119
	return skb->skb_iif;
120 121 122 123 124 125 126 127 128 129 130 131 132 133
}

/* Packet priority field (skb->priority). */
static u32 flow_get_priority(const struct sk_buff *skb)
{
	return skb->priority;
}

/* Netfilter/routing mark (skb->mark). */
static u32 flow_get_mark(const struct sk_buff *skb)
{
	return skb->mark;
}

/* Fold the conntrack entry pointer into a key; 0 without conntrack support. */
static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	return addr_fold(skb_nfct(skb));
#else
	return 0;
#endif
}

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/*
 * Fetch @member from the conntrack tuple attached to @skb.
 * Jumps to a local "fallback" label when no conntrack entry exists, so it
 * may only be used inside functions that provide that label.
 */
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info ctinfo;					\
	const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);		\
	if (ct == NULL)							\
		goto fallback;						\
	ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;			\
})
#else
/* No conntrack support: always take the fallback path. */
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif

J
Jamal Hadi Salim 已提交
158 159
static u32 flow_get_nfct_src(const struct sk_buff *skb,
			     const struct flow_keys *flow)
160
{
161
	switch (tc_skb_protocol(skb)) {
162
	case htons(ETH_P_IP):
163
		return ntohl(CTTUPLE(skb, src.u3.ip));
164
	case htons(ETH_P_IPV6):
165 166 167
		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
168
	return flow_get_src(skb, flow);
169 170
}

J
Jamal Hadi Salim 已提交
171 172
static u32 flow_get_nfct_dst(const struct sk_buff *skb,
			     const struct flow_keys *flow)
173
{
174
	switch (tc_skb_protocol(skb)) {
175
	case htons(ETH_P_IP):
176
		return ntohl(CTTUPLE(skb, dst.u3.ip));
177
	case htons(ETH_P_IPV6):
178 179 180
		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
181
	return flow_get_dst(skb, flow);
182 183
}

J
Jamal Hadi Salim 已提交
184 185
static u32 flow_get_nfct_proto_src(const struct sk_buff *skb,
				   const struct flow_keys *flow)
186 187 188
{
	return ntohs(CTTUPLE(skb, src.u.all));
fallback:
E
Eric Dumazet 已提交
189
	return flow_get_proto_src(skb, flow);
190 191
}

J
Jamal Hadi Salim 已提交
192 193
static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb,
				   const struct flow_keys *flow)
194 195 196
{
	return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
E
Eric Dumazet 已提交
197
	return flow_get_proto_dst(skb, flow);
198 199 200 201
}

/* Routing realm (tclassid) of the attached dst entry, 0 when unavailable. */
static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	const struct dst_entry *dst = skb_dst(skb);

	if (dst)
		return dst->tclassid;
#endif
	return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
211 212 213 214 215
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;

216 217
		return from_kuid(&init_user_ns, skuid);
	}
218 219 220 221 222
	return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
223 224 225 226 227
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;

228 229
		return from_kgid(&init_user_ns, skgid);
	}
230 231 232
	return 0;
}

233 234 235 236 237 238 239 240 241
static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
	u16 uninitialized_var(tag);

	if (vlan_get_tag(skb, &tag) < 0)
		return 0;
	return tag & VLAN_VID_MASK;
}

242 243
static u32 flow_get_rxhash(struct sk_buff *skb)
{
244
	return skb_get_hash(skb);
245 246
}

E
Eric Dumazet 已提交
247
static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
248 249 250
{
	switch (key) {
	case FLOW_KEY_SRC:
E
Eric Dumazet 已提交
251
		return flow_get_src(skb, flow);
252
	case FLOW_KEY_DST:
E
Eric Dumazet 已提交
253
		return flow_get_dst(skb, flow);
254
	case FLOW_KEY_PROTO:
E
Eric Dumazet 已提交
255
		return flow_get_proto(skb, flow);
256
	case FLOW_KEY_PROTO_SRC:
E
Eric Dumazet 已提交
257
		return flow_get_proto_src(skb, flow);
258
	case FLOW_KEY_PROTO_DST:
E
Eric Dumazet 已提交
259
		return flow_get_proto_dst(skb, flow);
260 261 262 263 264 265 266 267 268
	case FLOW_KEY_IIF:
		return flow_get_iif(skb);
	case FLOW_KEY_PRIORITY:
		return flow_get_priority(skb);
	case FLOW_KEY_MARK:
		return flow_get_mark(skb);
	case FLOW_KEY_NFCT:
		return flow_get_nfct(skb);
	case FLOW_KEY_NFCT_SRC:
E
Eric Dumazet 已提交
269
		return flow_get_nfct_src(skb, flow);
270
	case FLOW_KEY_NFCT_DST:
E
Eric Dumazet 已提交
271
		return flow_get_nfct_dst(skb, flow);
272
	case FLOW_KEY_NFCT_PROTO_SRC:
E
Eric Dumazet 已提交
273
		return flow_get_nfct_proto_src(skb, flow);
274
	case FLOW_KEY_NFCT_PROTO_DST:
E
Eric Dumazet 已提交
275
		return flow_get_nfct_proto_dst(skb, flow);
276 277 278 279 280 281
	case FLOW_KEY_RTCLASSID:
		return flow_get_rtclassid(skb);
	case FLOW_KEY_SKUID:
		return flow_get_skuid(skb);
	case FLOW_KEY_SKGID:
		return flow_get_skgid(skb);
282 283
	case FLOW_KEY_VLAN_TAG:
		return flow_get_vlan_tag(skb);
284 285
	case FLOW_KEY_RXHASH:
		return flow_get_rxhash(skb);
286 287 288 289 290 291
	default:
		WARN_ON(1);
		return 0;
	}
}

/* Keys whose extraction requires a prior skb_flow_dissect_flow_keys() run. */
#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) |			\
			  (1 << FLOW_KEY_DST) |			\
			  (1 << FLOW_KEY_PROTO) |		\
			  (1 << FLOW_KEY_PROTO_SRC) |		\
			  (1 << FLOW_KEY_PROTO_DST) |		\
			  (1 << FLOW_KEY_NFCT_SRC) |		\
			  (1 << FLOW_KEY_NFCT_DST) |		\
			  (1 << FLOW_KEY_NFCT_PROTO_SRC) |	\
			  (1 << FLOW_KEY_NFCT_PROTO_DST))

302
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
303 304
			 struct tcf_result *res)
{
J
John Fastabend 已提交
305
	struct flow_head *head = rcu_dereference_bh(tp->root);
306 307 308 309 310 311
	struct flow_filter *f;
	u32 keymask;
	u32 classid;
	unsigned int n, key;
	int r;

J
John Fastabend 已提交
312
	list_for_each_entry_rcu(f, &head->filters, list) {
E
Eric Dumazet 已提交
313
		u32 keys[FLOW_KEY_MAX + 1];
E
Eric Dumazet 已提交
314
		struct flow_keys flow_keys;
315 316 317 318 319

		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
			continue;

		keymask = f->keymask;
E
Eric Dumazet 已提交
320
		if (keymask & FLOW_KEYS_NEEDED)
321
			skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
322 323 324 325

		for (n = 0; n < f->nkeys; n++) {
			key = ffs(keymask) - 1;
			keymask &= ~(1 << key);
E
Eric Dumazet 已提交
326
			keys[n] = flow_key_get(skb, key, &flow_keys);
327 328 329
		}

		if (f->mode == FLOW_MODE_HASH)
330
			classid = jhash2(keys, f->nkeys, f->hashrnd);
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
		else {
			classid = keys[0];
			classid = (classid & f->mask) ^ f->xor;
			classid = (classid >> f->rshift) + f->addend;
		}

		if (f->divisor)
			classid %= f->divisor;

		res->class   = 0;
		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

		r = tcf_exts_exec(skb, &f->exts, res);
		if (r < 0)
			continue;
		return r;
	}
	return -1;
}

351
static void flow_perturbation(struct timer_list *t)
352
{
353
	struct flow_filter *f = from_timer(f, t, perturb_timer);
354 355 356 357 358 359

	get_random_bytes(&f->hashrnd, 4);
	if (f->perturb_period)
		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
}

360 361 362 363 364 365 366 367 368 369 370 371
static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
372
	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
373 374
};

375
static void __flow_destroy_filter(struct flow_filter *f)
J
John Fastabend 已提交
376 377
{
	del_timer_sync(&f->perturb_timer);
378
	tcf_exts_destroy(&f->exts);
379
	tcf_em_tree_destroy(&f->ematches);
380
	tcf_exts_put_net(&f->exts);
J
John Fastabend 已提交
381
	kfree(f);
382 383 384 385 386 387 388 389
}

/* Workqueue stage of deferred destruction: take RTNL and finish teardown. */
static void flow_destroy_filter_work(struct work_struct *work)
{
	struct flow_filter *f = container_of(work, struct flow_filter, work);

	rtnl_lock();
	__flow_destroy_filter(f);
	rtnl_unlock();
}

/* RCU stage of deferred destruction: punt the real work to a workqueue,
 * since teardown needs to sleep (del_timer_sync, RTNL).
 */
static void flow_destroy_filter(struct rcu_head *head)
{
	struct flow_filter *f = container_of(head, struct flow_filter, rcu);

	INIT_WORK(&f->work, flow_destroy_filter_work);
	tcf_queue_work(&f->work);
}

401
static int flow_change(struct net *net, struct sk_buff *in_skb,
402
		       struct tcf_proto *tp, unsigned long base,
403
		       u32 handle, struct nlattr **tca,
404
		       void **arg, bool ovr, struct netlink_ext_ack *extack)
405
{
J
John Fastabend 已提交
406 407
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *fold, *fnew;
408 409 410
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_FLOW_MAX + 1];
	unsigned int nkeys = 0;
411
	unsigned int perturb_period = 0;
412 413 414 415 416 417 418 419
	u32 baseclass = 0;
	u32 keymask = 0;
	u32 mode;
	int err;

	if (opt == NULL)
		return -EINVAL;

420
	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL);
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
	if (err < 0)
		return err;

	if (tb[TCA_FLOW_BASECLASS]) {
		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
		if (TC_H_MIN(baseclass) == 0)
			return -EINVAL;
	}

	if (tb[TCA_FLOW_KEYS]) {
		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);

		nkeys = hweight32(keymask);
		if (nkeys == 0)
			return -EINVAL;
436 437 438

		if (fls(keymask) - 1 > FLOW_KEY_MAX)
			return -EOPNOTSUPP;
439 440

		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
441
		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
442
			return -EOPNOTSUPP;
443 444
	}

J
John Fastabend 已提交
445 446
	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew)
447
		return -ENOBUFS;
448 449 450

	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
	if (err < 0)
451
		goto err1;
J
John Fastabend 已提交
452

453 454
	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
	if (err < 0)
455 456
		goto err2;

457 458
	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
				extack);
459 460
	if (err < 0)
		goto err2;
461

462
	fold = *arg;
J
John Fastabend 已提交
463
	if (fold) {
464
		err = -EINVAL;
J
John Fastabend 已提交
465
		if (fold->handle != handle && handle)
466
			goto err2;
467

J
John Fastabend 已提交
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
		/* Copy fold into fnew */
		fnew->tp = fold->tp;
		fnew->handle = fold->handle;
		fnew->nkeys = fold->nkeys;
		fnew->keymask = fold->keymask;
		fnew->mode = fold->mode;
		fnew->mask = fold->mask;
		fnew->xor = fold->xor;
		fnew->rshift = fold->rshift;
		fnew->addend = fold->addend;
		fnew->divisor = fold->divisor;
		fnew->baseclass = fold->baseclass;
		fnew->hashrnd = fold->hashrnd;

		mode = fold->mode;
483 484 485
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
486
			goto err2;
487 488

		if (mode == FLOW_MODE_HASH)
J
John Fastabend 已提交
489
			perturb_period = fold->perturb_period;
490 491
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
492
				goto err2;
493 494
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}
495 496 497
	} else {
		err = -EINVAL;
		if (!handle)
498
			goto err2;
499
		if (!tb[TCA_FLOW_KEYS])
500
			goto err2;
501 502 503 504 505

		mode = FLOW_MODE_MAP;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
506
			goto err2;
507

508 509
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
510
				goto err2;
511 512 513
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}

514 515 516 517 518
		if (TC_H_MAJ(baseclass) == 0) {
			struct Qdisc *q = tcf_block_q(tp->chain->block);

			baseclass = TC_H_MAKE(q->handle, baseclass);
		}
519 520 521
		if (TC_H_MIN(baseclass) == 0)
			baseclass = TC_H_MAKE(baseclass, 1);

J
John Fastabend 已提交
522 523 524 525
		fnew->handle = handle;
		fnew->mask  = ~0U;
		fnew->tp = tp;
		get_random_bytes(&fnew->hashrnd, 4);
526 527
	}

528
	timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
529

530
	tcf_block_netif_keep_dst(tp->chain->block);
531

532
	if (tb[TCA_FLOW_KEYS]) {
J
John Fastabend 已提交
533 534
		fnew->keymask = keymask;
		fnew->nkeys   = nkeys;
535 536
	}

J
John Fastabend 已提交
537
	fnew->mode = mode;
538 539

	if (tb[TCA_FLOW_MASK])
J
John Fastabend 已提交
540
		fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
541
	if (tb[TCA_FLOW_XOR])
J
John Fastabend 已提交
542
		fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
543
	if (tb[TCA_FLOW_RSHIFT])
J
John Fastabend 已提交
544
		fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
545
	if (tb[TCA_FLOW_ADDEND])
J
John Fastabend 已提交
546
		fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
547 548

	if (tb[TCA_FLOW_DIVISOR])
J
John Fastabend 已提交
549
		fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
550
	if (baseclass)
J
John Fastabend 已提交
551
		fnew->baseclass = baseclass;
552

J
John Fastabend 已提交
553
	fnew->perturb_period = perturb_period;
554
	if (perturb_period)
J
John Fastabend 已提交
555
		mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
556

557
	if (!*arg)
J
John Fastabend 已提交
558 559
		list_add_tail_rcu(&fnew->list, &head->filters);
	else
560
		list_replace_rcu(&fold->list, &fnew->list);
561

562
	*arg = fnew;
563

564 565
	if (fold) {
		tcf_exts_get_net(&fold->exts);
J
John Fastabend 已提交
566
		call_rcu(&fold->rcu, flow_destroy_filter);
567
	}
568 569
	return 0;

570
err2:
571
	tcf_exts_destroy(&fnew->exts);
572
	tcf_em_tree_destroy(&fnew->ematches);
573
err1:
574
	kfree(fnew);
575 576 577
	return err;
}

578 579
static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
		       struct netlink_ext_ack *extack)
580
{
581
	struct flow_head *head = rtnl_dereference(tp->root);
582
	struct flow_filter *f = arg;
583

J
John Fastabend 已提交
584
	list_del_rcu(&f->list);
585
	tcf_exts_get_net(&f->exts);
J
John Fastabend 已提交
586
	call_rcu(&f->rcu, flow_destroy_filter);
587
	*last = list_empty(&head->filters);
588 589 590 591 592 593 594 595 596 597 598
	return 0;
}

static int flow_init(struct tcf_proto *tp)
{
	struct flow_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;
	INIT_LIST_HEAD(&head->filters);
J
John Fastabend 已提交
599
	rcu_assign_pointer(tp->root, head);
600 601 602
	return 0;
}

603
static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
604
{
J
John Fastabend 已提交
605
	struct flow_head *head = rtnl_dereference(tp->root);
606 607 608
	struct flow_filter *f, *next;

	list_for_each_entry_safe(f, next, &head->filters, list) {
J
John Fastabend 已提交
609
		list_del_rcu(&f->list);
610 611 612 613
		if (tcf_exts_get_net(&f->exts))
			call_rcu(&f->rcu, flow_destroy_filter);
		else
			__flow_destroy_filter(f);
614
	}
J
John Fastabend 已提交
615
	kfree_rcu(head, rcu);
616 617
}

618
static void *flow_get(struct tcf_proto *tp, u32 handle)
619
{
J
John Fastabend 已提交
620
	struct flow_head *head = rtnl_dereference(tp->root);
621 622
	struct flow_filter *f;

623
	list_for_each_entry(f, &head->filters, list)
624
		if (f->handle == handle)
625 626
			return f;
	return NULL;
627 628
}

629
static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
630 631
		     struct sk_buff *skb, struct tcmsg *t)
{
632
	struct flow_filter *f = fh;
633 634 635 636 637 638 639 640 641 642 643
	struct nlattr *nest;

	if (f == NULL)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

644 645 646
	if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
	    nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
		goto nla_put_failure;
647 648

	if (f->mask != ~0 || f->xor != 0) {
649 650 651
		if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
		    nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
			goto nla_put_failure;
652
	}
653 654 655 656 657 658
	if (f->rshift &&
	    nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
		goto nla_put_failure;
	if (f->addend &&
	    nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
		goto nla_put_failure;
659

660 661 662 663 664 665
	if (f->divisor &&
	    nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
		goto nla_put_failure;
	if (f->baseclass &&
	    nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
		goto nla_put_failure;
666

667 668 669
	if (f->perturb_period &&
	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
		goto nla_put_failure;
670

671
	if (tcf_exts_dump(skb, &f->exts) < 0)
672
		goto nla_put_failure;
673
#ifdef CONFIG_NET_EMATCH
674 675 676
	if (f->ematches.hdr.nmatches &&
	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
		goto nla_put_failure;
677
#endif
678 679
	nla_nest_end(skb, nest);

680
	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
681 682 683 684 685
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
686
	nla_nest_cancel(skb, nest);
687 688 689 690 691
	return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
J
John Fastabend 已提交
692
	struct flow_head *head = rtnl_dereference(tp->root);
693 694
	struct flow_filter *f;

695
	list_for_each_entry(f, &head->filters, list) {
696 697
		if (arg->count < arg->skip)
			goto skip;
698
		if (arg->fn(tp, f, arg) < 0) {
699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

/* tcf_proto_ops registration table for the "flow" classifier. */
static struct tcf_proto_ops cls_flow_ops __read_mostly = {
	.kind		= "flow",
	.classify	= flow_classify,
	.init		= flow_init,
	.destroy	= flow_destroy,
	.change		= flow_change,
	.delete		= flow_delete,
	.get		= flow_get,
	.dump		= flow_dump,
	.walk		= flow_walk,
	.owner		= THIS_MODULE,
};

/* Module entry: register the classifier with the TC core. */
static int __init cls_flow_init(void)
{
	return register_tcf_proto_ops(&cls_flow_ops);
}

/* Module exit: unregister the classifier. */
static void __exit cls_flow_exit(void)
{
	unregister_tcf_proto_ops(&cls_flow_ops);
}

module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");