cls_flow.c 16.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
22
#include <linux/if_vlan.h>
23
#include <linux/slab.h>
24
#include <linux/module.h>
25
#include <net/inet_sock.h>
26 27 28 29

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
30
#include <net/flow_dissector.h>
E
Eric Dumazet 已提交
31

32
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
33 34 35 36 37
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
	struct list_head	filters;
J
John Fastabend 已提交
38
	struct rcu_head		rcu;
39 40 41 42 43 44
};

struct flow_filter {
	struct list_head	list;
	struct tcf_exts		exts;
	struct tcf_ematch_tree	ematches;
J
John Fastabend 已提交
45
	struct tcf_proto	*tp;
46 47
	struct timer_list	perturb_timer;
	u32			perturb_period;
48 49 50 51 52 53 54 55 56 57 58
	u32			handle;

	u32			nkeys;
	u32			keymask;
	u32			mode;
	u32			mask;
	u32			xor;
	u32			rshift;
	u32			addend;
	u32			divisor;
	u32			baseclass;
59
	u32			hashrnd;
C
Cong Wang 已提交
60
	struct rcu_work		rwork;
61 62 63 64 65 66 67 68 69
};

/*
 * Fold a pointer value into 32 bits: the low 32 bits XORed with the high
 * 32 bits when longs are wider than 32 bits, the low 32 bits alone otherwise.
 */
static inline u32 addr_fold(void *addr)
{
	unsigned long bits = (unsigned long)addr;
	unsigned long low = bits & 0xFFFFFFFF;
	unsigned long high = BITS_PER_LONG > 32 ? bits >> 32 : 0;

	return low ^ high;
}

E
Eric Dumazet 已提交
70
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
71
{
72 73 74 75 76
	__be32 src = flow_get_u32_src(flow);

	if (src)
		return ntohl(src);

77
	return addr_fold(skb->sk);
78 79
}

E
Eric Dumazet 已提交
80
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
81
{
82 83 84 85 86
	__be32 dst = flow_get_u32_dst(flow);

	if (dst)
		return ntohl(dst);

87
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
88 89
}

J
Jamal Hadi Salim 已提交
90 91
static u32 flow_get_proto(const struct sk_buff *skb,
			  const struct flow_keys *flow)
92
{
93
	return flow->basic.ip_proto;
94 95
}

J
Jamal Hadi Salim 已提交
96 97
static u32 flow_get_proto_src(const struct sk_buff *skb,
			      const struct flow_keys *flow)
98
{
99
	if (flow->ports.ports)
100
		return ntohs(flow->ports.src);
101

102 103 104
	return addr_fold(skb->sk);
}

J
Jamal Hadi Salim 已提交
105 106
static u32 flow_get_proto_dst(const struct sk_buff *skb,
			      const struct flow_keys *flow)
107
{
108
	if (flow->ports.ports)
109
		return ntohs(flow->ports.dst);
110

111
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
112 113 114 115
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
116
	return skb->skb_iif;
117 118 119 120 121 122 123 124 125 126 127 128 129 130
}

/* FLOW_KEY_PRIORITY: the skb's priority field. */
static u32 flow_get_priority(const struct sk_buff *skb)
{
	u32 prio = skb->priority;

	return prio;
}

/* FLOW_KEY_MARK: the skb's mark field. */
static u32 flow_get_mark(const struct sk_buff *skb)
{
	u32 mark = skb->mark;

	return mark;
}

/*
 * FLOW_KEY_NFCT: the folded skb_nfct() value when conntrack support is
 * built in, 0 otherwise.
 */
static u32 flow_get_nfct(const struct sk_buff *skb)
{
	u32 id = 0;

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	id = addr_fold(skb_nfct(skb));
#endif
	return id;
}

/*
 * CTTUPLE(skb, member) evaluates to the given member of the conntrack tuple
 * for the packet's direction.  It jumps to a "fallback" label, which every
 * user must provide, when the skb has no conntrack entry or when conntrack
 * support is not built in.
 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info ct_state;				\
	const struct nf_conn *conn = nf_ct_get(skb, &ct_state);	\
	if (!conn)							\
		goto fallback;						\
	conn->tuplehash[CTINFO2DIR(ct_state)].tuple.member;		\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif

J
Jamal Hadi Salim 已提交
155 156
static u32 flow_get_nfct_src(const struct sk_buff *skb,
			     const struct flow_keys *flow)
157
{
158
	switch (tc_skb_protocol(skb)) {
159
	case htons(ETH_P_IP):
160
		return ntohl(CTTUPLE(skb, src.u3.ip));
161
	case htons(ETH_P_IPV6):
162 163 164
		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
165
	return flow_get_src(skb, flow);
166 167
}

J
Jamal Hadi Salim 已提交
168 169
static u32 flow_get_nfct_dst(const struct sk_buff *skb,
			     const struct flow_keys *flow)
170
{
171
	switch (tc_skb_protocol(skb)) {
172
	case htons(ETH_P_IP):
173
		return ntohl(CTTUPLE(skb, dst.u3.ip));
174
	case htons(ETH_P_IPV6):
175 176 177
		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
178
	return flow_get_dst(skb, flow);
179 180
}

J
Jamal Hadi Salim 已提交
181 182
static u32 flow_get_nfct_proto_src(const struct sk_buff *skb,
				   const struct flow_keys *flow)
183 184 185
{
	return ntohs(CTTUPLE(skb, src.u.all));
fallback:
E
Eric Dumazet 已提交
186
	return flow_get_proto_src(skb, flow);
187 188
}

J
Jamal Hadi Salim 已提交
189 190
static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb,
				   const struct flow_keys *flow)
191 192 193
{
	return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
E
Eric Dumazet 已提交
194
	return flow_get_proto_dst(skb, flow);
195 196 197 198
}

/*
 * FLOW_KEY_RTCLASSID: tclassid of the skb's dst entry when
 * CONFIG_IP_ROUTE_CLASSID is set and a dst is attached, 0 otherwise.
 */
static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	const struct dst_entry *dst = skb_dst(skb);

	if (dst)
		return dst->tclassid;
#endif
	return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
208 209 210 211 212
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;

213 214
		return from_kuid(&init_user_ns, skuid);
	}
215 216 217 218 219
	return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
220 221 222 223 224
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;

225 226
		return from_kgid(&init_user_ns, skgid);
	}
227 228 229
	return 0;
}

230 231 232 233 234 235 236 237 238
static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
	u16 uninitialized_var(tag);

	if (vlan_get_tag(skb, &tag) < 0)
		return 0;
	return tag & VLAN_VID_MASK;
}

239 240
static u32 flow_get_rxhash(struct sk_buff *skb)
{
241
	return skb_get_hash(skb);
242 243
}

E
Eric Dumazet 已提交
244
static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
245 246 247
{
	switch (key) {
	case FLOW_KEY_SRC:
E
Eric Dumazet 已提交
248
		return flow_get_src(skb, flow);
249
	case FLOW_KEY_DST:
E
Eric Dumazet 已提交
250
		return flow_get_dst(skb, flow);
251
	case FLOW_KEY_PROTO:
E
Eric Dumazet 已提交
252
		return flow_get_proto(skb, flow);
253
	case FLOW_KEY_PROTO_SRC:
E
Eric Dumazet 已提交
254
		return flow_get_proto_src(skb, flow);
255
	case FLOW_KEY_PROTO_DST:
E
Eric Dumazet 已提交
256
		return flow_get_proto_dst(skb, flow);
257 258 259 260 261 262 263 264 265
	case FLOW_KEY_IIF:
		return flow_get_iif(skb);
	case FLOW_KEY_PRIORITY:
		return flow_get_priority(skb);
	case FLOW_KEY_MARK:
		return flow_get_mark(skb);
	case FLOW_KEY_NFCT:
		return flow_get_nfct(skb);
	case FLOW_KEY_NFCT_SRC:
E
Eric Dumazet 已提交
266
		return flow_get_nfct_src(skb, flow);
267
	case FLOW_KEY_NFCT_DST:
E
Eric Dumazet 已提交
268
		return flow_get_nfct_dst(skb, flow);
269
	case FLOW_KEY_NFCT_PROTO_SRC:
E
Eric Dumazet 已提交
270
		return flow_get_nfct_proto_src(skb, flow);
271
	case FLOW_KEY_NFCT_PROTO_DST:
E
Eric Dumazet 已提交
272
		return flow_get_nfct_proto_dst(skb, flow);
273 274 275 276 277 278
	case FLOW_KEY_RTCLASSID:
		return flow_get_rtclassid(skb);
	case FLOW_KEY_SKUID:
		return flow_get_skuid(skb);
	case FLOW_KEY_SKGID:
		return flow_get_skgid(skb);
279 280
	case FLOW_KEY_VLAN_TAG:
		return flow_get_vlan_tag(skb);
281 282
	case FLOW_KEY_RXHASH:
		return flow_get_rxhash(skb);
283 284 285 286 287 288
	default:
		WARN_ON(1);
		return 0;
	}
}

/*
 * Keymask bits of the keys whose getters take the dissected flow keys.
 * flow_classify() only runs the flow dissector when one of them is set.
 */
#define FLOW_KEYS_NEEDED						\
	((1 << FLOW_KEY_SRC) |						\
	 (1 << FLOW_KEY_DST) |						\
	 (1 << FLOW_KEY_PROTO) |					\
	 (1 << FLOW_KEY_PROTO_SRC) |					\
	 (1 << FLOW_KEY_PROTO_DST) |					\
	 (1 << FLOW_KEY_NFCT_SRC) |					\
	 (1 << FLOW_KEY_NFCT_DST) |					\
	 (1 << FLOW_KEY_NFCT_PROTO_SRC) |				\
	 (1 << FLOW_KEY_NFCT_PROTO_DST))

299
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
300 301
			 struct tcf_result *res)
{
J
John Fastabend 已提交
302
	struct flow_head *head = rcu_dereference_bh(tp->root);
303 304 305 306 307 308
	struct flow_filter *f;
	u32 keymask;
	u32 classid;
	unsigned int n, key;
	int r;

J
John Fastabend 已提交
309
	list_for_each_entry_rcu(f, &head->filters, list) {
E
Eric Dumazet 已提交
310
		u32 keys[FLOW_KEY_MAX + 1];
E
Eric Dumazet 已提交
311
		struct flow_keys flow_keys;
312 313 314 315 316

		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
			continue;

		keymask = f->keymask;
E
Eric Dumazet 已提交
317
		if (keymask & FLOW_KEYS_NEEDED)
318
			skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
319 320 321 322

		for (n = 0; n < f->nkeys; n++) {
			key = ffs(keymask) - 1;
			keymask &= ~(1 << key);
E
Eric Dumazet 已提交
323
			keys[n] = flow_key_get(skb, key, &flow_keys);
324 325 326
		}

		if (f->mode == FLOW_MODE_HASH)
327
			classid = jhash2(keys, f->nkeys, f->hashrnd);
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
		else {
			classid = keys[0];
			classid = (classid & f->mask) ^ f->xor;
			classid = (classid >> f->rshift) + f->addend;
		}

		if (f->divisor)
			classid %= f->divisor;

		res->class   = 0;
		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

		r = tcf_exts_exec(skb, &f->exts, res);
		if (r < 0)
			continue;
		return r;
	}
	return -1;
}

348
static void flow_perturbation(struct timer_list *t)
349
{
350
	struct flow_filter *f = from_timer(f, t, perturb_timer);
351 352 353 354 355 356

	get_random_bytes(&f->hashrnd, 4);
	if (f->perturb_period)
		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
}

357 358 359 360 361 362 363 364 365 366 367 368
static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
369
	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
370 371
};

372
static void __flow_destroy_filter(struct flow_filter *f)
J
John Fastabend 已提交
373 374
{
	del_timer_sync(&f->perturb_timer);
375
	tcf_exts_destroy(&f->exts);
376
	tcf_em_tree_destroy(&f->ematches);
377
	tcf_exts_put_net(&f->exts);
J
John Fastabend 已提交
378
	kfree(f);
379 380 381 382
}

static void flow_destroy_filter_work(struct work_struct *work)
{
C
Cong Wang 已提交
383 384 385
	struct flow_filter *f = container_of(to_rcu_work(work),
					     struct flow_filter,
					     rwork);
386 387
	rtnl_lock();
	__flow_destroy_filter(f);
388 389 390
	rtnl_unlock();
}

391
static int flow_change(struct net *net, struct sk_buff *in_skb,
392
		       struct tcf_proto *tp, unsigned long base,
393
		       u32 handle, struct nlattr **tca,
394
		       void **arg, bool ovr, struct netlink_ext_ack *extack)
395
{
J
John Fastabend 已提交
396 397
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *fold, *fnew;
398 399 400
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_FLOW_MAX + 1];
	unsigned int nkeys = 0;
401
	unsigned int perturb_period = 0;
402 403 404 405 406 407 408 409
	u32 baseclass = 0;
	u32 keymask = 0;
	u32 mode;
	int err;

	if (opt == NULL)
		return -EINVAL;

410
	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL);
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425
	if (err < 0)
		return err;

	if (tb[TCA_FLOW_BASECLASS]) {
		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
		if (TC_H_MIN(baseclass) == 0)
			return -EINVAL;
	}

	if (tb[TCA_FLOW_KEYS]) {
		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);

		nkeys = hweight32(keymask);
		if (nkeys == 0)
			return -EINVAL;
426 427 428

		if (fls(keymask) - 1 > FLOW_KEY_MAX)
			return -EOPNOTSUPP;
429 430

		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
431
		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
432
			return -EOPNOTSUPP;
433 434
	}

J
John Fastabend 已提交
435 436
	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew)
437
		return -ENOBUFS;
438 439 440

	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
	if (err < 0)
441
		goto err1;
J
John Fastabend 已提交
442

443 444
	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
	if (err < 0)
445 446
		goto err2;

447 448
	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
				extack);
449 450
	if (err < 0)
		goto err2;
451

452
	fold = *arg;
J
John Fastabend 已提交
453
	if (fold) {
454
		err = -EINVAL;
J
John Fastabend 已提交
455
		if (fold->handle != handle && handle)
456
			goto err2;
457

J
John Fastabend 已提交
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
		/* Copy fold into fnew */
		fnew->tp = fold->tp;
		fnew->handle = fold->handle;
		fnew->nkeys = fold->nkeys;
		fnew->keymask = fold->keymask;
		fnew->mode = fold->mode;
		fnew->mask = fold->mask;
		fnew->xor = fold->xor;
		fnew->rshift = fold->rshift;
		fnew->addend = fold->addend;
		fnew->divisor = fold->divisor;
		fnew->baseclass = fold->baseclass;
		fnew->hashrnd = fold->hashrnd;

		mode = fold->mode;
473 474 475
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
476
			goto err2;
477 478

		if (mode == FLOW_MODE_HASH)
J
John Fastabend 已提交
479
			perturb_period = fold->perturb_period;
480 481
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
482
				goto err2;
483 484
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}
485 486 487
	} else {
		err = -EINVAL;
		if (!handle)
488
			goto err2;
489
		if (!tb[TCA_FLOW_KEYS])
490
			goto err2;
491 492 493 494 495

		mode = FLOW_MODE_MAP;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
496
			goto err2;
497

498 499
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
500
				goto err2;
501 502 503
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}

504 505 506 507 508
		if (TC_H_MAJ(baseclass) == 0) {
			struct Qdisc *q = tcf_block_q(tp->chain->block);

			baseclass = TC_H_MAKE(q->handle, baseclass);
		}
509 510 511
		if (TC_H_MIN(baseclass) == 0)
			baseclass = TC_H_MAKE(baseclass, 1);

J
John Fastabend 已提交
512 513 514 515
		fnew->handle = handle;
		fnew->mask  = ~0U;
		fnew->tp = tp;
		get_random_bytes(&fnew->hashrnd, 4);
516 517
	}

518
	timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
519

520
	tcf_block_netif_keep_dst(tp->chain->block);
521

522
	if (tb[TCA_FLOW_KEYS]) {
J
John Fastabend 已提交
523 524
		fnew->keymask = keymask;
		fnew->nkeys   = nkeys;
525 526
	}

J
John Fastabend 已提交
527
	fnew->mode = mode;
528 529

	if (tb[TCA_FLOW_MASK])
J
John Fastabend 已提交
530
		fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
531
	if (tb[TCA_FLOW_XOR])
J
John Fastabend 已提交
532
		fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
533
	if (tb[TCA_FLOW_RSHIFT])
J
John Fastabend 已提交
534
		fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
535
	if (tb[TCA_FLOW_ADDEND])
J
John Fastabend 已提交
536
		fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
537 538

	if (tb[TCA_FLOW_DIVISOR])
J
John Fastabend 已提交
539
		fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
540
	if (baseclass)
J
John Fastabend 已提交
541
		fnew->baseclass = baseclass;
542

J
John Fastabend 已提交
543
	fnew->perturb_period = perturb_period;
544
	if (perturb_period)
J
John Fastabend 已提交
545
		mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
546

547
	if (!*arg)
J
John Fastabend 已提交
548 549
		list_add_tail_rcu(&fnew->list, &head->filters);
	else
550
		list_replace_rcu(&fold->list, &fnew->list);
551

552
	*arg = fnew;
553

554 555
	if (fold) {
		tcf_exts_get_net(&fold->exts);
C
Cong Wang 已提交
556
		tcf_queue_work(&fold->rwork, flow_destroy_filter_work);
557
	}
558 559
	return 0;

560
err2:
561
	tcf_exts_destroy(&fnew->exts);
562
	tcf_em_tree_destroy(&fnew->ematches);
563
err1:
564
	kfree(fnew);
565 566 567
	return err;
}

568 569
static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
		       struct netlink_ext_ack *extack)
570
{
571
	struct flow_head *head = rtnl_dereference(tp->root);
572
	struct flow_filter *f = arg;
573

J
John Fastabend 已提交
574
	list_del_rcu(&f->list);
575
	tcf_exts_get_net(&f->exts);
C
Cong Wang 已提交
576
	tcf_queue_work(&f->rwork, flow_destroy_filter_work);
577
	*last = list_empty(&head->filters);
578 579 580 581 582 583 584 585 586 587 588
	return 0;
}

static int flow_init(struct tcf_proto *tp)
{
	struct flow_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;
	INIT_LIST_HEAD(&head->filters);
J
John Fastabend 已提交
589
	rcu_assign_pointer(tp->root, head);
590 591 592
	return 0;
}

593
static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
594
{
J
John Fastabend 已提交
595
	struct flow_head *head = rtnl_dereference(tp->root);
596 597 598
	struct flow_filter *f, *next;

	list_for_each_entry_safe(f, next, &head->filters, list) {
J
John Fastabend 已提交
599
		list_del_rcu(&f->list);
600
		if (tcf_exts_get_net(&f->exts))
C
Cong Wang 已提交
601
			tcf_queue_work(&f->rwork, flow_destroy_filter_work);
602 603
		else
			__flow_destroy_filter(f);
604
	}
J
John Fastabend 已提交
605
	kfree_rcu(head, rcu);
606 607
}

608
static void *flow_get(struct tcf_proto *tp, u32 handle)
609
{
J
John Fastabend 已提交
610
	struct flow_head *head = rtnl_dereference(tp->root);
611 612
	struct flow_filter *f;

613
	list_for_each_entry(f, &head->filters, list)
614
		if (f->handle == handle)
615 616
			return f;
	return NULL;
617 618
}

619
static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
620 621
		     struct sk_buff *skb, struct tcmsg *t)
{
622
	struct flow_filter *f = fh;
623 624 625 626 627 628 629 630 631 632 633
	struct nlattr *nest;

	if (f == NULL)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

634 635 636
	if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
	    nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
		goto nla_put_failure;
637 638

	if (f->mask != ~0 || f->xor != 0) {
639 640 641
		if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
		    nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
			goto nla_put_failure;
642
	}
643 644 645 646 647 648
	if (f->rshift &&
	    nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
		goto nla_put_failure;
	if (f->addend &&
	    nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
		goto nla_put_failure;
649

650 651 652 653 654 655
	if (f->divisor &&
	    nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
		goto nla_put_failure;
	if (f->baseclass &&
	    nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
		goto nla_put_failure;
656

657 658 659
	if (f->perturb_period &&
	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
		goto nla_put_failure;
660

661
	if (tcf_exts_dump(skb, &f->exts) < 0)
662
		goto nla_put_failure;
663
#ifdef CONFIG_NET_EMATCH
664 665 666
	if (f->ematches.hdr.nmatches &&
	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
		goto nla_put_failure;
667
#endif
668 669
	nla_nest_end(skb, nest);

670
	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
671 672 673 674 675
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
676
	nla_nest_cancel(skb, nest);
677 678 679 680 681
	return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
J
John Fastabend 已提交
682
	struct flow_head *head = rtnl_dereference(tp->root);
683 684
	struct flow_filter *f;

685
	list_for_each_entry(f, &head->filters, list) {
686 687
		if (arg->count < arg->skip)
			goto skip;
688
		if (arg->fn(tp, f, arg) < 0) {
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

/* Classifier operations registered under the kind name "flow". */
static struct tcf_proto_ops cls_flow_ops __read_mostly = {
	.kind		= "flow",
	.classify	= flow_classify,
	.init		= flow_init,
	.destroy	= flow_destroy,
	.change		= flow_change,
	.delete		= flow_delete,
	.get		= flow_get,
	.dump		= flow_dump,
	.walk		= flow_walk,
	.owner		= THIS_MODULE,
};

/* Module init: register the "flow" classifier operations. */
static int __init cls_flow_init(void)
{
	int err = register_tcf_proto_ops(&cls_flow_ops);

	return err;
}

/* Module exit: unregister the "flow" classifier operations. */
static void __exit cls_flow_exit(void)
{
	unregister_tcf_proto_ops(&cls_flow_ops);
}

/* Module entry/exit hooks and metadata. */
module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");