/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/inet_sock.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/flow_dissector.h>

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
	struct list_head	filters;
J
John Fastabend 已提交
38
	struct rcu_head		rcu;
39 40 41 42 43 44
};

struct flow_filter {
	struct list_head	list;
	struct tcf_exts		exts;
	struct tcf_ematch_tree	ematches;
J
John Fastabend 已提交
45
	struct tcf_proto	*tp;
46 47
	struct timer_list	perturb_timer;
	u32			perturb_period;
48 49 50 51 52 53 54 55 56 57 58
	u32			handle;

	u32			nkeys;
	u32			keymask;
	u32			mode;
	u32			mask;
	u32			xor;
	u32			rshift;
	u32			addend;
	u32			divisor;
	u32			baseclass;
59
	u32			hashrnd;
J
John Fastabend 已提交
60
	struct rcu_head		rcu;
61 62 63 64 65 66 67 68 69
};

/*
 * Reduce a pointer to 32 bits: the low 32 bits of the address, XORed with
 * the upper half when longs are wider than 32 bits.
 */
static inline u32 addr_fold(void *addr)
{
	unsigned long bits = (unsigned long)addr;
	unsigned long upper = BITS_PER_LONG > 32 ? bits >> 32 : 0;

	return (bits & 0xFFFFFFFF) ^ upper;
}

E
Eric Dumazet 已提交
70
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
71
{
72 73 74 75 76
	__be32 src = flow_get_u32_src(flow);

	if (src)
		return ntohl(src);

77
	return addr_fold(skb->sk);
78 79
}

E
Eric Dumazet 已提交
80
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
81
{
82 83 84 85 86
	__be32 dst = flow_get_u32_dst(flow);

	if (dst)
		return ntohl(dst);

87
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
88 89
}

E
Eric Dumazet 已提交
90
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
91
{
92
	return flow->basic.ip_proto;
93 94
}

E
Eric Dumazet 已提交
95
static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
96
{
97
	if (flow->ports.ports)
98
		return ntohs(flow->ports.src);
99

100 101 102
	return addr_fold(skb->sk);
}

E
Eric Dumazet 已提交
103
static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
104
{
105
	if (flow->ports.ports)
106
		return ntohs(flow->ports.dst);
107

108
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
109 110 111 112
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
113
	return skb->skb_iif;
114 115 116 117 118 119 120 121 122 123 124 125 126 127
}

/* Priority key: the priority field of the skb. */
static u32 flow_get_priority(const struct sk_buff *skb)
{
	const u32 prio = skb->priority;

	return prio;
}

/* Mark key: the mark field of the skb. */
static u32 flow_get_mark(const struct sk_buff *skb)
{
	const u32 mark = skb->mark;

	return mark;
}

/*
 * Conntrack key: a fold of the skb->nfct pointer when conntrack support is
 * built in, 0 otherwise.
 */
static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	return addr_fold(skb->nfct);
#else
	return 0;
#endif
}

/*
 * CTTUPLE(skb, member) evaluates to the given member of the conntrack tuple
 * for the skb's direction.  When the skb has no conntrack entry, or when
 * conntrack is not built in, it jumps to a label named "fallback", which
 * every function using this macro must therefore define.
 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info ctinfo;					\
	const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);		\
	if (ct == NULL)							\
		goto fallback;						\
	ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;			\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif

E
Eric Dumazet 已提交
152
static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
153
{
154
	switch (tc_skb_protocol(skb)) {
155
	case htons(ETH_P_IP):
156
		return ntohl(CTTUPLE(skb, src.u3.ip));
157
	case htons(ETH_P_IPV6):
158 159 160
		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
161
	return flow_get_src(skb, flow);
162 163
}

E
Eric Dumazet 已提交
164
static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
165
{
166
	switch (tc_skb_protocol(skb)) {
167
	case htons(ETH_P_IP):
168
		return ntohl(CTTUPLE(skb, dst.u3.ip));
169
	case htons(ETH_P_IPV6):
170 171 172
		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
	}
fallback:
E
Eric Dumazet 已提交
173
	return flow_get_dst(skb, flow);
174 175
}

E
Eric Dumazet 已提交
176
static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
177 178 179
{
	return ntohs(CTTUPLE(skb, src.u.all));
fallback:
E
Eric Dumazet 已提交
180
	return flow_get_proto_src(skb, flow);
181 182
}

E
Eric Dumazet 已提交
183
static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
184 185 186
{
	return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
E
Eric Dumazet 已提交
187
	return flow_get_proto_dst(skb, flow);
188 189 190 191
}

/*
 * Route class id key: the tclassid of the skb's dst entry when
 * CONFIG_IP_ROUTE_CLASSID is set and the skb has a dst; 0 otherwise.
 */
static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (skb_dst(skb))
		return skb_dst(skb)->tclassid;
#endif
	return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
201 202 203 204 205
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;

206 207
		return from_kuid(&init_user_ns, skuid);
	}
208 209 210 211 212
	return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
213 214 215 216 217
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;

218 219
		return from_kgid(&init_user_ns, skgid);
	}
220 221 222
	return 0;
}

223 224 225 226 227 228 229 230 231
static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
	u16 uninitialized_var(tag);

	if (vlan_get_tag(skb, &tag) < 0)
		return 0;
	return tag & VLAN_VID_MASK;
}

232 233
static u32 flow_get_rxhash(struct sk_buff *skb)
{
234
	return skb_get_hash(skb);
235 236
}

E
Eric Dumazet 已提交
237
static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
238 239 240
{
	switch (key) {
	case FLOW_KEY_SRC:
E
Eric Dumazet 已提交
241
		return flow_get_src(skb, flow);
242
	case FLOW_KEY_DST:
E
Eric Dumazet 已提交
243
		return flow_get_dst(skb, flow);
244
	case FLOW_KEY_PROTO:
E
Eric Dumazet 已提交
245
		return flow_get_proto(skb, flow);
246
	case FLOW_KEY_PROTO_SRC:
E
Eric Dumazet 已提交
247
		return flow_get_proto_src(skb, flow);
248
	case FLOW_KEY_PROTO_DST:
E
Eric Dumazet 已提交
249
		return flow_get_proto_dst(skb, flow);
250 251 252 253 254 255 256 257 258
	case FLOW_KEY_IIF:
		return flow_get_iif(skb);
	case FLOW_KEY_PRIORITY:
		return flow_get_priority(skb);
	case FLOW_KEY_MARK:
		return flow_get_mark(skb);
	case FLOW_KEY_NFCT:
		return flow_get_nfct(skb);
	case FLOW_KEY_NFCT_SRC:
E
Eric Dumazet 已提交
259
		return flow_get_nfct_src(skb, flow);
260
	case FLOW_KEY_NFCT_DST:
E
Eric Dumazet 已提交
261
		return flow_get_nfct_dst(skb, flow);
262
	case FLOW_KEY_NFCT_PROTO_SRC:
E
Eric Dumazet 已提交
263
		return flow_get_nfct_proto_src(skb, flow);
264
	case FLOW_KEY_NFCT_PROTO_DST:
E
Eric Dumazet 已提交
265
		return flow_get_nfct_proto_dst(skb, flow);
266 267 268 269 270 271
	case FLOW_KEY_RTCLASSID:
		return flow_get_rtclassid(skb);
	case FLOW_KEY_SKUID:
		return flow_get_skuid(skb);
	case FLOW_KEY_SKGID:
		return flow_get_skgid(skb);
272 273
	case FLOW_KEY_VLAN_TAG:
		return flow_get_vlan_tag(skb);
274 275
	case FLOW_KEY_RXHASH:
		return flow_get_rxhash(skb);
276 277 278 279 280 281
	default:
		WARN_ON(1);
		return 0;
	}
}

/*
 * Keys whose getters read the dissected flow_keys.  flow_classify() runs the
 * flow dissector only when a filter selects at least one of them.
 */
#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) |			\
			  (1 << FLOW_KEY_DST) |			\
			  (1 << FLOW_KEY_PROTO) |		\
			  (1 << FLOW_KEY_PROTO_SRC) |		\
			  (1 << FLOW_KEY_PROTO_DST) |		\
			  (1 << FLOW_KEY_NFCT_SRC) |		\
			  (1 << FLOW_KEY_NFCT_DST) |		\
			  (1 << FLOW_KEY_NFCT_PROTO_SRC) |	\
			  (1 << FLOW_KEY_NFCT_PROTO_DST))

292
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
293 294
			 struct tcf_result *res)
{
J
John Fastabend 已提交
295
	struct flow_head *head = rcu_dereference_bh(tp->root);
296 297 298 299 300 301
	struct flow_filter *f;
	u32 keymask;
	u32 classid;
	unsigned int n, key;
	int r;

J
John Fastabend 已提交
302
	list_for_each_entry_rcu(f, &head->filters, list) {
E
Eric Dumazet 已提交
303
		u32 keys[FLOW_KEY_MAX + 1];
E
Eric Dumazet 已提交
304
		struct flow_keys flow_keys;
305 306 307 308 309

		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
			continue;

		keymask = f->keymask;
E
Eric Dumazet 已提交
310
		if (keymask & FLOW_KEYS_NEEDED)
311
			skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
312 313 314 315

		for (n = 0; n < f->nkeys; n++) {
			key = ffs(keymask) - 1;
			keymask &= ~(1 << key);
E
Eric Dumazet 已提交
316
			keys[n] = flow_key_get(skb, key, &flow_keys);
317 318 319
		}

		if (f->mode == FLOW_MODE_HASH)
320
			classid = jhash2(keys, f->nkeys, f->hashrnd);
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
		else {
			classid = keys[0];
			classid = (classid & f->mask) ^ f->xor;
			classid = (classid >> f->rshift) + f->addend;
		}

		if (f->divisor)
			classid %= f->divisor;

		res->class   = 0;
		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

		r = tcf_exts_exec(skb, &f->exts, res);
		if (r < 0)
			continue;
		return r;
	}
	return -1;
}

341 342 343 344 345 346 347 348 349
static void flow_perturbation(unsigned long arg)
{
	struct flow_filter *f = (struct flow_filter *)arg;

	get_random_bytes(&f->hashrnd, 4);
	if (f->perturb_period)
		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
}

350 351 352 353 354 355 356 357 358 359 360 361
static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
362
	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
363 364
};

J
John Fastabend 已提交
365 366 367 368 369
static void flow_destroy_filter(struct rcu_head *head)
{
	struct flow_filter *f = container_of(head, struct flow_filter, rcu);

	del_timer_sync(&f->perturb_timer);
370
	tcf_exts_destroy(&f->exts);
371
	tcf_em_tree_destroy(&f->ematches);
J
John Fastabend 已提交
372 373 374
	kfree(f);
}

375
static int flow_change(struct net *net, struct sk_buff *in_skb,
376
		       struct tcf_proto *tp, unsigned long base,
377
		       u32 handle, struct nlattr **tca,
378
		       unsigned long *arg, bool ovr)
379
{
J
John Fastabend 已提交
380 381
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *fold, *fnew;
382 383 384 385 386
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_FLOW_MAX + 1];
	struct tcf_exts e;
	struct tcf_ematch_tree t;
	unsigned int nkeys = 0;
387
	unsigned int perturb_period = 0;
388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
	u32 baseclass = 0;
	u32 keymask = 0;
	u32 mode;
	int err;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
	if (err < 0)
		return err;

	if (tb[TCA_FLOW_BASECLASS]) {
		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
		if (TC_H_MIN(baseclass) == 0)
			return -EINVAL;
	}

	if (tb[TCA_FLOW_KEYS]) {
		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);

		nkeys = hweight32(keymask);
		if (nkeys == 0)
			return -EINVAL;
412 413 414

		if (fls(keymask) - 1 > FLOW_KEY_MAX)
			return -EOPNOTSUPP;
415 416

		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
417
		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
418
			return -EOPNOTSUPP;
419 420
	}

421 422 423
	err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
	if (err < 0)
		goto err1;
424
	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
425
	if (err < 0)
426
		goto err1;
427 428 429 430 431

	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
	if (err < 0)
		goto err1;

J
John Fastabend 已提交
432 433 434 435 436
	err = -ENOBUFS;
	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew)
		goto err2;

437 438 439
	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
	if (err < 0)
		goto err3;
440

J
John Fastabend 已提交
441 442
	fold = (struct flow_filter *)*arg;
	if (fold) {
443
		err = -EINVAL;
J
John Fastabend 已提交
444
		if (fold->handle != handle && handle)
445
			goto err3;
446

J
John Fastabend 已提交
447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
		/* Copy fold into fnew */
		fnew->tp = fold->tp;
		fnew->handle = fold->handle;
		fnew->nkeys = fold->nkeys;
		fnew->keymask = fold->keymask;
		fnew->mode = fold->mode;
		fnew->mask = fold->mask;
		fnew->xor = fold->xor;
		fnew->rshift = fold->rshift;
		fnew->addend = fold->addend;
		fnew->divisor = fold->divisor;
		fnew->baseclass = fold->baseclass;
		fnew->hashrnd = fold->hashrnd;

		mode = fold->mode;
462 463 464
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
465
			goto err3;
466 467

		if (mode == FLOW_MODE_HASH)
J
John Fastabend 已提交
468
			perturb_period = fold->perturb_period;
469 470
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
471
				goto err3;
472 473
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}
474 475 476
	} else {
		err = -EINVAL;
		if (!handle)
477
			goto err3;
478
		if (!tb[TCA_FLOW_KEYS])
479
			goto err3;
480 481 482 483 484

		mode = FLOW_MODE_MAP;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
485
			goto err3;
486

487 488
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
489
				goto err3;
490 491 492
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}

493 494 495 496 497
		if (TC_H_MAJ(baseclass) == 0)
			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
		if (TC_H_MIN(baseclass) == 0)
			baseclass = TC_H_MAKE(baseclass, 1);

J
John Fastabend 已提交
498 499 500 501
		fnew->handle = handle;
		fnew->mask  = ~0U;
		fnew->tp = tp;
		get_random_bytes(&fnew->hashrnd, 4);
502 503
	}

J
John Fastabend 已提交
504 505 506
	fnew->perturb_timer.function = flow_perturbation;
	fnew->perturb_timer.data = (unsigned long)fnew;
	init_timer_deferrable(&fnew->perturb_timer);
507

J
John Fastabend 已提交
508 509
	tcf_exts_change(tp, &fnew->exts, &e);
	tcf_em_tree_change(tp, &fnew->ematches, &t);
510

511 512
	netif_keep_dst(qdisc_dev(tp->q));

513
	if (tb[TCA_FLOW_KEYS]) {
J
John Fastabend 已提交
514 515
		fnew->keymask = keymask;
		fnew->nkeys   = nkeys;
516 517
	}

J
John Fastabend 已提交
518
	fnew->mode = mode;
519 520

	if (tb[TCA_FLOW_MASK])
J
John Fastabend 已提交
521
		fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
522
	if (tb[TCA_FLOW_XOR])
J
John Fastabend 已提交
523
		fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
524
	if (tb[TCA_FLOW_RSHIFT])
J
John Fastabend 已提交
525
		fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
526
	if (tb[TCA_FLOW_ADDEND])
J
John Fastabend 已提交
527
		fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
528 529

	if (tb[TCA_FLOW_DIVISOR])
J
John Fastabend 已提交
530
		fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
531
	if (baseclass)
J
John Fastabend 已提交
532
		fnew->baseclass = baseclass;
533

J
John Fastabend 已提交
534
	fnew->perturb_period = perturb_period;
535
	if (perturb_period)
J
John Fastabend 已提交
536
		mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
537

538
	if (*arg == 0)
J
John Fastabend 已提交
539 540
		list_add_tail_rcu(&fnew->list, &head->filters);
	else
541
		list_replace_rcu(&fold->list, &fnew->list);
542

J
John Fastabend 已提交
543
	*arg = (unsigned long)fnew;
544

J
John Fastabend 已提交
545 546
	if (fold)
		call_rcu(&fold->rcu, flow_destroy_filter);
547 548
	return 0;

549 550
err3:
	tcf_exts_destroy(&fnew->exts);
551
err2:
552
	tcf_em_tree_destroy(&t);
J
John Fastabend 已提交
553
	kfree(fnew);
554
err1:
555
	tcf_exts_destroy(&e);
556 557 558 559 560 561 562
	return err;
}

/*
 * Unlink the filter passed in @arg from its list and hand it to
 * flow_destroy_filter() through call_rcu().  Always returns 0.
 */
static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct flow_filter *f = (struct flow_filter *)arg;

	list_del_rcu(&f->list);
	call_rcu(&f->rcu, flow_destroy_filter);
	return 0;
}

static int flow_init(struct tcf_proto *tp)
{
	struct flow_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;
	INIT_LIST_HEAD(&head->filters);
J
John Fastabend 已提交
576
	rcu_assign_pointer(tp->root, head);
577 578 579
	return 0;
}

580
static bool flow_destroy(struct tcf_proto *tp, bool force)
581
{
J
John Fastabend 已提交
582
	struct flow_head *head = rtnl_dereference(tp->root);
583 584
	struct flow_filter *f, *next;

585 586 587
	if (!force && !list_empty(&head->filters))
		return false;

588
	list_for_each_entry_safe(f, next, &head->filters, list) {
J
John Fastabend 已提交
589 590
		list_del_rcu(&f->list);
		call_rcu(&f->rcu, flow_destroy_filter);
591
	}
J
John Fastabend 已提交
592 593
	RCU_INIT_POINTER(tp->root, NULL);
	kfree_rcu(head, rcu);
594
	return true;
595 596 597 598
}

/*
 * Look up a filter by handle.  Returns the filter pointer cast to
 * unsigned long, or 0 when no filter of @tp has that handle.
 */
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *f;

	list_for_each_entry(f, &head->filters, list)
		if (f->handle == handle)
			return (unsigned long)f;
	return 0;
}

608
static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
609 610 611 612 613 614 615 616 617 618 619 620 621 622
		     struct sk_buff *skb, struct tcmsg *t)
{
	struct flow_filter *f = (struct flow_filter *)fh;
	struct nlattr *nest;

	if (f == NULL)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

623 624 625
	if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
	    nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
		goto nla_put_failure;
626 627

	if (f->mask != ~0 || f->xor != 0) {
628 629 630
		if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
		    nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
			goto nla_put_failure;
631
	}
632 633 634 635 636 637
	if (f->rshift &&
	    nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
		goto nla_put_failure;
	if (f->addend &&
	    nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
		goto nla_put_failure;
638

639 640 641 642 643 644
	if (f->divisor &&
	    nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
		goto nla_put_failure;
	if (f->baseclass &&
	    nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
		goto nla_put_failure;
645

646 647 648
	if (f->perturb_period &&
	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
		goto nla_put_failure;
649

650
	if (tcf_exts_dump(skb, &f->exts) < 0)
651
		goto nla_put_failure;
652
#ifdef CONFIG_NET_EMATCH
653 654 655
	if (f->ematches.hdr.nmatches &&
	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
		goto nla_put_failure;
656
#endif
657 658
	nla_nest_end(skb, nest);

659
	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
660 661 662 663 664
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
665
	nla_nest_cancel(skb, nest);
666 667 668 669 670
	return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
J
John Fastabend 已提交
671
	struct flow_head *head = rtnl_dereference(tp->root);
672 673
	struct flow_filter *f;

674
	list_for_each_entry(f, &head->filters, list) {
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

/* Operations table registered for the "flow" classifier kind. */
static struct tcf_proto_ops cls_flow_ops __read_mostly = {
	.kind		= "flow",
	.owner		= THIS_MODULE,

	/* instance lifetime */
	.init		= flow_init,
	.destroy	= flow_destroy,

	/* filter management */
	.get		= flow_get,
	.change		= flow_change,
	.delete		= flow_delete,
	.walk		= flow_walk,
	.dump		= flow_dump,

	/* packet path */
	.classify	= flow_classify,
};

/* Module entry point: register the classifier operations table. */
static int __init cls_flow_init(void)
{
	int ret = register_tcf_proto_ops(&cls_flow_ops);

	return ret;
}

/* Module exit point: unregister the classifier operations table. */
static void __exit cls_flow_exit(void)
{
	unregister_tcf_proto_ops(&cls_flow_ops);
}

/* Module metadata. */
MODULE_DESCRIPTION("TC flow classifier");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_LICENSE("GPL");

/* Load/unload hooks. */
module_init(cls_flow_init);
module_exit(cls_flow_exit);