cls_flow.c 16.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
22
#include <linux/if_vlan.h>
23
#include <linux/slab.h>
24
#include <linux/module.h>
25
#include <net/inet_sock.h>
26 27 28 29

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
30
#include <net/flow_dissector.h>
E
Eric Dumazet 已提交
31

32
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
33 34 35 36 37
#include <net/netfilter/nf_conntrack.h>
#endif

/* Per-classifier instance state: the RCU-protected list of filters. */
struct flow_head {
	struct list_head	filters;
	struct rcu_head		rcu;	/* deferred free (kfree_rcu in flow_destroy) */
};

/* One flow filter instance, configured from the TCA_FLOW_* netlink
 * attributes in flow_change() and matched in flow_classify().
 */
struct flow_filter {
	struct list_head	list;		/* linked into flow_head::filters (RCU) */
	struct tcf_exts		exts;		/* actions attached to this filter */
	struct tcf_ematch_tree	ematches;	/* extended-match pre-filter */
	struct tcf_proto	*tp;
	struct timer_list	perturb_timer;	/* periodically re-seeds hashrnd */
	u32			perturb_period;	/* in jiffies; 0 = perturbation off */
	u32			handle;

	u32			nkeys;		/* hweight32(keymask) */
	u32			keymask;	/* bitmask of (1 << FLOW_KEY_*) */
	u32			mode;		/* FLOW_MODE_HASH or FLOW_MODE_MAP */
	u32			mask;		/* MAP mode: classid = ((key & mask) ^ xor */
	u32			xor;		/*           >> rshift) + addend        */
	u32			rshift;
	u32			addend;
	u32			divisor;	/* optional final modulo */
	u32			baseclass;	/* combined with classid via TC_H_MAKE */
	u32			hashrnd;	/* jhash2 seed, see flow_perturbation() */
	struct rcu_work		rwork;		/* deferred destruction */
};

/* Fold a kernel pointer into a 32-bit key; on 64-bit the upper half is
 * XORed into the lower half so both halves contribute.
 */
static inline u32 addr_fold(void *addr)
{
	unsigned long v = (unsigned long)addr;
	u32 key = v & 0xFFFFFFFF;

	if (BITS_PER_LONG > 32)
		key ^= v >> 32;
	return key;
}

E
Eric Dumazet 已提交
70
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
71
{
72 73 74 75 76
	__be32 src = flow_get_u32_src(flow);

	if (src)
		return ntohl(src);

77
	return addr_fold(skb->sk);
78 79
}

E
Eric Dumazet 已提交
80
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
81
{
82 83 84 85 86
	__be32 dst = flow_get_u32_dst(flow);

	if (dst)
		return ntohl(dst);

87
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
88 89
}

J
Jamal Hadi Salim 已提交
90 91
/* PROTO key: L4 protocol number from the dissected flow. */
static u32 flow_get_proto(const struct sk_buff *skb,
			  const struct flow_keys *flow)
{
	return flow->basic.ip_proto;
}

J
Jamal Hadi Salim 已提交
96 97
static u32 flow_get_proto_src(const struct sk_buff *skb,
			      const struct flow_keys *flow)
98
{
99
	if (flow->ports.ports)
100
		return ntohs(flow->ports.src);
101

102 103 104
	return addr_fold(skb->sk);
}

J
Jamal Hadi Salim 已提交
105 106
static u32 flow_get_proto_dst(const struct sk_buff *skb,
			      const struct flow_keys *flow)
107
{
108
	if (flow->ports.ports)
109
		return ntohs(flow->ports.dst);
110

111
	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
112 113 114 115
}

/* IIF key: ifindex of the device the packet arrived on. */
static u32 flow_get_iif(const struct sk_buff *skb)
{
	return skb->skb_iif;
}

/* PRIORITY key: the skb's queueing priority. */
static u32 flow_get_priority(const struct sk_buff *skb)
{
	return skb->priority;
}

/* MARK key: the skb's netfilter/fwmark value. */
static u32 flow_get_mark(const struct sk_buff *skb)
{
	return skb->mark;
}

/* NFCT key: folded pointer of the skb's conntrack entry; 0 when
 * conntrack is not compiled in.
 */
static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	return addr_fold(skb_nfct(skb));
#else
	return 0;
#endif
}

138
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Evaluate to @member of the conntrack tuple for @skb's direction.
 * NOTE: performs a hidden "goto fallback" into the *calling* function
 * when the skb has no conntrack entry — every user must provide a
 * local fallback: label.
 */
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info ctinfo;					\
	const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);		\
	if (ct == NULL)							\
		goto fallback;						\
	ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;			\
})
#else
/* Without conntrack the macro always takes the caller's fallback path. */
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif

J
Jamal Hadi Salim 已提交
155 156
/* NFCT_SRC key: source address from the conntrack tuple; CTTUPLE()
 * jumps to fallback (plain flow source) when the skb is untracked.
 */
static u32 flow_get_nfct_src(const struct sk_buff *skb,
			     const struct flow_keys *flow)
{
	switch (tc_skb_protocol(skb)) {
	case htons(ETH_P_IP):
		return ntohl(CTTUPLE(skb, src.u3.ip));
	case htons(ETH_P_IPV6):
		/* only the low 32 bits of the IPv6 address are used */
		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
	}
fallback:
	return flow_get_src(skb, flow);
}

J
Jamal Hadi Salim 已提交
168 169
/* NFCT_DST key: destination address from the conntrack tuple; CTTUPLE()
 * jumps to fallback (plain flow destination) when the skb is untracked.
 */
static u32 flow_get_nfct_dst(const struct sk_buff *skb,
			     const struct flow_keys *flow)
{
	switch (tc_skb_protocol(skb)) {
	case htons(ETH_P_IP):
		return ntohl(CTTUPLE(skb, dst.u3.ip));
	case htons(ETH_P_IPV6):
		/* only the low 32 bits of the IPv6 address are used */
		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
	}
fallback:
	return flow_get_dst(skb, flow);
}

J
Jamal Hadi Salim 已提交
181 182
/* NFCT_PROTO_SRC key: source port from the conntrack tuple, or the
 * plain dissected source port when the skb is untracked.
 */
static u32 flow_get_nfct_proto_src(const struct sk_buff *skb,
				   const struct flow_keys *flow)
{
	return ntohs(CTTUPLE(skb, src.u.all));
fallback:
	return flow_get_proto_src(skb, flow);
}

J
Jamal Hadi Salim 已提交
189 190
/* NFCT_PROTO_DST key: destination port from the conntrack tuple, or the
 * plain dissected destination port when the skb is untracked.
 */
static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb,
				   const struct flow_keys *flow)
{
	return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
	return flow_get_proto_dst(skb, flow);
}

/* RTCLASSID key: routing realm from the dst entry; 0 without
 * CONFIG_IP_ROUTE_CLASSID or when no dst is attached.
 */
static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (skb_dst(skb))
		return skb_dst(skb)->tclassid;
#endif
	return 0;
}

/* SKUID key: fsuid of the credentials of the originating socket's file,
 * mapped into the initial user namespace; 0 when no full socket (or no
 * socket file) is attached to the skb.
 */
static u32 flow_get_skuid(const struct sk_buff *skb)
{
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;

		return from_kuid(&init_user_ns, skuid);
	}
	return 0;
}

/* SKGID key: fsgid of the credentials of the originating socket's file,
 * mapped into the initial user namespace; 0 when no full socket (or no
 * socket file) is attached to the skb.
 */
static u32 flow_get_skgid(const struct sk_buff *skb)
{
	struct sock *sk = skb_to_full_sk(skb);

	if (sk && sk->sk_socket && sk->sk_socket->file) {
		kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;

		return from_kgid(&init_user_ns, skgid);
	}
	return 0;
}

230 231 232 233 234 235 236 237 238
/* VLAN_TAG key: the 12-bit VLAN id (hw-accelerated or in-payload tag);
 * 0 when the packet carries no VLAN tag.
 *
 * The old uninitialized_var() annotation is gone: it only silenced
 * compiler warnings (and has been removed from the kernel for hiding
 * real bugs).  A plain declaration is correct here — @tag is read only
 * after vlan_get_tag() succeeded and filled it in.
 */
static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
	u16 tag;

	if (vlan_get_tag(skb, &tag) < 0)
		return 0;
	return tag & VLAN_VID_MASK;
}

239 240
/* RXHASH key: the skb's flow hash (computed on demand by skb_get_hash). */
static u32 flow_get_rxhash(struct sk_buff *skb)
{
	return skb_get_hash(skb);
}

E
Eric Dumazet 已提交
244
/* Dispatch one FLOW_KEY_* id to its getter.  @flow is only dissected by
 * flow_classify() when a key in FLOW_KEYS_NEEDED is configured, so only
 * those getters may read it.  An unknown key id warns and yields 0.
 */
static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
{
	switch (key) {
	case FLOW_KEY_SRC:
		return flow_get_src(skb, flow);
	case FLOW_KEY_DST:
		return flow_get_dst(skb, flow);
	case FLOW_KEY_PROTO:
		return flow_get_proto(skb, flow);
	case FLOW_KEY_PROTO_SRC:
		return flow_get_proto_src(skb, flow);
	case FLOW_KEY_PROTO_DST:
		return flow_get_proto_dst(skb, flow);
	case FLOW_KEY_IIF:
		return flow_get_iif(skb);
	case FLOW_KEY_PRIORITY:
		return flow_get_priority(skb);
	case FLOW_KEY_MARK:
		return flow_get_mark(skb);
	case FLOW_KEY_NFCT:
		return flow_get_nfct(skb);
	case FLOW_KEY_NFCT_SRC:
		return flow_get_nfct_src(skb, flow);
	case FLOW_KEY_NFCT_DST:
		return flow_get_nfct_dst(skb, flow);
	case FLOW_KEY_NFCT_PROTO_SRC:
		return flow_get_nfct_proto_src(skb, flow);
	case FLOW_KEY_NFCT_PROTO_DST:
		return flow_get_nfct_proto_dst(skb, flow);
	case FLOW_KEY_RTCLASSID:
		return flow_get_rtclassid(skb);
	case FLOW_KEY_SKUID:
		return flow_get_skuid(skb);
	case FLOW_KEY_SKGID:
		return flow_get_skgid(skb);
	case FLOW_KEY_VLAN_TAG:
		return flow_get_vlan_tag(skb);
	case FLOW_KEY_RXHASH:
		return flow_get_rxhash(skb);
	default:
		WARN_ON(1);
		return 0;
	}
}

E
Eric Dumazet 已提交
289 290 291 292 293 294 295 296 297 298
/* Bitmask of the keys whose getters read the dissected struct
 * flow_keys; flow_classify() skips the flow dissection entirely when a
 * filter configures none of these.
 */
#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | 		\
			  (1 << FLOW_KEY_DST) |			\
			  (1 << FLOW_KEY_PROTO) |		\
			  (1 << FLOW_KEY_PROTO_SRC) |		\
			  (1 << FLOW_KEY_PROTO_DST) | 		\
			  (1 << FLOW_KEY_NFCT_SRC) |		\
			  (1 << FLOW_KEY_NFCT_DST) |		\
			  (1 << FLOW_KEY_NFCT_PROTO_SRC) |	\
			  (1 << FLOW_KEY_NFCT_PROTO_DST))

299
/* Classify @skb against the filter list (RCU read side, BH context).
 * The first filter whose ematch tree accepts the packet computes a
 * class id from its configured keys — by jhash2 in FLOW_MODE_HASH, or
 * by mask/xor/rshift/addend arithmetic on the single key in
 * FLOW_MODE_MAP — and runs its actions.  Returns the tcf_exts_exec()
 * result, or -1 when no filter produced one.
 */
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			 struct tcf_result *res)
{
	struct flow_head *head = rcu_dereference_bh(tp->root);
	struct flow_filter *f;
	u32 keymask;
	u32 classid;
	unsigned int n, key;
	int r;

	list_for_each_entry_rcu(f, &head->filters, list) {
		u32 keys[FLOW_KEY_MAX + 1];
		struct flow_keys flow_keys;

		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
			continue;

		keymask = f->keymask;
		/* dissect once per filter, and only if some key needs it */
		if (keymask & FLOW_KEYS_NEEDED)
			skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

		/* gather key values in ascending bit order */
		for (n = 0; n < f->nkeys; n++) {
			key = ffs(keymask) - 1;
			keymask &= ~(1 << key);
			keys[n] = flow_key_get(skb, key, &flow_keys);
		}

		if (f->mode == FLOW_MODE_HASH)
			classid = jhash2(keys, f->nkeys, f->hashrnd);
		else {
			/* MAP mode: nkeys == 1, enforced in flow_change() */
			classid = keys[0];
			classid = (classid & f->mask) ^ f->xor;
			classid = (classid >> f->rshift) + f->addend;
		}

		if (f->divisor)
			classid %= f->divisor;

		res->class   = 0;
		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

		r = tcf_exts_exec(skb, &f->exts, res);
		if (r < 0)
			continue;
		return r;
	}
	return -1;
}

348
/* Perturbation timer: re-seed the jhash so hash-mode class assignment
 * changes over time, then re-arm while a period is still configured.
 */
static void flow_perturbation(struct timer_list *t)
{
	struct flow_filter *f = from_timer(f, t, perturb_timer);

	get_random_bytes(&f->hashrnd, 4);
	if (f->perturb_period)
		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
}

357 358 359 360 361 362 363 364 365 366 367 368
/* Validation policy for the TCA_FLOW_* netlink attributes. */
static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
};

372
/* Final teardown of a filter: stop the perturb timer, release actions
 * and ematches, then free.  Called with RTNL held (directly from
 * flow_destroy() or via flow_destroy_filter_work()).
 */
static void __flow_destroy_filter(struct flow_filter *f)
{
	del_timer_sync(&f->perturb_timer);
	tcf_exts_destroy(&f->exts);
	tcf_em_tree_destroy(&f->ematches);
	tcf_exts_put_net(&f->exts);
	kfree(f);
}

/* Deferred destruction, queued via tcf_queue_work(): takes RTNL and
 * performs the actual teardown after the RCU grace period.
 */
static void flow_destroy_filter_work(struct work_struct *work)
{
	struct flow_filter *f = container_of(to_rcu_work(work),
					     struct flow_filter,
					     rwork);
	rtnl_lock();
	__flow_destroy_filter(f);
	rtnl_unlock();
}

391
static int flow_change(struct net *net, struct sk_buff *in_skb,
392
		       struct tcf_proto *tp, unsigned long base,
393
		       u32 handle, struct nlattr **tca,
394 395
		       void **arg, bool ovr, bool rtnl_held,
		       struct netlink_ext_ack *extack)
396
{
J
John Fastabend 已提交
397 398
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *fold, *fnew;
399 400 401
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_FLOW_MAX + 1];
	unsigned int nkeys = 0;
402
	unsigned int perturb_period = 0;
403 404 405 406 407 408 409 410
	u32 baseclass = 0;
	u32 keymask = 0;
	u32 mode;
	int err;

	if (opt == NULL)
		return -EINVAL;

411
	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL);
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
	if (err < 0)
		return err;

	if (tb[TCA_FLOW_BASECLASS]) {
		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
		if (TC_H_MIN(baseclass) == 0)
			return -EINVAL;
	}

	if (tb[TCA_FLOW_KEYS]) {
		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);

		nkeys = hweight32(keymask);
		if (nkeys == 0)
			return -EINVAL;
427 428 429

		if (fls(keymask) - 1 > FLOW_KEY_MAX)
			return -EOPNOTSUPP;
430 431

		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
432
		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
433
			return -EOPNOTSUPP;
434 435
	}

J
John Fastabend 已提交
436 437
	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew)
438
		return -ENOBUFS;
439 440 441

	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
	if (err < 0)
442
		goto err1;
J
John Fastabend 已提交
443

444 445
	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
	if (err < 0)
446 447
		goto err2;

448
	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
449
				true, extack);
450 451
	if (err < 0)
		goto err2;
452

453
	fold = *arg;
J
John Fastabend 已提交
454
	if (fold) {
455
		err = -EINVAL;
J
John Fastabend 已提交
456
		if (fold->handle != handle && handle)
457
			goto err2;
458

J
John Fastabend 已提交
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
		/* Copy fold into fnew */
		fnew->tp = fold->tp;
		fnew->handle = fold->handle;
		fnew->nkeys = fold->nkeys;
		fnew->keymask = fold->keymask;
		fnew->mode = fold->mode;
		fnew->mask = fold->mask;
		fnew->xor = fold->xor;
		fnew->rshift = fold->rshift;
		fnew->addend = fold->addend;
		fnew->divisor = fold->divisor;
		fnew->baseclass = fold->baseclass;
		fnew->hashrnd = fold->hashrnd;

		mode = fold->mode;
474 475 476
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
477
			goto err2;
478 479

		if (mode == FLOW_MODE_HASH)
J
John Fastabend 已提交
480
			perturb_period = fold->perturb_period;
481 482
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
483
				goto err2;
484 485
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}
486 487 488
	} else {
		err = -EINVAL;
		if (!handle)
489
			goto err2;
490
		if (!tb[TCA_FLOW_KEYS])
491
			goto err2;
492 493 494 495 496

		mode = FLOW_MODE_MAP;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
497
			goto err2;
498

499 500
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
501
				goto err2;
502 503 504
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}

505 506 507 508 509
		if (TC_H_MAJ(baseclass) == 0) {
			struct Qdisc *q = tcf_block_q(tp->chain->block);

			baseclass = TC_H_MAKE(q->handle, baseclass);
		}
510 511 512
		if (TC_H_MIN(baseclass) == 0)
			baseclass = TC_H_MAKE(baseclass, 1);

J
John Fastabend 已提交
513 514 515 516
		fnew->handle = handle;
		fnew->mask  = ~0U;
		fnew->tp = tp;
		get_random_bytes(&fnew->hashrnd, 4);
517 518
	}

519
	timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
520

521
	tcf_block_netif_keep_dst(tp->chain->block);
522

523
	if (tb[TCA_FLOW_KEYS]) {
J
John Fastabend 已提交
524 525
		fnew->keymask = keymask;
		fnew->nkeys   = nkeys;
526 527
	}

J
John Fastabend 已提交
528
	fnew->mode = mode;
529 530

	if (tb[TCA_FLOW_MASK])
J
John Fastabend 已提交
531
		fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
532
	if (tb[TCA_FLOW_XOR])
J
John Fastabend 已提交
533
		fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
534
	if (tb[TCA_FLOW_RSHIFT])
J
John Fastabend 已提交
535
		fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
536
	if (tb[TCA_FLOW_ADDEND])
J
John Fastabend 已提交
537
		fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
538 539

	if (tb[TCA_FLOW_DIVISOR])
J
John Fastabend 已提交
540
		fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
541
	if (baseclass)
J
John Fastabend 已提交
542
		fnew->baseclass = baseclass;
543

J
John Fastabend 已提交
544
	fnew->perturb_period = perturb_period;
545
	if (perturb_period)
J
John Fastabend 已提交
546
		mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
547

548
	if (!*arg)
J
John Fastabend 已提交
549 550
		list_add_tail_rcu(&fnew->list, &head->filters);
	else
551
		list_replace_rcu(&fold->list, &fnew->list);
552

553
	*arg = fnew;
554

555 556
	if (fold) {
		tcf_exts_get_net(&fold->exts);
C
Cong Wang 已提交
557
		tcf_queue_work(&fold->rwork, flow_destroy_filter_work);
558
	}
559 560
	return 0;

561
err2:
562
	tcf_exts_destroy(&fnew->exts);
563
	tcf_em_tree_destroy(&fnew->ematches);
564
err1:
565
	kfree(fnew);
566 567 568
	return err;
}

569
/* Unlink the filter @arg under RCU and schedule its deferred
 * destruction.  *last reports whether the filter list is now empty so
 * the caller can remove the whole classifier.
 */
static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
		       bool rtnl_held, struct netlink_ext_ack *extack)
{
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *f = arg;

	list_del_rcu(&f->list);
	tcf_exts_get_net(&f->exts);
	tcf_queue_work(&f->rwork, flow_destroy_filter_work);
	*last = list_empty(&head->filters);
	return 0;
}

/* Allocate the per-classifier head with an empty filter list and
 * publish it as tp->root.
 */
static int flow_init(struct tcf_proto *tp)
{
	struct flow_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;
	INIT_LIST_HEAD(&head->filters);
	rcu_assign_pointer(tp->root, head);
	return 0;
}

594 595
/* Tear down every filter and free the head after an RCU grace period.
 * Destruction is deferred to the workqueue while the netns is still
 * alive (tcf_exts_get_net() succeeds), otherwise done inline.
 */
static void flow_destroy(struct tcf_proto *tp, bool rtnl_held,
			 struct netlink_ext_ack *extack)
{
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *f, *next;

	list_for_each_entry_safe(f, next, &head->filters, list) {
		list_del_rcu(&f->list);
		if (tcf_exts_get_net(&f->exts))
			tcf_queue_work(&f->rwork, flow_destroy_filter_work);
		else
			__flow_destroy_filter(f);
	}
	kfree_rcu(head, rcu);
}

610
/* Look up a filter by handle; the list walk is protected by RTNL. */
static void *flow_get(struct tcf_proto *tp, u32 handle)
{
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *f;

	list_for_each_entry(f, &head->filters, list)
		if (f->handle == handle)
			return f;
	return NULL;
}

621
/* Dump filter @fh's configuration as nested TCA_FLOW_* attributes.
 * Attributes still at their default values (mask ~0, xor/rshift/addend/
 * divisor/baseclass/perturb 0) are omitted.  Returns skb->len on
 * success, -1 when the skb ran out of room (nest is cancelled).
 */
static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
	struct flow_filter *f = fh;
	struct nlattr *nest;

	if (f == NULL)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
	    nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
		goto nla_put_failure;

	if (f->mask != ~0 || f->xor != 0) {
		if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
		    nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
			goto nla_put_failure;
	}
	if (f->rshift &&
	    nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
		goto nla_put_failure;
	if (f->addend &&
	    nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
		goto nla_put_failure;

	if (f->divisor &&
	    nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
		goto nla_put_failure;
	if (f->baseclass &&
	    nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
		goto nla_put_failure;

	/* stored in jiffies, reported back in seconds */
	if (f->perturb_period &&
	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
		goto nla_put_failure;

	if (tcf_exts_dump(skb, &f->exts) < 0)
		goto nla_put_failure;
#ifdef CONFIG_NET_EMATCH
	if (f->ematches.hdr.nmatches &&
	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
		goto nla_put_failure;
#endif
	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

682 683
/* Walk all filters for a dump, honouring the walker's skip offset and
 * stopping when the callback reports failure.
 */
static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
		      bool rtnl_held)
{
	struct flow_head *head = rtnl_dereference(tp->root);
	struct flow_filter *f;

	list_for_each_entry(f, &head->filters, list) {
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, f, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

/* Classifier operations registered with the tc core under kind "flow". */
static struct tcf_proto_ops cls_flow_ops __read_mostly = {
	.kind		= "flow",
	.classify	= flow_classify,
	.init		= flow_init,
	.destroy	= flow_destroy,
	.change		= flow_change,
	.delete		= flow_delete,
	.get		= flow_get,
	.dump		= flow_dump,
	.walk		= flow_walk,
	.owner		= THIS_MODULE,
};

/* Module entry: register the "flow" classifier with the tc core. */
static int __init cls_flow_init(void)
{
	return register_tcf_proto_ops(&cls_flow_ops);
}

/* Module exit: unregister the classifier. */
static void __exit cls_flow_exit(void)
{
	unregister_tcf_proto_ops(&cls_flow_ops);
}

module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");