fib_rules.c 29.6 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
2 3 4 5 6 7 8 9
/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
10
#include <linux/slab.h>
11
#include <linux/list.h>
12
#include <linux/module.h>
13
#include <net/net_namespace.h>
14
#include <net/sock.h>
15
#include <net/fib_rules.h>
16
#include <net/ip_tunnels.h>
17
#include <linux/indirect_call_wrapper.h>
18

19 20 21 22 23
/* Sentinel UID range [0, ~0] meaning "match any UID" (range not set). */
static const struct fib_kuid_range fib_kuid_range_unset = {
	KUIDT_INIT(0),
	KUIDT_INIT(~0),
};

24 25 26 27 28 29 30 31 32 33
bool fib_rule_matchall(const struct fib_rule *rule)
{
	if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
	    rule->flags)
		return false;
	if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
		return false;
	if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
	    !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
		return false;
34 35 36 37
	if (fib_rule_port_range_set(&rule->sport_range))
		return false;
	if (fib_rule_port_range_set(&rule->dport_range))
		return false;
38 39 40 41
	return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);

42 43 44 45 46 47 48 49 50
/**
 * fib_default_rule_add - install a catch-all rule for a family
 * @ops:   per-family rules operations to add the rule to
 * @pref:  rule preference (priority)
 * @table: routing table the rule directs lookups to
 * @flags: FIB_RULE_* flags
 *
 * Allocates a rule with every selector left at its wildcard default (so
 * it matches all traffic) and appends it to @ops' rule list.  Returns 0
 * on success or -ENOMEM.
 */
int fib_default_rule_add(struct fib_rules_ops *ops,
			 u32 pref, u32 table, u32 flags)
{
	struct fib_rule *r;

	r = kzalloc(ops->rule_size, GFP_KERNEL);
	if (r == NULL)
		return -ENOMEM;

	refcount_set(&r->refcnt, 1);
	r->action = FR_ACT_TO_TBL;
	r->pref = pref;
	r->table = table;
	r->flags = flags;
	r->proto = RTPROT_KERNEL;
	r->fr_net = ops->fro_net;
	r->uid_range = fib_kuid_range_unset;

	/* -1 means "suppressor not configured" for both fields */
	r->suppress_prefixlen = -1;
	r->suppress_ifgroup = -1;

	/* The lock is not required here, the list is unreachable
	 * at the moment this function is called */
	list_add_tail(&r->list, &ops->rules_list);
	return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

70
static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
{
	struct list_head *pos;
	struct fib_rule *rule;

	if (!list_empty(&ops->rules_list)) {
		pos = ops->rules_list.next;
		if (pos->next != &ops->rules_list) {
			rule = list_entry(pos->next, struct fib_rule, list);
			if (rule->pref)
				return rule->pref - 1;
		}
	}

	return 0;
}

D
Denis V. Lunev 已提交
87
/* Forward declaration: netlink notification helper defined at the bottom
 * of this file.
 */
static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid);
90

91
static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
92 93 94 95
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
96
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

/* Drop the module reference taken by lookup_rules_ops(); NULL is a no-op. */
static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (!ops)
		return;

	module_put(ops->owner);
}

115 116 117
/* Invoke the optional per-family route cache flush hook. */
static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (!ops->flush_cache)
		return;

	ops->flush_cache(ops);
}

121
/* Validate an ops template and link it into the per-netns rules_ops list.
 * Exactly one ops may be registered per address family; returns -EEXIST
 * when the family is already claimed, -EINVAL on a malformed template.
 */
static int __fib_rules_register(struct fib_rules_ops *ops)
{
	int err = -EEXIST;
	struct fib_rules_ops *o;
	struct net *net;

	net = ops->fro_net;

	/* rule_size must cover at least the generic fib_rule header */
	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	/* These five callbacks are mandatory for every family */
	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

	/* rules_mod_lock serializes registration against concurrent
	 * register/unregister; readers traverse the list under RCU.
	 */
	spin_lock(&net->rules_mod_lock);
	list_for_each_entry(o, &net->rules_ops, list)
		if (ops->family == o->family)
			goto errout;

	list_add_tail_rcu(&ops->list, &net->rules_ops);
	err = 0;
errout:
	spin_unlock(&net->rules_mod_lock);

	return err;
}

150
struct fib_rules_ops *
151
fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
152 153 154 155
{
	struct fib_rules_ops *ops;
	int err;

156
	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
157 158 159 160 161 162 163 164 165 166 167 168 169 170
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ops->rules_list);
	ops->fro_net = net;

	err = __fib_rules_register(ops);
	if (err) {
		kfree(ops);
		ops = ERR_PTR(err);
	}

	return ops;
}
171 172
EXPORT_SYMBOL_GPL(fib_rules_register);

173
/* Unlink and release every rule still on @ops' list.  Concurrent readers
 * may still be traversing the list under RCU, hence list_del_rcu() plus a
 * refcounted put rather than an immediate free.
 */
static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
	struct fib_rule *rule, *tmp;

	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
		list_del_rcu(&rule->list);
		if (ops->delete)
			ops->delete(rule);
		fib_rule_put(rule);
	}
}

D
Denis V. Lunev 已提交
185
/**
 * fib_rules_unregister - tear down a family's fib rules operations
 * @ops: ops previously returned by fib_rules_register()
 *
 * Unlinks @ops from its namespace, releases all remaining rules, and
 * frees @ops after an RCU grace period so in-flight readers stay safe.
 */
void fib_rules_unregister(struct fib_rules_ops *ops)
{
	struct net *net = ops->fro_net;

	spin_lock(&net->rules_mod_lock);
	list_del_rcu(&ops->list);
	spin_unlock(&net->rules_mod_lock);

	fib_rules_cleanup_ops(ops);
	kfree_rcu(ops, rcu);
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
/* Nonzero when both ends of @range hold valid kuids, i.e. a UID range
 * was actually configured on the rule.
 */
static int uid_range_set(struct fib_kuid_range *range)
{
	if (!uid_valid(range->start))
		return 0;

	return uid_valid(range->end);
}

static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
{
	struct fib_rule_uid_range *in;
	struct fib_kuid_range out;

	in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);

	out.start = make_kuid(current_user_ns(), in->start);
	out.end = make_kuid(current_user_ns(), in->end);

	return out;
}

/* Emit @range as an FRA_UID_RANGE attribute, translated back into the
 * current user namespace for userspace consumption.
 */
static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
{
	struct fib_rule_uid_range out;

	out.start = from_kuid_munged(current_user_ns(), range->start);
	out.end = from_kuid_munged(current_user_ns(), range->end);

	return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
/* Copy a validated port range out of netlink attribute @pattr into
 * @port_range.  Returns -EINVAL when the attribute's range is malformed.
 */
static int nla_get_port_range(struct nlattr *pattr,
			      struct fib_rule_port_range *port_range)
{
	const struct fib_rule_port_range *pr = nla_data(pattr);

	if (!fib_rule_port_range_valid(pr))
		return -EINVAL;

	*port_range = *pr;

	return 0;
}

/* Emit @range as a netlink attribute of the given @attrtype
 * (FRA_SPORT_RANGE or FRA_DPORT_RANGE).
 */
static int nla_put_port_range(struct sk_buff *skb, int attrtype,
			      struct fib_rule_port_range *range)
{
	return nla_put(skb, attrtype, sizeof(*range), range);
}

246
/* Test whether @fl matches @rule.  All generic selectors must match
 * (conjunction); the per-family ops->match runs last.  When the rule
 * carries FIB_RULE_INVERT, the final result is negated.
 */
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
			  struct flowi *fl, int flags,
			  struct fib_lookup_arg *arg)
{
	int ret = 0;

	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
		goto out;

	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
		goto out;

	/* fwmark comparison restricted to the bits selected by mark_mask */
	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
		goto out;

	if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
		goto out;

	if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
		goto out;

	if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
	    uid_gt(fl->flowi_uid, rule->uid_range.end))
		goto out;

	/* Family-specific match (src/dst prefixes etc.); indirect call
	 * wrappers avoid retpoline overhead for the common INET cases.
	 */
	ret = INDIRECT_CALL_INET(ops->match,
				 fib6_rule_match,
				 fib4_rule_match,
				 rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

279 280 281 282 283 284 285 286
/**
 * fib_rules_lookup - walk a family's rule list and act on the first match
 * @ops:   family's rules operations
 * @fl:    flow to match against
 * @flags: family-specific match flags
 * @arg:   lookup state; on success arg->rule is set to the matched rule
 *
 * Walks the rules under RCU in preference order.  GOTO rules restart
 * matching at their target ("jumped"); NOP rules are skipped.  A rule
 * whose action returns anything but -EAGAIN terminates the walk; its
 * result may still be vetoed by the family's suppress callback.  Unless
 * FIB_LOOKUP_NOREF is set, a reference is taken on the returned rule.
 * Returns -ESRCH when no rule gives a result.
 */
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		if (!fib_rule_match(rule, ops, fl, flags, arg))
			continue;

		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				/* unresolved goto: fall through to next rule */
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
			err = INDIRECT_CALL_INET(ops->action,
						 fib6_rule_action,
						 fib4_rule_action,
						 rule, fl, flags, arg);

		/* suppress may veto an otherwise successful result
		 * (e.g. prefix length / interface group suppression)
		 */
		if (!err && ops->suppress && INDIRECT_CALL_INET(ops->suppress,
								fib6_rule_suppress,
								fib4_rule_suppress,
								rule, arg))
			continue;

		if (err != -EAGAIN) {
			/* refcount_inc_not_zero guards against a rule
			 * being freed concurrently with this RCU walk
			 */
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(refcount_inc_not_zero(&rule->refcnt))) {
				arg->rule = rule;
				goto out;
			}
			break;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);

334
/* Deliver a single rule event to one listener (@nb); used when replaying
 * the rule list to a newly registered notifier (see fib_rules_dump()).
 */
static int call_fib_rule_notifier(struct notifier_block *nb,
				  enum fib_event_type event_type,
				  struct fib_rule *rule, int family,
				  struct netlink_ext_ack *extack)
{
	struct fib_rule_notifier_info info = {
		.info.family = family,
		.info.extack = extack,
		.rule = rule,
	};

	return call_fib_notifier(nb, event_type, &info.info);
}

/* Deliver a rule add/del event to all FIB notifier listeners in @net.
 * Side effect: bumps ops->fib_rules_seq, the sequence counter exposed by
 * fib_rules_seq_read() for missed-event detection.
 */
static int call_fib_rule_notifiers(struct net *net,
				   enum fib_event_type event_type,
				   struct fib_rule *rule,
				   struct fib_rules_ops *ops,
				   struct netlink_ext_ack *extack)
{
	struct fib_rule_notifier_info info = {
		.info.family = ops->family,
		.info.extack = extack,
		.rule = rule,
	};

	ops->fib_rules_seq++;
	return call_fib_notifiers(net, event_type, &info.info);
}

/* Called with rcu_read_lock() */
/* Replay every existing rule of @family as a FIB_EVENT_RULE_ADD to the
 * single listener @nb, stopping at the first error the listener reports.
 */
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
		   struct netlink_ext_ack *extack)
{
	struct fib_rules_ops *ops;
	struct fib_rule *rule;
	int err = 0;

	ops = lookup_rules_ops(net, family);
	if (!ops)
		return -EAFNOSUPPORT;
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
		err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD,
					     rule, family, extack);
		if (err)
			break;
	}
	rules_ops_put(ops);

	return err;
}
EXPORT_SYMBOL_GPL(fib_rules_dump);

/**
 * fib_rules_seq_read - read @family's rule-change sequence counter
 * @net:    network namespace
 * @family: address family
 *
 * Returns the counter incremented by call_fib_rule_notifiers(), or 0
 * when the family is not registered.  Must be called under RTNL.
 */
unsigned int fib_rules_seq_read(struct net *net, int family)
{
	struct fib_rules_ops *ops;
	unsigned int seq = 0;

	ASSERT_RTNL();

	ops = lookup_rules_ops(net, family);
	if (ops) {
		seq = ops->fib_rules_seq;
		rules_ops_put(ops);
	}

	return seq;
}
EXPORT_SYMBOL_GPL(fib_rules_seq_read);

404 405 406 407 408
/* Find an existing rule matching the request in @rule (built by
 * fib_nl2rule()).  Unlike rule_exists(), each selector is only compared
 * when the request actually set it, so a sparse delete request can match
 * a fully specified rule.  @user_priority forces a preference compare
 * when userspace gave FRA_PRIORITY explicitly.  Returns the first match
 * or NULL.
 */
static struct fib_rule *rule_find(struct fib_rules_ops *ops,
				  struct fib_rule_hdr *frh,
				  struct nlattr **tb,
				  struct fib_rule *rule,
				  bool user_priority)
{
	struct fib_rule *r;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (rule->action && r->action != rule->action)
			continue;

		if (rule->table && r->table != rule->table)
			continue;

		if (user_priority && r->pref != rule->pref)
			continue;

		if (rule->iifname[0] &&
		    memcmp(r->iifname, rule->iifname, IFNAMSIZ))
			continue;

		if (rule->oifname[0] &&
		    memcmp(r->oifname, rule->oifname, IFNAMSIZ))
			continue;

		if (rule->mark && r->mark != rule->mark)
			continue;

		if (rule->suppress_ifgroup != -1 &&
		    r->suppress_ifgroup != rule->suppress_ifgroup)
			continue;

		if (rule->suppress_prefixlen != -1 &&
		    r->suppress_prefixlen != rule->suppress_prefixlen)
			continue;

		if (rule->mark_mask && r->mark_mask != rule->mark_mask)
			continue;

		if (rule->tun_id && r->tun_id != rule->tun_id)
			continue;

		if (r->fr_net != rule->fr_net)
			continue;

		if (rule->l3mdev && r->l3mdev != rule->l3mdev)
			continue;

		if (uid_range_set(&rule->uid_range) &&
		    (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
		    !uid_eq(r->uid_range.end, rule->uid_range.end)))
			continue;

		if (rule->ip_proto && r->ip_proto != rule->ip_proto)
			continue;

		if (rule->proto && r->proto != rule->proto)
			continue;

		if (fib_rule_port_range_set(&rule->sport_range) &&
		    !fib_rule_port_range_compare(&r->sport_range,
						 &rule->sport_range))
			continue;

		if (fib_rule_port_range_set(&rule->dport_range) &&
		    !fib_rule_port_range_compare(&r->dport_range,
						 &rule->dport_range))
			continue;

		/* finally, the family-specific selectors (src/dst etc.) */
		if (!ops->compare(r, frh, tb))
			continue;
		return r;
	}

	return NULL;
}

482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
#ifdef CONFIG_NET_L3_MASTER_DEV
/* Parse FRA_L3MDEV into the rule; the attribute's only valid value is 1.
 * Returns 0 on success, -1 (with extack set) on a bad value.
 */
static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
			      struct netlink_ext_ack *extack)
{
	nlrule->l3mdev = nla_get_u8(nla);
	if (nlrule->l3mdev != 1) {
		NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute");
		return -1;
	}

	return 0;
}
#else
/* Without CONFIG_NET_L3_MASTER_DEV the attribute is always rejected. */
static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
			      struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel");
	return -1;
}
#endif

503 504 505 506 507 508
/* Translate a parsed RTM_NEWRULE/RTM_DELRULE request (header @nlh plus
 * attribute table @tb) into a freshly allocated struct fib_rule.  On
 * success *@rule owns the allocation and *@user_priority records whether
 * FRA_PRIORITY was given explicitly.  On failure a negative errno is
 * returned and nothing is leaked.
 */
static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
		       struct netlink_ext_ack *extack,
		       struct fib_rules_ops *ops,
		       struct nlattr *tb[],
		       struct fib_rule **rule,
		       bool *user_priority)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rule *nlrule = NULL;
	int err = -EINVAL;

	/* src/dst prefix lengths must fit the family's address size and
	 * the attribute payload must be exactly one address
	 */
	if (frh->src_len)
		if (!tb[FRA_SRC] ||
		    frh->src_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_SRC]) != ops->addr_size) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			goto errout;
	}

	if (frh->dst_len)
		if (!tb[FRA_DST] ||
		    frh->dst_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_DST]) != ops->addr_size) {
			NL_SET_ERR_MSG(extack, "Invalid dst address");
			goto errout;
	}

	nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (!nlrule) {
		err = -ENOMEM;
		goto errout;
	}
	refcount_set(&nlrule->refcnt, 1);
	nlrule->fr_net = net;

	if (tb[FRA_PRIORITY]) {
		nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]);
		*user_priority = true;
	} else {
		nlrule->pref = fib_default_rule_pref(ops);
	}

	nlrule->proto = tb[FRA_PROTOCOL] ?
		nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;

	/* Interface names are resolved now; -1 marks a currently
	 * non-existent device (rule stays "detached" until it appears)
	 */
	if (tb[FRA_IIFNAME]) {
		struct net_device *dev;

		nlrule->iifindex = -1;
		nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, nlrule->iifname);
		if (dev)
			nlrule->iifindex = dev->ifindex;
	}

	if (tb[FRA_OIFNAME]) {
		struct net_device *dev;

		nlrule->oifindex = -1;
		nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, nlrule->oifname);
		if (dev)
			nlrule->oifindex = dev->ifindex;
	}

	if (tb[FRA_FWMARK]) {
		nlrule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (nlrule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			nlrule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

	if (tb[FRA_TUN_ID])
		nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);

	err = -EINVAL;
	if (tb[FRA_L3MDEV] &&
	    fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
		goto errout_free;

	nlrule->action = frh->action;
	nlrule->flags = frh->flags;
	nlrule->table = frh_get_table(frh, tb);
	if (tb[FRA_SUPPRESS_PREFIXLEN])
		nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
	else
		nlrule->suppress_prefixlen = -1;

	if (tb[FRA_SUPPRESS_IFGROUP])
		nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
	else
		nlrule->suppress_ifgroup = -1;

	if (tb[FRA_GOTO]) {
		if (nlrule->action != FR_ACT_GOTO) {
			NL_SET_ERR_MSG(extack, "Unexpected goto");
			goto errout_free;
		}

		nlrule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (nlrule->target <= nlrule->pref) {
			NL_SET_ERR_MSG(extack, "Backward goto not supported");
			goto errout_free;
		}
	} else if (nlrule->action == FR_ACT_GOTO) {
		NL_SET_ERR_MSG(extack, "Missing goto target for action goto");
		goto errout_free;
	}

	if (nlrule->l3mdev && nlrule->table) {
		NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive");
		goto errout_free;
	}

	if (tb[FRA_UID_RANGE]) {
		/* only the netns owner may install UID-scoped rules */
		if (current_user_ns() != net->user_ns) {
			err = -EPERM;
			NL_SET_ERR_MSG(extack, "No permission to set uid");
			goto errout_free;
		}

		nlrule->uid_range = nla_get_kuid_range(tb);

		if (!uid_range_set(&nlrule->uid_range) ||
		    !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) {
			NL_SET_ERR_MSG(extack, "Invalid uid range");
			goto errout_free;
		}
	} else {
		nlrule->uid_range = fib_kuid_range_unset;
	}

	if (tb[FRA_IP_PROTO])
		nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);

	if (tb[FRA_SPORT_RANGE]) {
		err = nla_get_port_range(tb[FRA_SPORT_RANGE],
					 &nlrule->sport_range);
		if (err) {
			NL_SET_ERR_MSG(extack, "Invalid sport range");
			goto errout_free;
		}
	}

	if (tb[FRA_DPORT_RANGE]) {
		err = nla_get_port_range(tb[FRA_DPORT_RANGE],
					 &nlrule->dport_range);
		if (err) {
			NL_SET_ERR_MSG(extack, "Invalid dport range");
			goto errout_free;
		}
	}

	*rule = nlrule;

	return 0;

errout_free:
	kfree(nlrule);
errout:
	return err;
}

673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
/* Strict duplicate check used for NLM_F_EXCL on RTM_NEWRULE: unlike
 * rule_find(), every selector must be exactly equal, whether set or not.
 * Returns 1 when an identical rule is already installed.
 */
static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
		       struct nlattr **tb, struct fib_rule *rule)
{
	struct fib_rule *r;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->action != rule->action)
			continue;

		if (r->table != rule->table)
			continue;

		if (r->pref != rule->pref)
			continue;

		if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
			continue;

		if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
			continue;

		if (r->mark != rule->mark)
			continue;

		if (r->suppress_ifgroup != rule->suppress_ifgroup)
			continue;

		if (r->suppress_prefixlen != rule->suppress_prefixlen)
			continue;

		if (r->mark_mask != rule->mark_mask)
			continue;

		if (r->tun_id != rule->tun_id)
			continue;

		if (r->fr_net != rule->fr_net)
			continue;

		if (r->l3mdev != rule->l3mdev)
			continue;

		if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
		    !uid_eq(r->uid_range.end, rule->uid_range.end))
			continue;

		if (r->ip_proto != rule->ip_proto)
			continue;

		if (r->proto != rule->proto)
			continue;

		if (!fib_rule_port_range_compare(&r->sport_range,
						 &rule->sport_range))
			continue;

		if (!fib_rule_port_range_compare(&r->dport_range,
						 &rule->dport_range))
			continue;

		/* family-specific selectors must match as well */
		if (!ops->compare(r, frh, tb))
			continue;
		return 1;
	}
	return 0;
}

740 741 742 743 744 745 746 747 748 749 750
/**
 * fib_nl_newrule - netlink handler for RTM_NEWRULE
 * @skb:    request skb
 * @nlh:    netlink message header
 * @extack: extended ack for error reporting
 *
 * Parses the request into a new rule, runs the family's configure
 * callback and notifiers, inserts the rule in preference order, and
 * maintains the GOTO target bookkeeping (ctarget pointers and the
 * unresolved/goto counters).
 */
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule = NULL, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX + 1];
	int err = -EINVAL, unresolved = 0;
	bool user_priority = false;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid msg length");
		goto errout;
	}

	ops = lookup_rules_ops(net, frh->family);
	if (!ops) {
		err = -EAFNOSUPPORT;
		NL_SET_ERR_MSG(extack, "Rule family not supported");
		goto errout;
	}

	err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX,
				     ops->policy, extack);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Error parsing msg");
		goto errout;
	}

	err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority);
	if (err)
		goto errout;

	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
	    rule_exists(ops, frh, tb, rule)) {
		err = -EEXIST;
		goto errout_free;
	}

	err = ops->configure(rule, skb, frh, tb, extack);
	if (err < 0)
		goto errout_free;

	err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
				      extack);
	if (err < 0)
		goto errout_free;

	/* resolve this rule's own GOTO target, if any rule with the
	 * target preference already exists
	 */
	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref == rule->target) {
			RCU_INIT_POINTER(rule->ctarget, r);
			break;
		}
	}

	if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
		unresolved = 1;

	/* insert in ascending preference order, after equal preferences */
	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->action == FR_ACT_GOTO &&
			    r->target == rule->pref &&
			    rtnl_dereference(r->ctarget) == NULL) {
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

	if (rule->tun_id)
		ip_tunnel_need_metadata();

	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
	flush_route_cache(ops);
	rules_ops_put(ops);
	return 0;

errout_free:
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}
EXPORT_SYMBOL_GPL(fib_nl_newrule);
847

848 849
/**
 * fib_nl_delrule - netlink handler for RTM_DELRULE
 * @skb:    request skb
 * @nlh:    netlink message header
 * @extack: extended ack for error reporting
 *
 * Builds a template rule from the request (@nlrule, freed before
 * returning), locates the matching installed rule, and removes it.
 * Any GOTO rules pointing at the victim are re-targeted to the next
 * rule of equal preference, or marked unresolved.
 */
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule = NULL, *r, *nlrule = NULL;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL;
	bool user_priority = false;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid msg length");
		goto errout;
	}

	ops = lookup_rules_ops(net, frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		NL_SET_ERR_MSG(extack, "Rule family not supported");
		goto errout;
	}

	err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX,
				     ops->policy, extack);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Error parsing msg");
		goto errout;
	}

	err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority);
	if (err)
		goto errout;

	rule = rule_find(ops, frh, tb, nlrule, user_priority);
	if (!rule) {
		err = -ENOENT;
		goto errout;
	}

	if (rule->flags & FIB_RULE_PERMANENT) {
		err = -EPERM;
		goto errout;
	}

	if (ops->delete) {
		err = ops->delete(rule);
		if (err)
			goto errout;
	}

	if (rule->tun_id)
		ip_tunnel_unneed_metadata();

	list_del_rcu(&rule->list);

	if (rule->action == FR_ACT_GOTO) {
		ops->nr_goto_rules--;
		if (rtnl_dereference(rule->ctarget) == NULL)
			ops->unresolved_rules--;
	}

	/*
	 * Check if this rule is a target to any of them. If so,
	 * adjust to the next one with the same preference or
	 * disable them. As this operation is eventually very
	 * expensive, it is only performed if goto rules, except
	 * current if it is goto rule, have actually been added.
	 */
	if (ops->nr_goto_rules > 0) {
		struct fib_rule *n;

		n = list_next_entry(rule, list);
		if (&n->list == &ops->rules_list || n->pref != rule->pref)
			n = NULL;
		list_for_each_entry(r, &ops->rules_list, list) {
			if (rtnl_dereference(r->ctarget) != rule)
				continue;
			rcu_assign_pointer(r->ctarget, n);
			if (!n)
				ops->unresolved_rules++;
		}
	}

	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
				NULL);
	notify_rule_change(RTM_DELRULE, rule, ops, nlh,
			   NETLINK_CB(skb).portid);
	fib_rule_put(rule);
	flush_route_cache(ops);
	rules_ops_put(ops);
	kfree(nlrule);
	return 0;

errout:
	kfree(nlrule);
	rules_ops_put(ops);
	return err;
}
EXPORT_SYMBOL_GPL(fib_nl_delrule);
948

949 950 951 952
/* Worst-case netlink message size for one rule: generic header plus
 * every optional attribute, plus whatever the family reports via its
 * optional nlmsg_payload callback.  Used to size notification skbs.
 */
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
			 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
			 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
			 + nla_total_size(4) /* FRA_FWMARK */
			 + nla_total_size(4) /* FRA_FWMASK */
			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
			 + nla_total_size(sizeof(struct fib_kuid_range))
			 + nla_total_size(1) /* FRA_PROTOCOL */
			 + nla_total_size(1) /* FRA_IP_PROTO */
			 + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
			 + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}

974 975 976 977 978 979 980 981 982
/* Serialize @rule into @skb as one netlink message of the given @type.
 * Optional attributes are emitted only when their selector is set.
 * Returns 0, or -EMSGSIZE when @skb runs out of tailroom (the partial
 * message is cancelled).
 */
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->family = ops->family;
	/* table ids >= 256 don't fit the legacy 8-bit header field;
	 * signal RT_TABLE_COMPAT there and put the real id in FRA_TABLE
	 */
	frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, FRA_TABLE, rule->table))
		goto nla_put_failure;
	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
		goto nla_put_failure;
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
		goto nla_put_failure;

	if (rule->action == FR_ACT_GOTO &&
	    rcu_access_pointer(rule->ctarget) == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	if (rule->iifname[0]) {
		if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
			goto nla_put_failure;
		if (rule->iifindex == -1)
			frh->flags |= FIB_RULE_IIF_DETACHED;
	}

	if (rule->oifname[0]) {
		if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
			goto nla_put_failure;
		if (rule->oifindex == -1)
			frh->flags |= FIB_RULE_OIF_DETACHED;
	}

	if ((rule->pref &&
	     nla_put_u32(skb, FRA_PRIORITY, rule->pref)) ||
	    (rule->mark &&
	     nla_put_u32(skb, FRA_FWMARK, rule->mark)) ||
	    ((rule->mark_mask || rule->mark) &&
	     nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
	    (rule->target &&
	     nla_put_u32(skb, FRA_GOTO, rule->target)) ||
	    (rule->tun_id &&
	     nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
	    (rule->l3mdev &&
	     nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
	    (uid_range_set(&rule->uid_range) &&
	     nla_put_uid_range(skb, &rule->uid_range)) ||
	    (fib_rule_port_range_set(&rule->sport_range) &&
	     nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
	    (fib_rule_port_range_set(&rule->dport_range) &&
	     nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
	    (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
		goto nla_put_failure;

	if (rule->suppress_ifgroup != -1) {
		if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
			goto nla_put_failure;
	}

	/* family-specific attributes last */
	if (ops->fill(rule, skb, frh) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

T
Thomas Graf 已提交
1055 1056
/* Dump all rules of one family into @skb for RTM_GETRULE, resuming from
 * cb->args[1] on a continued dump.  Drops the ops reference taken by the
 * caller via rules_ops_put().  Returns the last fill result (0 or error).
 */
static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
{
	int idx = 0;
	struct fib_rule *rule;
	int err = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
			goto skip;

		err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWRULE,
				       NLM_F_MULTI, ops);
		if (err)
			break;
skip:
		idx++;
	}
	rcu_read_unlock();
	/* remember where to resume on the next dump call */
	cb->args[1] = idx;
	rules_ops_put(ops);

	return err;
}

1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107
static int fib_valid_dumprule_req(const struct nlmsghdr *nlh,
				   struct netlink_ext_ack *extack)
{
	struct fib_rule_hdr *frh;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request");
		return -EINVAL;
	}

	frh = nlmsg_data(nlh);
	if (frh->dst_len || frh->src_len || frh->tos || frh->table ||
	    frh->res1 || frh->res2 || frh->action || frh->flags) {
		NL_SET_ERR_MSG(extack,
			       "Invalid values in header for fib rule dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in fib rule dump request");
		return -EINVAL;
	}

	return 0;
}

T
Thomas Graf 已提交
1108 1109
/* Netlink dump handler for RTM_GETRULE.
 *
 * For an AF_UNSPEC request, walks every registered fib_rules_ops in the
 * netns and dumps each family's rules; cb->args[0] tracks the ops index
 * and cb->args[1] the per-ops rule index so the dump can resume.  For a
 * family-specific request, dumps just that family's rules.
 *
 * dump_rules() consumes the ops reference in both paths: lookup_rules_ops()
 * takes it in the family-specific path, try_module_get() pins the module
 * in the walk-all path.
 */
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct fib_rules_ops *ops;
	int idx = 0, family;

	/* Strict checking: reject requests with garbage in header/attrs. */
	if (cb->strict_check) {
		int err = fib_valid_dumprule_req(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = rtnl_msg_family(nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
		ops = lookup_rules_ops(net, family);
		if (ops == NULL)
			return -EAFNOSUPPORT;

		dump_rules(skb, cb, ops);

		return skb->len;
	}

	/* AF_UNSPEC: iterate over every family registered in this netns. */
	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		/* Skip families already dumped; pin the module before use. */
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		/* Finished this family: restart rule index for the next. */
		cb->args[1] = 0;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}
1151

D
Denis V. Lunev 已提交
1152
/* Broadcast a rule add/delete (@event is RTM_NEWRULE or RTM_DELRULE) to
 * the ops' netlink multicast group.  On allocation or fill failure the
 * error is reported to interested listeners via rtnl_set_sk_err() instead
 * of being returned; the function itself is void.
 */
static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct net *net;
	struct sk_buff *skb;
	int err = -ENOBUFS;

	net = ops->fro_net;
	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, ops->nlgroup, err);
}

/* Resolve by-name interface references in @rules against @dev: any rule
 * whose iif/oif name matches the device and whose index is still the
 * unresolved marker (-1) gets bound to dev->ifindex.
 */
static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *r;

	list_for_each_entry(r, rules, list) {
		if (r->iifindex == -1 && !strcmp(dev->name, r->iifname))
			r->iifindex = dev->ifindex;
		if (r->oifindex == -1 && !strcmp(dev->name, r->oifname))
			r->oifindex = dev->ifindex;
	}
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

1198
	list_for_each_entry(rule, rules, list) {
1199 1200
		if (rule->iifindex == dev->ifindex)
			rule->iifindex = -1;
1201 1202 1203
		if (rule->oifindex == dev->ifindex)
			rule->oifindex = -1;
	}
1204 1205 1206 1207
}


/* Netdevice notifier: keep the name-based iif/oif bindings of all rules in
 * the device's netns in sync with device lifetime and renames.  Runs under
 * RTNL (asserted below), which serializes against rule modification.
 */
static int fib_rules_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct fib_rules_ops *ops;

	ASSERT_RTNL();

	switch (event) {
	case NETDEV_REGISTER:
		/* New device: bind rules that name it. */
		list_for_each_entry(ops, &net->rules_ops, list)
			attach_rules(&ops->rules_list, dev);
		break;

	case NETDEV_CHANGENAME:
		/* Rename: drop old bindings, re-match under the new name. */
		list_for_each_entry(ops, &net->rules_ops, list) {
			detach_rules(&ops->rules_list, dev);
			attach_rules(&ops->rules_list, dev);
		}
		break;

	case NETDEV_UNREGISTER:
		/* Device going away: unbind so the ifindex can't dangle. */
		list_for_each_entry(ops, &net->rules_ops, list)
			detach_rules(&ops->rules_list, dev);
		break;
	}

	return NOTIFY_DONE;
}

/* Registered in fib_rules_init() to receive netdevice events. */
static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

1242
/* Per-netns setup: empty ops list plus the lock guarding registration. */
static int __net_init fib_rules_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->rules_ops);
	spin_lock_init(&net->rules_mod_lock);
	return 0;
}

1249 1250 1251 1252 1253
/* Per-netns teardown: every family must have unregistered its ops by now;
 * a non-empty list here indicates a leak, hence the warning. */
static void __net_exit fib_rules_net_exit(struct net *net)
{
	WARN_ON_ONCE(!list_empty(&net->rules_ops));
}

1254 1255
/* Pernet hooks registered in fib_rules_init(). */
static struct pernet_operations fib_rules_net_ops = {
	.init = fib_rules_net_init,
	.exit = fib_rules_net_exit,
};

1259 1260
/* Subsystem init: register the RTM_{NEW,DEL,GET}RULE rtnetlink handlers,
 * the pernet operations, and the netdevice notifier.  On failure, undo in
 * reverse order so no partial registration survives.
 */
static int __init fib_rules_init(void)
{
	int err;
	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);

	err = register_pernet_subsys(&fib_rules_net_ops);
	if (err < 0)
		goto fail;

	err = register_netdevice_notifier(&fib_rules_notifier);
	if (err < 0)
		goto fail_unregister;

	return 0;

fail_unregister:
	unregister_pernet_subsys(&fib_rules_net_ops);
fail:
	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
	return err;
}

subsys_initcall(fib_rules_init);