/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License as
 *	published by the Free Software Foundation, version 2.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>

static const struct fib_kuid_range fib_kuid_range_unset = {
	KUIDT_INIT(0),
	KUIDT_INIT(~0),
};

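/* Return true when none of the generic selectors (interfaces, fwmark,
 * tunnel id, flags, suppressors, uid range, port ranges) are set, i.e.
 * the rule matches every packet as far as the core is concerned.
 */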
bool fib_rule_matchall(const struct fib_rule *rule)
{
	if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
	    rule->flags)
		return false;
	if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
		return false;
	if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
	    !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
		return false;
	if (fib_rule_port_range_set(&rule->sport_range))
		return false;
	if (fib_rule_port_range_set(&rule->dport_range))
		return false;
	return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);

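/* Allocate a rule with action FR_ACT_TO_TBL pointing at @table and append
 * it to @ops' rule list. Meant to be called before the ops are published,
 * while the list is still unreachable from other contexts.
 */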
int fib_default_rule_add(struct fib_rules_ops *ops,
			 u32 pref, u32 table, u32 flags)
{
	struct fib_rule *r;

	r = kzalloc(ops->rule_size, GFP_KERNEL);
	if (r == NULL)
		return -ENOMEM;

	refcount_set(&r->refcnt, 1);
	r->action = FR_ACT_TO_TBL;
	r->pref = pref;
	r->table = table;
	r->flags = flags;
	r->proto = RTPROT_KERNEL;
	r->fr_net = ops->fro_net;
	r->uid_range = fib_kuid_range_unset;

	r->suppress_prefixlen = -1;
	r->suppress_ifgroup = -1;

	/* The lock is not required here, the list is unreachable
	 * at the moment this function is called */
	list_add_tail(&r->list, &ops->rules_list);
	return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

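/* Default preference for a rule added without FRA_PRIORITY: one below the
 * preference of the second rule in the list, or 0 if that cannot be used.
 */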
static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
{
	struct list_head *pos;
	struct fib_rule *rule;

	if (!list_empty(&ops->rules_list)) {
		pos = ops->rules_list.next;
		if (pos->next != &ops->rules_list) {
			rule = list_entry(pos->next, struct fib_rule, list);
			if (rule->pref)
				return rule->pref - 1;
		}
	}

	return 0;
}

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid);

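/* Find the ops registered for @family in @net and take a reference on the
 * owning module; drop it again with rules_ops_put().
 */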
static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (ops)
		module_put(ops->owner);
}

static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (ops->flush_cache)
		ops->flush_cache(ops);
}

static int __fib_rules_register(struct fib_rules_ops *ops)
{
	int err = -EEXIST;
	struct fib_rules_ops *o;
	struct net *net;

	net = ops->fro_net;

	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

	spin_lock(&net->rules_mod_lock);
	list_for_each_entry(o, &net->rules_ops, list)
		if (ops->family == o->family)
			goto errout;

	list_add_tail_rcu(&ops->list, &net->rules_ops);
	err = 0;
errout:
	spin_unlock(&net->rules_mod_lock);

	return err;
}

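/* Register a copy of @tmpl for @net. Returns the new ops or an ERR_PTR()
 * if the family is already registered or the template is malformed.
 */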
struct fib_rules_ops *
fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
{
	struct fib_rules_ops *ops;
	int err;

	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ops->rules_list);
	ops->fro_net = net;

	err = __fib_rules_register(ops);
	if (err) {
		kfree(ops);
		ops = ERR_PTR(err);
	}

	return ops;
}
EXPORT_SYMBOL_GPL(fib_rules_register);

static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
	struct fib_rule *rule, *tmp;

	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
		list_del_rcu(&rule->list);
		if (ops->delete)
			ops->delete(rule);
		fib_rule_put(rule);
	}
}

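/* Unlink @ops from the per-netns list, drop all of its rules and free the
 * ops after an RCU grace period.
 */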
void fib_rules_unregister(struct fib_rules_ops *ops)
{
	struct net *net = ops->fro_net;

	spin_lock(&net->rules_mod_lock);
	list_del_rcu(&ops->list);
	spin_unlock(&net->rules_mod_lock);

	fib_rules_cleanup_ops(ops);
	kfree_rcu(ops, rcu);
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);

static int uid_range_set(struct fib_kuid_range *range)
{
	return uid_valid(range->start) && uid_valid(range->end);
}

static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
{
	struct fib_rule_uid_range *in;
	struct fib_kuid_range out;

	in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);

	out.start = make_kuid(current_user_ns(), in->start);
	out.end = make_kuid(current_user_ns(), in->end);

	return out;
}

static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
{
	struct fib_rule_uid_range out = {
		from_kuid_munged(current_user_ns(), range->start),
		from_kuid_munged(current_user_ns(), range->end)
	};

	return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}

static int nla_get_port_range(struct nlattr *pattr,
			      struct fib_rule_port_range *port_range)
{
	const struct fib_rule_port_range *pr = nla_data(pattr);

	if (!fib_rule_port_range_valid(pr))
		return -EINVAL;

	port_range->start = pr->start;
	port_range->end = pr->end;

	return 0;
}

static int nla_put_port_range(struct sk_buff *skb, int attrtype,
			      struct fib_rule_port_range *range)
{
	return nla_put(skb, attrtype, sizeof(*range), range);
}

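/* Evaluate the generic selectors (iif/oif, fwmark, tunnel id, l3mdev, uid
 * range) followed by the family specific ->match(). FIB_RULE_INVERT flips
 * the result.
 */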
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
			  struct flowi *fl, int flags,
			  struct fib_lookup_arg *arg)
{
	int ret = 0;

	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
		goto out;

	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
		goto out;

	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
		goto out;

	if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
		goto out;

	if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
		goto out;

	if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
	    uid_gt(fl->flowi_uid, rule->uid_range.end))
		goto out;

	ret = ops->match(rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

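/* Walk the rules in order of preference, following goto targets, until a
 * rule's action returns something other than -EAGAIN. The matching rule is
 * returned in arg->rule, with a reference taken unless FIB_LOOKUP_NOREF is
 * set.
 */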
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		if (!fib_rule_match(rule, ops, fl, flags, arg))
			continue;

		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
			err = ops->action(rule, fl, flags, arg);

		if (!err && ops->suppress && ops->suppress(rule, arg))
			continue;

		if (err != -EAGAIN) {
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(refcount_inc_not_zero(&rule->refcnt))) {
				arg->rule = rule;
				goto out;
			}
			break;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);

static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
				  enum fib_event_type event_type,
				  struct fib_rule *rule, int family)
{
	struct fib_rule_notifier_info info = {
		.info.family = family,
		.rule = rule,
	};

	return call_fib_notifier(nb, net, event_type, &info.info);
}

static int call_fib_rule_notifiers(struct net *net,
				   enum fib_event_type event_type,
				   struct fib_rule *rule,
				   struct fib_rules_ops *ops,
				   struct netlink_ext_ack *extack)
{
	struct fib_rule_notifier_info info = {
		.info.family = ops->family,
		.info.extack = extack,
		.rule = rule,
	};

	ops->fib_rules_seq++;
	return call_fib_notifiers(net, event_type, &info.info);
}

/* Called with rcu_read_lock() */
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
{
	struct fib_rules_ops *ops;
	struct fib_rule *rule;

	ops = lookup_rules_ops(net, family);
	if (!ops)
		return -EAFNOSUPPORT;
	list_for_each_entry_rcu(rule, &ops->rules_list, list)
		call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
				       family);
	rules_ops_put(ops);

	return 0;
}
EXPORT_SYMBOL_GPL(fib_rules_dump);

unsigned int fib_rules_seq_read(struct net *net, int family)
{
	unsigned int fib_rules_seq;
	struct fib_rules_ops *ops;

	ASSERT_RTNL();

	ops = lookup_rules_ops(net, family);
	if (!ops)
		return 0;
	fib_rules_seq = ops->fib_rules_seq;
	rules_ops_put(ops);

	return fib_rules_seq;
}
EXPORT_SYMBOL_GPL(fib_rules_seq_read);

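/* Find an existing rule matching every selector that is set in @rule, as
 * decoded from a netlink request. Used for NLM_F_EXCL checks and for
 * RTM_DELRULE lookups.
 */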
static struct fib_rule *rule_find(struct fib_rules_ops *ops,
				  struct fib_rule_hdr *frh,
				  struct nlattr **tb,
				  struct fib_rule *rule,
				  bool user_priority)
{
	struct fib_rule *r;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (rule->action && r->action != rule->action)
			continue;

		if (rule->table && r->table != rule->table)
			continue;

		if (user_priority && r->pref != rule->pref)
			continue;

		if (rule->iifname[0] &&
		    memcmp(r->iifname, rule->iifname, IFNAMSIZ))
			continue;

		if (rule->oifname[0] &&
		    memcmp(r->oifname, rule->oifname, IFNAMSIZ))
			continue;

		if (rule->mark && r->mark != rule->mark)
			continue;

		if (rule->mark_mask && r->mark_mask != rule->mark_mask)
			continue;

		if (rule->tun_id && r->tun_id != rule->tun_id)
			continue;

		if (r->fr_net != rule->fr_net)
			continue;

		if (rule->l3mdev && r->l3mdev != rule->l3mdev)
			continue;

		if (uid_range_set(&rule->uid_range) &&
		    (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
		    !uid_eq(r->uid_range.end, rule->uid_range.end)))
			continue;

		if (rule->ip_proto && r->ip_proto != rule->ip_proto)
			continue;

		if (fib_rule_port_range_set(&rule->sport_range) &&
		    !fib_rule_port_range_compare(&r->sport_range,
						 &rule->sport_range))
			continue;

		if (fib_rule_port_range_set(&rule->dport_range) &&
		    !fib_rule_port_range_compare(&r->dport_range,
						 &rule->dport_range))
			continue;

		if (!ops->compare(r, frh, tb))
			continue;
		return r;
	}

	return NULL;
}

#ifdef CONFIG_NET_L3_MASTER_DEV
static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
			      struct netlink_ext_ack *extack)
{
	nlrule->l3mdev = nla_get_u8(nla);
	if (nlrule->l3mdev != 1) {
		NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute");
		return -1;
	}

	return 0;
}
#else
static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
			      struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel");
	return -1;
}
#endif

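/* Parse the attributes of an RTM_NEWRULE/RTM_DELRULE request into a newly
 * allocated rule. On success the caller owns *rule and must insert or free
 * it.
 */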
static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
		       struct netlink_ext_ack *extack,
		       struct fib_rules_ops *ops,
		       struct nlattr *tb[],
		       struct fib_rule **rule,
		       bool *user_priority)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rule *nlrule = NULL;
	int err = -EINVAL;

	if (frh->src_len)
		if (!tb[FRA_SRC] ||
		    frh->src_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_SRC]) != ops->addr_size) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			goto errout;
	}

	if (frh->dst_len)
		if (!tb[FRA_DST] ||
		    frh->dst_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_DST]) != ops->addr_size) {
			NL_SET_ERR_MSG(extack, "Invalid dst address");
			goto errout;
	}

	nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (!nlrule) {
		err = -ENOMEM;
		goto errout;
	}
	refcount_set(&nlrule->refcnt, 1);
	nlrule->fr_net = net;

	if (tb[FRA_PRIORITY]) {
		nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]);
		*user_priority = true;
	} else {
		nlrule->pref = fib_default_rule_pref(ops);
	}

	nlrule->proto = tb[FRA_PROTOCOL] ?
		nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;

	if (tb[FRA_IIFNAME]) {
		struct net_device *dev;

		nlrule->iifindex = -1;
		nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, nlrule->iifname);
		if (dev)
			nlrule->iifindex = dev->ifindex;
	}

	if (tb[FRA_OIFNAME]) {
		struct net_device *dev;

		nlrule->oifindex = -1;
		nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, nlrule->oifname);
		if (dev)
			nlrule->oifindex = dev->ifindex;
	}

	if (tb[FRA_FWMARK]) {
		nlrule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (nlrule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			nlrule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

	if (tb[FRA_TUN_ID])
		nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);

	err = -EINVAL;
	if (tb[FRA_L3MDEV] &&
	    fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
		goto errout_free;

	nlrule->action = frh->action;
	nlrule->flags = frh->flags;
	nlrule->table = frh_get_table(frh, tb);
	if (tb[FRA_SUPPRESS_PREFIXLEN])
		nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
	else
		nlrule->suppress_prefixlen = -1;

	if (tb[FRA_SUPPRESS_IFGROUP])
		nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
	else
		nlrule->suppress_ifgroup = -1;

	if (tb[FRA_GOTO]) {
		if (nlrule->action != FR_ACT_GOTO) {
			NL_SET_ERR_MSG(extack, "Unexpected goto");
			goto errout_free;
		}

		nlrule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (nlrule->target <= nlrule->pref) {
			NL_SET_ERR_MSG(extack, "Backward goto not supported");
			goto errout_free;
		}
	} else if (nlrule->action == FR_ACT_GOTO) {
		NL_SET_ERR_MSG(extack, "Missing goto target for action goto");
		goto errout_free;
	}

	if (nlrule->l3mdev && nlrule->table) {
		NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive");
		goto errout_free;
	}

	if (tb[FRA_UID_RANGE]) {
		if (current_user_ns() != net->user_ns) {
			err = -EPERM;
			NL_SET_ERR_MSG(extack, "No permission to set uid");
			goto errout_free;
		}

		nlrule->uid_range = nla_get_kuid_range(tb);

		if (!uid_range_set(&nlrule->uid_range) ||
		    !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) {
			NL_SET_ERR_MSG(extack, "Invalid uid range");
			goto errout_free;
		}
	} else {
		nlrule->uid_range = fib_kuid_range_unset;
	}

	if (tb[FRA_IP_PROTO])
		nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);

	if (tb[FRA_SPORT_RANGE]) {
		err = nla_get_port_range(tb[FRA_SPORT_RANGE],
					 &nlrule->sport_range);
		if (err) {
			NL_SET_ERR_MSG(extack, "Invalid sport range");
			goto errout_free;
		}
	}

	if (tb[FRA_DPORT_RANGE]) {
		err = nla_get_port_range(tb[FRA_DPORT_RANGE],
					 &nlrule->dport_range);
		if (err) {
			NL_SET_ERR_MSG(extack, "Invalid dport range");
			goto errout_free;
		}
	}

	*rule = nlrule;

	return 0;

errout_free:
	kfree(nlrule);
errout:
	return err;
}

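/* RTM_NEWRULE handler: build the rule from the request, insert it in
 * preference order, resolve pending goto targets and notify listeners.
 */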
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule = NULL, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX + 1];
	int err = -EINVAL, unresolved = 0;
	bool user_priority = false;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid msg length");
		goto errout;
	}

	ops = lookup_rules_ops(net, frh->family);
	if (!ops) {
		err = -EAFNOSUPPORT;
		NL_SET_ERR_MSG(extack, "Rule family not supported");
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Error parsing msg");
		goto errout;
	}

	err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority);
	if (err)
		goto errout;

	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
	    rule_find(ops, frh, tb, rule, user_priority)) {
		err = -EEXIST;
		goto errout_free;
	}

	err = ops->configure(rule, skb, frh, tb, extack);
	if (err < 0)
		goto errout_free;

	err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
				      extack);
	if (err < 0)
		goto errout_free;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref == rule->target) {
			RCU_INIT_POINTER(rule->ctarget, r);
			break;
		}
	}

	if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
		unresolved = 1;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->action == FR_ACT_GOTO &&
			    r->target == rule->pref &&
			    rtnl_dereference(r->ctarget) == NULL) {
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

	if (rule->tun_id)
		ip_tunnel_need_metadata();

	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
	flush_route_cache(ops);
	rules_ops_put(ops);
	return 0;

errout_free:
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}
EXPORT_SYMBOL_GPL(fib_nl_newrule);

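/* RTM_DELRULE handler: find the rule described by the request, unlink it,
 * re-point any goto rules that targeted it and notify listeners.
 */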
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule = NULL, *r, *nlrule = NULL;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL;
	bool user_priority = false;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid msg length");
		goto errout;
	}

	ops = lookup_rules_ops(net, frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		NL_SET_ERR_MSG(extack, "Rule family not supported");
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Error parsing msg");
		goto errout;
	}

	err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority);
	if (err)
		goto errout;

	rule = rule_find(ops, frh, tb, nlrule, user_priority);
	if (!rule) {
		err = -ENOENT;
		goto errout;
	}

	if (rule->flags & FIB_RULE_PERMANENT) {
		err = -EPERM;
		goto errout;
	}

	if (ops->delete) {
		err = ops->delete(rule);
		if (err)
			goto errout;
	}

	if (rule->tun_id)
		ip_tunnel_unneed_metadata();

	list_del_rcu(&rule->list);

	if (rule->action == FR_ACT_GOTO) {
		ops->nr_goto_rules--;
		if (rtnl_dereference(rule->ctarget) == NULL)
			ops->unresolved_rules--;
	}

	/*
	 * Check if this rule is a target of any goto rules. If so, point
	 * them at the next rule with the same preference, or mark them
	 * unresolved. As this operation can be expensive, it is only
	 * performed if goto rules (other than the rule being deleted, if
	 * it is itself a goto rule) have actually been added.
	 */
	if (ops->nr_goto_rules > 0) {
		struct fib_rule *n;

		n = list_next_entry(rule, list);
		if (&n->list == &ops->rules_list || n->pref != rule->pref)
			n = NULL;
		list_for_each_entry(r, &ops->rules_list, list) {
			if (rtnl_dereference(r->ctarget) != rule)
				continue;
			rcu_assign_pointer(r->ctarget, n);
			if (!n)
				ops->unresolved_rules++;
		}
	}

	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
				NULL);
	notify_rule_change(RTM_DELRULE, rule, ops, nlh,
			   NETLINK_CB(skb).portid);
	fib_rule_put(rule);
	flush_route_cache(ops);
	rules_ops_put(ops);
	kfree(nlrule);
	return 0;

errout:
	kfree(nlrule);
	rules_ops_put(ops);
	return err;
}
EXPORT_SYMBOL_GPL(fib_nl_delrule);

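/* Upper bound on the netlink message size needed to dump @rule, including
 * the family specific payload reported by ->nlmsg_payload().
 */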
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
			 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
			 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
			 + nla_total_size(4) /* FRA_FWMARK */
			 + nla_total_size(4) /* FRA_FWMASK */
			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
			 + nla_total_size(sizeof(struct fib_kuid_range))
			 + nla_total_size(1) /* FRA_PROTOCOL */
			 + nla_total_size(1) /* FRA_IP_PROTO */
			 + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
			 + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}

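/* Fill a single RTM_NEWRULE/RTM_DELRULE message for @rule into @skb. */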
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->family = ops->family;
	frh->table = rule->table;
	if (nla_put_u32(skb, FRA_TABLE, rule->table))
		goto nla_put_failure;
	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
		goto nla_put_failure;
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
		goto nla_put_failure;

	if (rule->action == FR_ACT_GOTO &&
	    rcu_access_pointer(rule->ctarget) == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	if (rule->iifname[0]) {
		if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
			goto nla_put_failure;
		if (rule->iifindex == -1)
			frh->flags |= FIB_RULE_IIF_DETACHED;
	}

	if (rule->oifname[0]) {
		if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
			goto nla_put_failure;
		if (rule->oifindex == -1)
			frh->flags |= FIB_RULE_OIF_DETACHED;
	}

	if ((rule->pref &&
	     nla_put_u32(skb, FRA_PRIORITY, rule->pref)) ||
	    (rule->mark &&
	     nla_put_u32(skb, FRA_FWMARK, rule->mark)) ||
	    ((rule->mark_mask || rule->mark) &&
	     nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
	    (rule->target &&
	     nla_put_u32(skb, FRA_GOTO, rule->target)) ||
	    (rule->tun_id &&
	     nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
	    (rule->l3mdev &&
	     nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
	    (uid_range_set(&rule->uid_range) &&
	     nla_put_uid_range(skb, &rule->uid_range)) ||
	    (fib_rule_port_range_set(&rule->sport_range) &&
	     nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
	    (fib_rule_port_range_set(&rule->dport_range) &&
	     nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
	    (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
		goto nla_put_failure;

	if (rule->suppress_ifgroup != -1) {
		if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
			goto nla_put_failure;
	}

	if (ops->fill(rule, skb, frh) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

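/* Dump the rules of one ops, resuming at the index saved in cb->args[1]. */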
static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
{
	int idx = 0;
	struct fib_rule *rule;
	int err = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
			goto skip;

		err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWRULE,
				       NLM_F_MULTI, ops);
		if (err)
			break;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[1] = idx;
	rules_ops_put(ops);

	return err;
}

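/* RTM_GETRULE dump handler: dump a single family if the request names one,
 * otherwise walk all registered families.
 */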
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rules_ops *ops;
	int idx = 0, family;

	family = rtnl_msg_family(cb->nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
		ops = lookup_rules_ops(net, family);
		if (ops == NULL)
			return -EAFNOSUPPORT;

		dump_rules(skb, cb, ops);

		return skb->len;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		cb->args[1] = 0;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

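/* Multicast an RTM_NEWRULE/RTM_DELRULE notification for @rule to the ops'
 * netlink group.
 */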
static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct net *net;
	struct sk_buff *skb;
	int err = -ENOBUFS;

	net = ops->fro_net;
	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, ops->nlgroup, err);
}

static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == -1 &&
		    strcmp(dev->name, rule->iifname) == 0)
			rule->iifindex = dev->ifindex;
		if (rule->oifindex == -1 &&
		    strcmp(dev->name, rule->oifname) == 0)
			rule->oifindex = dev->ifindex;
	}
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == dev->ifindex)
			rule->iifindex = -1;
		if (rule->oifindex == dev->ifindex)
			rule->oifindex = -1;
	}
}


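/* Netdevice notifier: keep the cached iifindex/oifindex of rules that match
 * by interface name in sync across device register, rename and unregister.
 */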
static int fib_rules_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct fib_rules_ops *ops;

	ASSERT_RTNL();

	switch (event) {
	case NETDEV_REGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			attach_rules(&ops->rules_list, dev);
		break;

	case NETDEV_CHANGENAME:
		list_for_each_entry(ops, &net->rules_ops, list) {
			detach_rules(&ops->rules_list, dev);
			attach_rules(&ops->rules_list, dev);
		}
		break;

	case NETDEV_UNREGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			detach_rules(&ops->rules_list, dev);
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

static int __net_init fib_rules_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->rules_ops);
	spin_lock_init(&net->rules_mod_lock);
	return 0;
}

static void __net_exit fib_rules_net_exit(struct net *net)
{
	WARN_ON_ONCE(!list_empty(&net->rules_ops));
}

static struct pernet_operations fib_rules_net_ops = {
	.init = fib_rules_net_init,
	.exit = fib_rules_net_exit,
};

static int __init fib_rules_init(void)
{
	int err;
	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);

	err = register_pernet_subsys(&fib_rules_net_ops);
	if (err < 0)
		goto fail;

	err = register_netdevice_notifier(&fib_rules_notifier);
	if (err < 0)
		goto fail_unregister;

	return 0;

fail_unregister:
	unregister_pernet_subsys(&fib_rules_net_ops);
fail:
	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
	return err;
}

subsys_initcall(fib_rules_init);