fib_rules.c 21.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License as
 *	published by the Free Software Foundation, version 2.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
13
#include <linux/slab.h>
14
#include <linux/list.h>
15
#include <linux/module.h>
16
#include <net/net_namespace.h>
17
#include <net/sock.h>
18
#include <net/fib_rules.h>
19
#include <net/ip_tunnels.h>
20

21 22 23 24 25
/*
 * UID range given to rules that carry no FRA_UID_RANGE attribute; its two
 * members are initialised to 0 and ~0.
 * NOTE(review): presumably (start, end) in that order - confirm against the
 * struct fib_kuid_range declaration.
 */
static const struct fib_kuid_range fib_kuid_range_unset = {
	KUIDT_INIT(0),
	KUIDT_INIT(~0),
};

26 27 28 29 30 31 32 33 34 35 36 37 38 39
/*
 * fib_default_rule_add - append a FR_ACT_TO_TBL rule to a family's rule list
 * @ops:   rules operations whose rules_list receives the rule
 * @pref:  priority stored in the rule
 * @table: table stored in the rule
 * @flags: flags stored in the rule
 *
 * Allocates ops->rule_size zeroed bytes, initialises the rule with a
 * reference count of one and links it at the tail of ops->rules_list.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
int fib_default_rule_add(struct fib_rules_ops *ops,
			 u32 pref, u32 table, u32 flags)
{
	struct fib_rule *rule = kzalloc(ops->rule_size, GFP_KERNEL);

	if (!rule)
		return -ENOMEM;

	atomic_set(&rule->refcnt, 1);
	rule->fr_net = ops->fro_net;
	rule->action = FR_ACT_TO_TBL;
	rule->flags = flags;
	rule->table = table;
	rule->pref = pref;

	/* Unset UID range; -1 marks both suppressors as unset. */
	rule->uid_range = fib_kuid_range_unset;
	rule->suppress_prefixlen = -1;
	rule->suppress_ifgroup = -1;

	/*
	 * The lock is not required here: the list is unreachable
	 * at the moment this function is called.
	 */
	list_add_tail(&rule->list, &ops->rules_list);

	return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

53
static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
{
	struct list_head *pos;
	struct fib_rule *rule;

	if (!list_empty(&ops->rules_list)) {
		pos = ops->rules_list.next;
		if (pos->next != &ops->rules_list) {
			rule = list_entry(pos->next, struct fib_rule, list);
			if (rule->pref)
				return rule->pref - 1;
		}
	}

	return 0;
}

D
Denis V. Lunev 已提交
70
/* Forward declaration: the definition follows the dump code further down. */
static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops,
			       struct nlmsghdr *nlh, u32 pid);
73

74
static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
75 76 77 78
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
79
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

/* Drop the owner-module reference of @ops; a NULL @ops is ignored. */
static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (!ops)
		return;

	module_put(ops->owner);
}

98 99 100
/* Invoke the family's optional flush_cache() callback, if one is set. */
static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (!ops->flush_cache)
		return;

	ops->flush_cache(ops);
}

104
static int __fib_rules_register(struct fib_rules_ops *ops)
105 106 107
{
	int err = -EEXIST;
	struct fib_rules_ops *o;
D
Denis V. Lunev 已提交
108 109 110
	struct net *net;

	net = ops->fro_net;
111 112 113 114 115 116 117 118 119

	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

120 121
	spin_lock(&net->rules_mod_lock);
	list_for_each_entry(o, &net->rules_ops, list)
122 123 124
		if (ops->family == o->family)
			goto errout;

125
	list_add_tail_rcu(&ops->list, &net->rules_ops);
126 127
	err = 0;
errout:
128
	spin_unlock(&net->rules_mod_lock);
129 130 131 132

	return err;
}

133
struct fib_rules_ops *
134
fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
135 136 137 138
{
	struct fib_rules_ops *ops;
	int err;

139
	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
140 141 142 143 144 145 146 147 148 149 150 151 152 153
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ops->rules_list);
	ops->fro_net = net;

	err = __fib_rules_register(ops);
	if (err) {
		kfree(ops);
		ops = ERR_PTR(err);
	}

	return ops;
}
154 155
EXPORT_SYMBOL_GPL(fib_rules_register);

156
static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
157 158 159
{
	struct fib_rule *rule, *tmp;

160
	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
161
		list_del_rcu(&rule->list);
162 163
		if (ops->delete)
			ops->delete(rule);
164 165 166 167
		fib_rule_put(rule);
	}
}

D
Denis V. Lunev 已提交
168
void fib_rules_unregister(struct fib_rules_ops *ops)
169
{
D
Denis V. Lunev 已提交
170
	struct net *net = ops->fro_net;
171

172
	spin_lock(&net->rules_mod_lock);
173
	list_del_rcu(&ops->list);
174
	spin_unlock(&net->rules_mod_lock);
175

176
	fib_rules_cleanup_ops(ops);
177
	kfree_rcu(ops, rcu);
178 179 180
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);

181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
/* Non-zero when both ends of @range pass uid_valid(), zero otherwise. */
static int uid_range_set(struct fib_kuid_range *range)
{
	if (!uid_valid(range->start))
		return 0;

	return uid_valid(range->end);
}

static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
{
	struct fib_rule_uid_range *in;
	struct fib_kuid_range out;

	in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);

	out.start = make_kuid(current_user_ns(), in->start);
	out.end = make_kuid(current_user_ns(), in->end);

	return out;
}

static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
{
	struct fib_rule_uid_range out = {
		from_kuid_munged(current_user_ns(), range->start),
		from_kuid_munged(current_user_ns(), range->end)
	};

	return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}

209
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
D
David Ahern 已提交
210 211
			  struct flowi *fl, int flags,
			  struct fib_lookup_arg *arg)
212 213 214
{
	int ret = 0;

215
	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
216 217
		goto out;

218
	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
219 220
		goto out;

221
	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
222 223
		goto out;

224 225 226
	if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
		goto out;

D
David Ahern 已提交
227 228 229
	if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
		goto out;

230 231 232 233
	if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
	    uid_gt(fl->flowi_uid, rule->uid_range.end))
		goto out;

234 235 236 237 238
	ret = ops->match(rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

239 240 241 242 243 244 245 246
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

247
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
T
Thomas Graf 已提交
248
jumped:
D
David Ahern 已提交
249
		if (!fib_rule_match(rule, ops, fl, flags, arg))
250 251
			continue;

T
Thomas Graf 已提交
252 253 254 255 256 257 258 259 260 261
		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				continue;
			} else {
				rule = target;
				goto jumped;
			}
262 263 264
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
T
Thomas Graf 已提交
265 266
			err = ops->action(rule, fl, flags, arg);

267 268 269
		if (!err && ops->suppress && ops->suppress(rule, arg))
			continue;

270
		if (err != -EAGAIN) {
E
Eric Dumazet 已提交
271 272
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(atomic_inc_not_zero(&rule->refcnt))) {
273 274 275 276
				arg->rule = rule;
				goto out;
			}
			break;
277 278 279
		}
	}

280
	err = -ESRCH;
281 282 283 284 285 286 287
out:
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
/*
 * Sanity-check the source/destination selectors of a rule message.
 *
 * When frh->src_len (or frh->dst_len) is non-zero, the matching FRA_SRC
 * (or FRA_DST) attribute must be present, be exactly ops->addr_size bytes
 * long, and the prefix length must not exceed ops->addr_size * 8 bits.
 *
 * Returns 0 when the message is acceptable, -EINVAL otherwise.
 */
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
			    struct fib_rules_ops *ops)
{
	if (frh->src_len &&
	    (tb[FRA_SRC] == NULL ||
	     frh->src_len > (ops->addr_size * 8) ||
	     nla_len(tb[FRA_SRC]) != ops->addr_size))
		return -EINVAL;

	if (frh->dst_len &&
	    (tb[FRA_DST] == NULL ||
	     frh->dst_len > (ops->addr_size * 8) ||
	     nla_len(tb[FRA_DST]) != ops->addr_size))
		return -EINVAL;

	return 0;
}

310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
/*
 * Check whether a rule equivalent to @rule is already on ops->rules_list.
 *
 * @ops:  rules operations whose list is searched
 * @frh:  header of the request, handed to the family's compare() callback
 * @tb:   parsed attributes of the request, handed to compare() as well
 * @rule: candidate rule, with its generic fields already filled in
 *
 * Two rules are considered equal when action, table, priority, interface
 * names, mark and mask, tunnel id, namespace, l3mdev flag and UID range
 * all agree and the family-specific compare() callback accepts the pair.
 * The UID range is part of the comparison so that rules differing only in
 * their UID range are not mistaken for duplicates; fib_nl_delrule() treats
 * the UID range as a distinguishing selector too.
 *
 * Returns 1 if such a rule exists, 0 otherwise.
 */
static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
		       struct nlattr **tb, struct fib_rule *rule)
{
	struct fib_rule *r;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->action != rule->action)
			continue;

		if (r->table != rule->table)
			continue;

		if (r->pref != rule->pref)
			continue;

		if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
			continue;

		if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
			continue;

		if (r->mark != rule->mark)
			continue;

		if (r->mark_mask != rule->mark_mask)
			continue;

		if (r->tun_id != rule->tun_id)
			continue;

		if (r->fr_net != rule->fr_net)
			continue;

		if (r->l3mdev != rule->l3mdev)
			continue;

		if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
		    !uid_eq(r->uid_range.end, rule->uid_range.end))
			continue;

		if (!ops->compare(r, frh, tb))
			continue;
		return 1;
	}
	return 0;
}

D
David Ahern 已提交
353
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
354
{
355
	struct net *net = sock_net(skb->sk);
356 357 358 359
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX+1];
T
Thomas Graf 已提交
360
	int err = -EINVAL, unresolved = 0;
361 362 363 364

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

365
	ops = lookup_rules_ops(net, frh->family);
366
	if (ops == NULL) {
367
		err = -EAFNOSUPPORT;
368 369 370 371 372 373 374
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

375 376 377 378
	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

379 380 381 382 383
	rule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (rule == NULL) {
		err = -ENOMEM;
		goto errout;
	}
384
	rule->fr_net = net;
385

386 387
	rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
	                              : fib_default_rule_pref(ops);
388

389
	if (tb[FRA_IIFNAME]) {
390 391
		struct net_device *dev;

392 393 394
		rule->iifindex = -1;
		nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->iifname);
395
		if (dev)
396
			rule->iifindex = dev->ifindex;
397 398
	}

399 400 401 402 403 404 405 406 407 408
	if (tb[FRA_OIFNAME]) {
		struct net_device *dev;

		rule->oifindex = -1;
		nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->oifname);
		if (dev)
			rule->oifindex = dev->ifindex;
	}

409 410 411 412 413 414 415 416 417 418 419 420
	if (tb[FRA_FWMARK]) {
		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (rule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			rule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

421 422 423
	if (tb[FRA_TUN_ID])
		rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);

D
David Ahern 已提交
424 425 426 427 428 429 430 431
	if (tb[FRA_L3MDEV]) {
#ifdef CONFIG_NET_L3_MASTER_DEV
		rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
		if (rule->l3mdev != 1)
#endif
			goto errout_free;
	}

432 433
	rule->action = frh->action;
	rule->flags = frh->flags;
434
	rule->table = frh_get_table(frh, tb);
435 436 437 438
	if (tb[FRA_SUPPRESS_PREFIXLEN])
		rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
	else
		rule->suppress_prefixlen = -1;
439

440 441
	if (tb[FRA_SUPPRESS_IFGROUP])
		rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
442 443
	else
		rule->suppress_ifgroup = -1;
444

T
Thomas Graf 已提交
445 446 447 448 449 450 451 452 453 454
	err = -EINVAL;
	if (tb[FRA_GOTO]) {
		if (rule->action != FR_ACT_GOTO)
			goto errout_free;

		rule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (rule->target <= rule->pref)
			goto errout_free;

455
		list_for_each_entry(r, &ops->rules_list, list) {
T
Thomas Graf 已提交
456
			if (r->pref == rule->target) {
E
Eric Dumazet 已提交
457
				RCU_INIT_POINTER(rule->ctarget, r);
T
Thomas Graf 已提交
458 459 460 461
				break;
			}
		}

E
Eric Dumazet 已提交
462
		if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
T
Thomas Graf 已提交
463 464 465 466
			unresolved = 1;
	} else if (rule->action == FR_ACT_GOTO)
		goto errout_free;

D
David Ahern 已提交
467 468 469
	if (rule->l3mdev && rule->table)
		goto errout_free;

470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
	if (tb[FRA_UID_RANGE]) {
		if (current_user_ns() != net->user_ns) {
			err = -EPERM;
			goto errout_free;
		}

		rule->uid_range = nla_get_kuid_range(tb);

		if (!uid_range_set(&rule->uid_range) ||
		    !uid_lte(rule->uid_range.start, rule->uid_range.end))
			goto errout_free;
	} else {
		rule->uid_range = fib_kuid_range_unset;
	}

485 486 487 488 489 490
	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
	    rule_exists(ops, frh, tb, rule)) {
		err = -EEXIST;
		goto errout_free;
	}

491
	err = ops->configure(rule, skb, frh, tb);
492 493 494
	if (err < 0)
		goto errout_free;

495
	list_for_each_entry(r, &ops->rules_list, list) {
496 497 498 499 500 501 502
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	fib_rule_get(rule);

E
Eric Dumazet 已提交
503 504 505 506 507
	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

T
Thomas Graf 已提交
508 509 510 511 512
	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
513
		list_for_each_entry(r, &ops->rules_list, list) {
T
Thomas Graf 已提交
514
			if (r->action == FR_ACT_GOTO &&
515 516
			    r->target == rule->pref &&
			    rtnl_dereference(r->ctarget) == NULL) {
T
Thomas Graf 已提交
517 518 519 520 521 522 523 524 525 526 527 528 529
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

530 531 532
	if (rule->tun_id)
		ip_tunnel_need_metadata();

533
	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
534
	flush_route_cache(ops);
535 536 537 538 539 540 541 542 543
	rules_ops_put(ops);
	return 0;

errout_free:
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}
D
David Ahern 已提交
544
EXPORT_SYMBOL_GPL(fib_nl_newrule);
545

D
David Ahern 已提交
546
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
547
{
548
	struct net *net = sock_net(skb->sk);
549 550
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
T
Thomas Graf 已提交
551
	struct fib_rule *rule, *tmp;
552
	struct nlattr *tb[FRA_MAX+1];
553
	struct fib_kuid_range range;
554 555 556 557 558
	int err = -EINVAL;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

559
	ops = lookup_rules_ops(net, frh->family);
560
	if (ops == NULL) {
561
		err = -EAFNOSUPPORT;
562 563 564 565 566 567 568
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

569 570 571 572
	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

573 574 575 576 577 578 579 580
	if (tb[FRA_UID_RANGE]) {
		range = nla_get_kuid_range(tb);
		if (!uid_range_set(&range))
			goto errout;
	} else {
		range = fib_kuid_range_unset;
	}

581
	list_for_each_entry(rule, &ops->rules_list, list) {
582 583 584
		if (frh->action && (frh->action != rule->action))
			continue;

585 586
		if (frh_get_table(frh, tb) &&
		    (frh_get_table(frh, tb) != rule->table))
587 588 589 590 591 592
			continue;

		if (tb[FRA_PRIORITY] &&
		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
			continue;

593 594
		if (tb[FRA_IIFNAME] &&
		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
595 596
			continue;

597 598 599 600
		if (tb[FRA_OIFNAME] &&
		    nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
			continue;

601 602 603 604 605 606 607 608
		if (tb[FRA_FWMARK] &&
		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
			continue;

		if (tb[FRA_FWMASK] &&
		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
			continue;

609 610 611 612
		if (tb[FRA_TUN_ID] &&
		    (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
			continue;

D
David Ahern 已提交
613 614 615 616
		if (tb[FRA_L3MDEV] &&
		    (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
			continue;

617 618 619 620 621
		if (uid_range_set(&range) &&
		    (!uid_eq(rule->uid_range.start, range.start) ||
		     !uid_eq(rule->uid_range.end, range.end)))
			continue;

622 623 624 625 626 627 628 629
		if (!ops->compare(rule, frh, tb))
			continue;

		if (rule->flags & FIB_RULE_PERMANENT) {
			err = -EPERM;
			goto errout;
		}

630 631 632 633 634 635
		if (ops->delete) {
			err = ops->delete(rule);
			if (err)
				goto errout;
		}

636 637 638
		if (rule->tun_id)
			ip_tunnel_unneed_metadata();

639
		list_del_rcu(&rule->list);
T
Thomas Graf 已提交
640

641
		if (rule->action == FR_ACT_GOTO) {
T
Thomas Graf 已提交
642
			ops->nr_goto_rules--;
643 644 645
			if (rtnl_dereference(rule->ctarget) == NULL)
				ops->unresolved_rules--;
		}
T
Thomas Graf 已提交
646 647 648 649 650 651 652 653

		/*
		 * Check if this rule is a target to any of them. If so,
		 * disable them. As this operation is eventually very
		 * expensive, it is only performed if goto rules have
		 * actually been added.
		 */
		if (ops->nr_goto_rules > 0) {
654
			list_for_each_entry(tmp, &ops->rules_list, list) {
E
Eric Dumazet 已提交
655
				if (rtnl_dereference(tmp->ctarget) == rule) {
656
					RCU_INIT_POINTER(tmp->ctarget, NULL);
T
Thomas Graf 已提交
657 658 659 660 661
					ops->unresolved_rules++;
				}
			}
		}

D
Denis V. Lunev 已提交
662
		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
663
				   NETLINK_CB(skb).portid);
664
		fib_rule_put(rule);
665
		flush_route_cache(ops);
666 667 668 669 670 671 672 673 674
		rules_ops_put(ops);
		return 0;
	}

	err = -ENOENT;
errout:
	rules_ops_put(ops);
	return err;
}
D
David Ahern 已提交
675
EXPORT_SYMBOL_GPL(fib_nl_delrule);
676

677 678 679 680
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
681
			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
682
			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
683 684
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
685
			 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
686
			 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
687
			 + nla_total_size(4) /* FRA_FWMARK */
688
			 + nla_total_size(4) /* FRA_FWMASK */
689 690
			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
			 + nla_total_size(sizeof(struct fib_kuid_range));
691 692 693 694 695 696 697

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}

698 699 700 701 702 703 704 705 706
/*
 * Serialise @rule into @skb as one netlink message.
 *
 * @skb:   buffer the message is appended to
 * @rule:  rule to describe
 * @pid:   netlink port id placed in the message header
 * @seq:   sequence number placed in the message header
 * @type:  message type (callers in this file pass RTM_NEWRULE or an event)
 * @flags: netlink message flags
 * @ops:   rules operations of the rule's family; fill() adds its own data
 *
 * The header reports the rule as unresolved when it is a goto rule without
 * a target, and as detached when a named interface has index -1.
 *
 * Returns 0 on success.  On any failure the partial message is cancelled
 * and -EMSGSIZE is returned.
 */
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct fib_rule_hdr *frh;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (!nlh)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->family = ops->family;
	frh->table = rule->table;
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (rule->action == FR_ACT_GOTO &&
	    rcu_access_pointer(rule->ctarget) == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	/* Attributes that are always present. */
	if (nla_put_u32(skb, FRA_TABLE, rule->table))
		goto nla_put_failure;

	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
		goto nla_put_failure;

	if (rule->iifname[0]) {
		if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
			goto nla_put_failure;
		if (rule->iifindex == -1)
			frh->flags |= FIB_RULE_IIF_DETACHED;
	}

	if (rule->oifname[0]) {
		if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
			goto nla_put_failure;
		if (rule->oifindex == -1)
			frh->flags |= FIB_RULE_OIF_DETACHED;
	}

	/* Optional attributes, emitted only when the field is in use. */
	if (rule->pref &&
	    nla_put_u32(skb, FRA_PRIORITY, rule->pref))
		goto nla_put_failure;

	if (rule->mark &&
	    nla_put_u32(skb, FRA_FWMARK, rule->mark))
		goto nla_put_failure;

	if ((rule->mark_mask || rule->mark) &&
	    nla_put_u32(skb, FRA_FWMASK, rule->mark_mask))
		goto nla_put_failure;

	if (rule->target &&
	    nla_put_u32(skb, FRA_GOTO, rule->target))
		goto nla_put_failure;

	if (rule->tun_id &&
	    nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD))
		goto nla_put_failure;

	if (rule->l3mdev &&
	    nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))
		goto nla_put_failure;

	if (uid_range_set(&rule->uid_range) &&
	    nla_put_uid_range(skb, &rule->uid_range))
		goto nla_put_failure;

	if (rule->suppress_ifgroup != -1 &&
	    nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
		goto nla_put_failure;

	/* Family-specific part. */
	if (ops->fill(rule, skb, frh) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

T
Thomas Graf 已提交
771 772
static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
773 774 775
{
	int idx = 0;
	struct fib_rule *rule;
776
	int err = 0;
777

778 779
	rcu_read_lock();
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
T
Thomas Graf 已提交
780
		if (idx < cb->args[1])
781 782
			goto skip;

783 784 785 786
		err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWRULE,
				       NLM_F_MULTI, ops);
		if (err)
787 788 789 790
			break;
skip:
		idx++;
	}
791
	rcu_read_unlock();
T
Thomas Graf 已提交
792
	cb->args[1] = idx;
793 794
	rules_ops_put(ops);

795
	return err;
796 797
}

T
Thomas Graf 已提交
798 799
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
800
	struct net *net = sock_net(skb->sk);
T
Thomas Graf 已提交
801 802 803 804 805 806
	struct fib_rules_ops *ops;
	int idx = 0, family;

	family = rtnl_msg_family(cb->nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
807
		ops = lookup_rules_ops(net, family);
T
Thomas Graf 已提交
808 809 810
		if (ops == NULL)
			return -EAFNOSUPPORT;

811 812 813
		dump_rules(skb, cb, ops);

		return skb->len;
T
Thomas Graf 已提交
814 815 816
	}

	rcu_read_lock();
817
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
T
Thomas Graf 已提交
818 819 820 821 822 823 824
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		cb->args[1] = 0;
825
skip:
T
Thomas Graf 已提交
826 827 828 829 830 831 832
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}
833

D
Denis V. Lunev 已提交
834
/*
 * Broadcast a rule add/delete event to the family's netlink group.
 *
 * @event: message type to send (callers pass RTM_NEWRULE or RTM_DELRULE)
 * @rule:  rule the event is about
 * @ops:   rules operations of the rule's family
 * @nlh:   header of the request that triggered the change
 * @pid:   netlink port id of the requester
 *
 * When the message cannot be allocated (-ENOBUFS) or filled, the error is
 * recorded on the group with rtnl_set_sk_err() instead of being returned.
 */
static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct net *net = ops->fro_net;
	struct sk_buff *skb;
	int err;

	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (!skb) {
		rtnl_set_sk_err(net, ops->nlgroup, -ENOBUFS);
		return;
	}

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		rtnl_set_sk_err(net, ops->nlgroup, err);
		return;
	}

	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
}

/*
 * Bind rules that reference @dev by name but currently have no interface
 * index (index -1) to the device's ifindex, for both the input and the
 * output interface selector.
 */
static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *cur;

	list_for_each_entry(cur, rules, list) {
		if (cur->iifindex == -1 && !strcmp(dev->name, cur->iifname))
			cur->iifindex = dev->ifindex;

		if (cur->oifindex == -1 && !strcmp(dev->name, cur->oifname))
			cur->oifindex = dev->ifindex;
	}
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

880
	list_for_each_entry(rule, rules, list) {
881 882
		if (rule->iifindex == dev->ifindex)
			rule->iifindex = -1;
883 884 885
		if (rule->oifindex == dev->ifindex)
			rule->oifindex = -1;
	}
886 887 888 889
}


static int fib_rules_event(struct notifier_block *this, unsigned long event,
890
			   void *ptr)
891
{
892
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
893
	struct net *net = dev_net(dev);
894 895
	struct fib_rules_ops *ops;

896
	ASSERT_RTNL();
897 898 899

	switch (event) {
	case NETDEV_REGISTER:
900
		list_for_each_entry(ops, &net->rules_ops, list)
901
			attach_rules(&ops->rules_list, dev);
902 903
		break;

904 905 906 907 908 909 910
	case NETDEV_CHANGENAME:
		list_for_each_entry(ops, &net->rules_ops, list) {
			detach_rules(&ops->rules_list, dev);
			attach_rules(&ops->rules_list, dev);
		}
		break;

911
	case NETDEV_UNREGISTER:
912
		list_for_each_entry(ops, &net->rules_ops, list)
913
			detach_rules(&ops->rules_list, dev);
914 915 916 917 918 919 920 921 922 923
		break;
	}

	return NOTIFY_DONE;
}

/* Notifier block routing netdevice events to fib_rules_event(). */
static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

924
/*
 * Per-namespace setup: initialise the lock and the (empty) list of
 * registered rules operations.  Always returns 0.
 */
static int __net_init fib_rules_net_init(struct net *net)
{
	spin_lock_init(&net->rules_mod_lock);
	INIT_LIST_HEAD(&net->rules_ops);

	return 0;
}

/* Per-namespace operations; only an init hook is provided. */
static struct pernet_operations fib_rules_net_ops = {
	.init = fib_rules_net_init,
};

935 936
/*
 * Subsystem initialisation: register the rtnetlink handlers for
 * RTM_NEWRULE, RTM_DELRULE and RTM_GETRULE, then the per-namespace
 * operations and the netdevice notifier.  On failure everything that was
 * registered so far is undone and the error is returned.
 */
static int __init fib_rules_init(void)
{
	int err;

	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);

	err = register_pernet_subsys(&fib_rules_net_ops);
	if (err < 0)
		goto err_rtnl;

	err = register_netdevice_notifier(&fib_rules_notifier);
	if (err < 0)
		goto err_pernet;

	return 0;

err_pernet:
	unregister_pernet_subsys(&fib_rules_net_ops);
err_rtnl:
	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
	return err;
}

subsys_initcall(fib_rules_init);