/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET  is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the IP router.
 *
 * Version:	@(#)route.h	1.0.4	05/27/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 * Fixes:
 *		Alan Cox	:	Reformatted. Added ip_rt_local()
 *		Alan Cox	:	Support for TCP parameters.
 *		Alexey Kuznetsov:	Major changes for new routing code.
 *		Mike McLagan    :	Routing by source
 *		Robert Olsson   :	Added rt_cache statistics
 */
#ifndef _ROUTE_H
#define _ROUTE_H

#include <net/dst.h>
#include <net/inetpeer.h>
#include <net/flow.h>
26
#include <net/inet_sock.h>
27
#include <net/ip_fib.h>
28 29
#include <net/arp.h>
#include <net/ndisc.h>
L
Linus Torvalds 已提交
30 31
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
32
#include <linux/rcupdate.h>
L
Linus Torvalds 已提交
33 34 35
#include <linux/route.h>
#include <linux/ip.h>
#include <linux/cache.h>
V
Venkat Yekkirala 已提交
36
#include <linux/security.h>
L
Linus Torvalds 已提交
37

38 39 40
/* IPv4 datagram length is stored into 16bit field (tot_len) */
#define IP_MAX_MTU	0xFFFFU

L
Linus Torvalds 已提交
41 42 43
#define RTO_ONLINK	0x01

#define RT_CONN_FLAGS(sk)   (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
44
#define RT_CONN_FLAGS_TOS(sk,tos)   (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
L
Linus Torvalds 已提交
45 46

struct fib_nh;
47
struct fib_info;
E
Eric Dumazet 已提交
48
struct uncached_list;
E
Eric Dumazet 已提交
49
struct rtable {
50
	struct dst_entry	dst;
L
Linus Torvalds 已提交
51

52
	int			rt_genid;
53
	unsigned int		rt_flags;
L
Linus Torvalds 已提交
54
	__u16			rt_type;
J
Julian Anastasov 已提交
55
	__u8			rt_is_input;
56
	__u8			rt_uses_gateway;
L
Linus Torvalds 已提交
57 58 59

	int			rt_iif;

60
	u8			rt_gw_family;
L
Linus Torvalds 已提交
61
	/* Info on neighbour */
62 63 64 65
	union {
		__be32		rt_gw4;
		struct in6_addr	rt_gw6;
	};
L
Linus Torvalds 已提交
66 67

	/* Miscellaneous cached information */
68 69
	u32			rt_mtu_locked:1,
				rt_pmtu:31;
70 71

	struct list_head	rt_uncached;
E
Eric Dumazet 已提交
72
	struct uncached_list	*rt_uncached_list;
L
Linus Torvalds 已提交
73 74
};

75
static inline bool rt_is_input_route(const struct rtable *rt)
76
{
77
	return rt->rt_is_input != 0;
78 79
}

80
static inline bool rt_is_output_route(const struct rtable *rt)
81
{
82
	return rt->rt_is_input == 0;
83 84
}

85 86
/*
 * Pick the IPv4 next-hop address for @rt.
 *
 * Returns the route's IPv4 gateway when the gateway family is AF_INET;
 * for every other family the caller-supplied @daddr is returned unchanged.
 */
static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
{
	return rt->rt_gw_family == AF_INET ? rt->rt_gw4 : daddr;
}

E
Eric Dumazet 已提交
92
struct ip_rt_acct {
L
Linus Torvalds 已提交
93 94 95 96 97 98
	__u32 	o_bytes;
	__u32 	o_packets;
	__u32 	i_bytes;
	__u32 	i_packets;
};

E
Eric Dumazet 已提交
99
/*
 * Routing cache statistics counters.
 * NOTE(review): the in_ and out_ groups presumably count events on the
 * input and output lookup paths - confirm against the code that bumps them.
 */
struct rt_cache_stat {
	unsigned int	in_slow_tot;
	unsigned int	in_slow_mc;
	unsigned int	in_no_route;
	unsigned int	in_brd;
	unsigned int	in_martian_dst;
	unsigned int	in_martian_src;
	unsigned int	out_slow_tot;
	unsigned int	out_slow_mc;
};

110
extern struct ip_rt_acct __percpu *ip_rt_acct;
L
Linus Torvalds 已提交
111 112

struct in_device;
113 114 115 116

int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);
117 118 119 120 121
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
					const struct sk_buff *skb);
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp,
					    struct fib_result *res,
					    const struct sk_buff *skb);
122 123 124 125

static inline struct rtable *__ip_route_output_key(struct net *net,
						   struct flowi4 *flp)
{
126
	return ip_route_output_key_hash(net, flp, NULL);
127 128
}

129
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
130
				    const struct sock *sk);
131 132
struct dst_entry *ipv4_blackhole_route(struct net *net,
				       struct dst_entry *dst_orig);
133

134
static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 *flp)
135 136 137 138
{
	return ip_route_output_flow(net, flp, NULL);
}

139 140 141
/*
 * Convenience output lookup from bare addressing parameters.
 *
 * Builds a flow key on the stack from @daddr, @saddr, @tos and @oif
 * (every other key field is zero-initialised) and resolves it with
 * ip_route_output_key().
 */
static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
					     __be32 saddr, u8 tos, int oif)
{
	struct flowi4 key = {
		.daddr		= daddr,
		.saddr		= saddr,
		.flowi4_tos	= tos,
		.flowi4_oif	= oif,
	};

	return ip_route_output_key(net, &key);
}

151 152
/*
 * Output route lookup keyed on addresses, ports and protocol.
 *
 * @fl4 is (re)initialised by this function with universe scope.  @sk may
 * be NULL: in that case the mark and flow flags are zero and the security
 * classification hook is skipped.  With a socket, its mark and flow flags
 * are copied into the key and the flow is classified before the lookup.
 */
static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4,
						   struct sock *sk,
						   __be32 daddr, __be32 saddr,
						   __be16 dport, __be16 sport,
						   __u8 proto, __u8 tos, int oif)
{
	__u32 mark = 0;
	__u8 flow_flags = 0;

	if (sk) {
		mark = sk->sk_mark;
		flow_flags = inet_sk_flowi_flags(sk);
	}

	flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, proto,
			   flow_flags, daddr, saddr, dport, sport,
			   sock_net_uid(net, sk));

	if (sk)
		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));

	return ip_route_output_flow(net, fl4, sk);
}

166
/*
 * Output route lookup for a GRE flow.
 *
 * Clears the caller-provided @fl4, fills in addresses, TOS, output
 * interface and the GRE key with the protocol fixed to IPPROTO_GRE, then
 * resolves it with ip_route_output_key().
 */
static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 *fl4,
						 __be32 daddr, __be32 saddr,
						 __be32 gre_key, __u8 tos, int oif)
{
	/* Start from an all-zero key so unset fields act as wildcards. */
	memset(fl4, 0, sizeof(*fl4));

	fl4->flowi4_proto = IPPROTO_GRE;
	fl4->fl4_gre_key = gre_key;
	fl4->flowi4_tos = tos;
	fl4->flowi4_oif = oif;
	fl4->saddr = saddr;
	fl4->daddr = daddr;

	return ip_route_output_key(net, fl4);
}
179 180 181
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			  u8 tos, struct net_device *dev,
			  struct in_device *in_dev, u32 *itag);
182 183
int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
			 u8 tos, struct net_device *devin);
184 185 186
int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
		       u8 tos, struct net_device *devin,
		       struct fib_result *res);
187

188 189 190 191
int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
		      u8 tos, struct net_device *devin,
		      const struct sk_buff *hint);

192 193 194 195 196 197 198
/*
 * Input route lookup for @skb.
 *
 * Runs ip_route_input_noref() inside an RCU read-side section and, on
 * success, calls skb_dst_force() before leaving that section.
 * If the skb has no dst after that step, the lookup is reported as
 * failed with -EINVAL.
 *
 * Returns 0 on success or the error from the lookup / -EINVAL.
 */
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
				 u8 tos, struct net_device *devin)
{
	int ret;

	rcu_read_lock();

	ret = ip_route_input_noref(skb, dst, src, tos, devin);
	if (ret)
		goto unlock;

	skb_dst_force(skb);
	if (!skb_dst(skb))
		ret = -EINVAL;

unlock:
	rcu_read_unlock();
	return ret;
}
208

209
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
210
		      u8 protocol);
211
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
212
void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol);
213 214 215 216
void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
void ip_rt_send_redirect(struct sk_buff *skb);

unsigned int inet_addr_type(struct net *net, __be32 addr);
D
David Ahern 已提交
217
unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id);
218 219
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
				__be32 addr);
D
David Ahern 已提交
220 221 222
unsigned int inet_addr_type_dev_table(struct net *net,
				      const struct net_device *dev,
				      __be32 addr);
223
void ip_rt_multicast_event(struct in_device *);
A
Al Viro 已提交
224
int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
225
void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
226 227 228
struct rtable *rt_dst_alloc(struct net_device *dev,
			     unsigned int flags, u16 type,
			     bool nopolicy, bool noxfrm, bool will_cache);
229
struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
L
Linus Torvalds 已提交
230

231
struct in_ifaddr;
232 233
void fib_add_ifaddr(struct in_ifaddr *);
void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
234
void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
235

236 237 238
void rt_add_uncached_list(struct rtable *rt);
void rt_del_uncached_list(struct rtable *rt);

239 240
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
		       u32 table_id, struct fib_info *fi,
241
		       int *fa_index, int fa_start, unsigned int flags);
242

E
Eric Dumazet 已提交
243
static inline void ip_rt_put(struct rtable *rt)
L
Linus Torvalds 已提交
244
{
E
Eric Dumazet 已提交
245 246 247 248 249
	/* dst_release() accepts a NULL parameter.
	 * We rely on dst being first structure in struct rtable
	 */
	BUILD_BUG_ON(offsetof(struct rtable, dst) != 0);
	dst_release(&rt->dst);
L
Linus Torvalds 已提交
250 251 252 253
}

#define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)

254
extern const __u8 ip_tos2prio[16];
L
Linus Torvalds 已提交
255 256 257 258 259 260

/*
 * Map an IP TOS byte to a priority value by table lookup: the TOS bits
 * selected by IPTOS_TOS() are shifted right by one and used as the index
 * into ip_tos2prio[].
 */
static inline char rt_tos2priority(u8 tos)
{
	const int idx = IPTOS_TOS(tos) >> 1;

	return ip_tos2prio[idx];
}

261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
/* ip_route_connect() and ip_route_newports() work in tandem whilst
 * binding a socket for a new outgoing connection.
 *
 * In order to use IPSEC properly, we must, in the end, have a
 * route that was looked up using all available keys including source
 * and destination ports.
 *
 * However, if a source port needs to be allocated (the user specified
 * a wildcard source port) we need to obtain addressing information
 * in order to perform that allocation.
 *
 * So ip_route_connect() looks up a route using wildcarded source and
 * destination ports in the key, simply so that we can get a pair of
 * addresses to use for port allocation.
 *
 * Later, once the ports are allocated, ip_route_newports() will make
 * another route lookup if needed to make sure we catch any IPSEC
 * rules keyed on the port information.
 *
 * The callers allocate the flow key on their stack, and must pass in
 * the same flowi4 object to both the ip_route_connect() and the
 * ip_route_newports() calls.
 */

/*
 * Initialise the flow key @fl4 for an outgoing connection on @sk.
 *
 * The key gets universe scope, the socket's mark and uid, and the given
 * addresses, ports, TOS, protocol and output interface.  For sockets
 * with the transparent option set, FLOWI_FLAG_ANYSRC is added to the
 * flow flags.  @sk must not be NULL.
 */
static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src,
					 u32 tos, int oif, u8 protocol,
					 __be16 sport, __be16 dport,
					 struct sock *sk)
{
	__u8 flow_flags = inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;

	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
			   protocol, flow_flags, dst, src, dport, sport,
			   sk->sk_uid);
}

/*
 * Route lookup used while connecting socket @sk.
 *
 * The flow key @fl4 is initialised from the arguments.  If either address
 * is still a wildcard (zero), a preliminary lookup is done first; its
 * route is released immediately and the key is refreshed via
 * flowi4_update_output() from the addresses now held in @fl4.
 * The flow is then classified for the security hook and the lookup
 * proper is performed.
 *
 * Returns the route, or an ERR_PTR-style value if a lookup failed.
 */
static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
					      __be32 dst, __be32 src, u32 tos,
					      int oif, u8 protocol,
					      __be16 sport, __be16 dport,
					      struct sock *sk)
{
	struct net *net = sock_net(sk);

	ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
			      sport, dport, sk);

	if (!(dst && src)) {
		struct rtable *probe = __ip_route_output_key(net, fl4);

		if (IS_ERR(probe))
			return probe;

		/* Only the updated key is needed, not the route itself. */
		ip_rt_put(probe);
		flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr);
	}

	security_sk_classify_flow(sk, flowi4_to_flowi(fl4));

	return ip_route_output_flow(net, fl4, sk);
}

323 324 325 326
/*
 * Re-run the route lookup once the connection's ports are final.
 *
 * If neither port differs from the original pair, @rt is returned as is.
 * Otherwise the new ports are stored in @fl4, the reference on @rt is
 * dropped, the key is refreshed from the socket's bound device and
 * connection flags, and a fresh lookup result is returned in its place.
 * @fl4 must be the same key object used for the earlier
 * ip_route_connect() call.
 */
static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
					       __be16 orig_sport, __be16 orig_dport,
					       __be16 sport, __be16 dport,
					       struct sock *sk)
{
	/* Nothing changed: the existing route is still the right one. */
	if (sport == orig_sport && dport == orig_dport)
		return rt;

	fl4->fl4_dport = dport;
	fl4->fl4_sport = sport;
	ip_rt_put(rt);

	flowi4_update_output(fl4, sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
			     fl4->daddr, fl4->saddr);
	security_sk_classify_flow(sk, flowi4_to_flowi(fl4));

	return ip_route_output_flow(sock_net(sk), fl4, sk);
}

341 342
static inline int inet_iif(const struct sk_buff *skb)
{
343 344 345 346
	struct rtable *rt = skb_rtable(skb);

	if (rt && rt->rt_iif)
		return rt->rt_iif;
347 348

	return skb->skb_iif;
349 350
}

351 352 353
static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
{
	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
354
	struct net *net = dev_net(dst->dev);
355 356

	if (hoplimit == 0)
357
		hoplimit = net->ipv4.sysctl_ip_default_ttl;
358 359 360
	return hoplimit;
}

361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
/*
 * Find the ARP neighbour entry for IPv4 address @daddr on @dev.
 *
 * Tries the no-reference lookup first; when that finds nothing, an entry
 * is created in arp_tbl via __neigh_create() and its result is returned
 * as is (callers must be prepared for whatever __neigh_create() reports
 * on failure).
 */
static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
					     __be32 daddr)
{
	struct neighbour *n = __ipv4_neigh_lookup_noref(dev, daddr);

	if (likely(n))
		return n;

	return __neigh_create(&arp_tbl, &daddr, dev, false);
}

/*
 * Resolve the neighbour entry to use when sending @skb over route @rt.
 *
 *  - IPv4 gateway: neighbour for rt_gw4.
 *  - IPv6 gateway: neighbour for rt_gw6, and *@is_v6gw is set to true.
 *  - no gateway:   neighbour for the destination address in @skb's
 *                  IP header.
 *
 * *@is_v6gw is written only in the IPv6 case, so the caller has to
 * initialise it beforehand.
 */
static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
						struct sk_buff *skb,
						bool *is_v6gw)
{
	struct net_device *dev = rt->dst.dev;

	if (likely(rt->rt_gw_family == AF_INET))
		return ip_neigh_gw4(dev, rt->rt_gw4);

	if (rt->rt_gw_family == AF_INET6) {
		struct neighbour *n = ip_neigh_gw6(dev, &rt->rt_gw6);

		*is_v6gw = true;
		return n;
	}

	return ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
}

L
Linus Torvalds 已提交
391
#endif	/* _ROUTE_H */