route.h 10.6 KB
Newer Older
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
L
Linus Torvalds 已提交
2 3 4 5 6 7 8 9 10
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET  is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the IP router.
 *
 * Version:	@(#)route.h	1.0.4	05/27/93
 *
11
 * Authors:	Ross Biro
L
Linus Torvalds 已提交
12 13 14 15 16 17 18 19 20 21 22 23 24 25
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 * Fixes:
 *		Alan Cox	:	Reformatted. Added ip_rt_local()
 *		Alan Cox	:	Support for TCP parameters.
 *		Alexey Kuznetsov:	Major changes for new routing code.
 *		Mike McLagan    :	Routing by source
 *		Robert Olsson   :	Added rt_cache statistics
 */
#ifndef _ROUTE_H
#define _ROUTE_H

#include <net/dst.h>
#include <net/inetpeer.h>
#include <net/flow.h>
26
#include <net/inet_sock.h>
27
#include <net/ip_fib.h>
28 29
#include <net/arp.h>
#include <net/ndisc.h>
L
Linus Torvalds 已提交
30 31
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
32
#include <linux/rcupdate.h>
L
Linus Torvalds 已提交
33 34 35
#include <linux/route.h>
#include <linux/ip.h>
#include <linux/cache.h>
V
Venkat Yekkirala 已提交
36
#include <linux/security.h>
L
Linus Torvalds 已提交
37

38 39 40
/* The IPv4 datagram length is stored in a 16-bit field (tot_len), so the
 * largest value that field can hold, 0xFFFF, is used as the maximum MTU.
 */
#define IP_MAX_MTU	0xFFFFU

L
Linus Torvalds 已提交
41 42 43
#define RTO_ONLINK	0x01

/*
 * Combine RT_TOS() of a TOS value with the socket's SOCK_LOCALROUTE flag.
 * RT_CONN_FLAGS_TOS() takes the TOS explicitly; RT_CONN_FLAGS() reads it
 * from inet_sk(sk)->tos.  Both evaluate @sk more than once.
 */
#define RT_CONN_FLAGS_TOS(sk,tos)   (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS(sk)   RT_CONN_FLAGS_TOS(sk, inet_sk(sk)->tos)
L
Linus Torvalds 已提交
45 46

struct fib_nh;
47
struct fib_info;
E
Eric Dumazet 已提交
48
struct uncached_list;
E
Eric Dumazet 已提交
49
struct rtable {
50
	struct dst_entry	dst;
L
Linus Torvalds 已提交
51

52
	int			rt_genid;
53
	unsigned int		rt_flags;
L
Linus Torvalds 已提交
54
	__u16			rt_type;
J
Julian Anastasov 已提交
55
	__u8			rt_is_input;
56
	u8			rt_gw_family;
L
Linus Torvalds 已提交
57 58 59 60

	int			rt_iif;

	/* Info on neighbour */
61 62 63 64
	union {
		__be32		rt_gw4;
		struct in6_addr	rt_gw6;
	};
L
Linus Torvalds 已提交
65 66

	/* Miscellaneous cached information */
67 68
	u32			rt_mtu_locked:1,
				rt_pmtu:31;
69 70

	struct list_head	rt_uncached;
E
Eric Dumazet 已提交
71
	struct uncached_list	*rt_uncached_list;
L
Linus Torvalds 已提交
72 73
};

74
static inline bool rt_is_input_route(const struct rtable *rt)
75
{
76
	return rt->rt_is_input != 0;
77 78
}

79
static inline bool rt_is_output_route(const struct rtable *rt)
80
{
81
	return rt->rt_is_input == 0;
82 83
}

84 85
/*
 * Pick the IPv4 next hop for @rt: the stored IPv4 gateway when the route's
 * gateway family is AF_INET, otherwise the caller-supplied @daddr.
 */
static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
{
	return rt->rt_gw_family == AF_INET ? rt->rt_gw4 : daddr;
}

E
Eric Dumazet 已提交
91
struct ip_rt_acct {
L
Linus Torvalds 已提交
92 93 94 95 96 97
	__u32 	o_bytes;
	__u32 	o_packets;
	__u32 	i_bytes;
	__u32 	i_packets;
};

E
Eric Dumazet 已提交
98
/* Routing statistics counters; member order is unchanged. */
struct rt_cache_stat {
	/* in_* counters */
	unsigned int	in_slow_tot, in_slow_mc, in_no_route, in_brd,
			in_martian_dst, in_martian_src;
	/* out_* counters */
	unsigned int	out_slow_tot, out_slow_mc;
};

109
/* __percpu-annotated pointer to struct ip_rt_acct counters; only declared
 * here, the definition lives outside this header.
 */
extern struct ip_rt_acct __percpu *ip_rt_acct;
L
Linus Torvalds 已提交
110 111

struct in_device;

/* Declarations only; the implementations live outside this header. */
int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);

/* Output route lookups keyed by @flp; __ip_route_output_key() passes a NULL
 * @skb to the first one.
 */
struct rtable *
ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
			 const struct sk_buff *skb);
struct rtable *
ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp,
			     struct fib_result *res,
			     const struct sk_buff *skb);
121 122 123 124

static inline struct rtable *__ip_route_output_key(struct net *net,
						   struct flowi4 *flp)
{
125
	return ip_route_output_key_hash(net, flp, NULL);
126 127
}

128
/* Declarations only; the implementations live outside this header.
 * ip_route_output_key() calls ip_route_output_flow() with a NULL @sk.
 */
struct rtable *
ip_route_output_flow(struct net *net, struct flowi4 *flp,
		     const struct sock *sk);
struct dst_entry *
ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig);
132

133
static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 *flp)
134 135 136 137
{
	return ip_route_output_flow(net, flp, NULL);
}

138 139 140
/*
 * Output route lookup from bare addressing parameters.  Builds a flow key
 * on the stack with only daddr, saddr, tos and oif set (every other field
 * is zero-initialised) and hands it to ip_route_output_key().
 */
static inline struct rtable *
ip_route_output(struct net *net, __be32 daddr, __be32 saddr, u8 tos, int oif)
{
	struct flowi4 key = {
		.daddr = daddr,
		.saddr = saddr,
		.flowi4_tos = tos,
		.flowi4_oif = oif,
	};

	return ip_route_output_key(net, &key);
}

150 151
/*
 * Output route lookup with full addressing and port information.
 *
 * Initialises the caller-provided @fl4 and looks the route up through
 * ip_route_output_flow().  @sk may be NULL: in that case the mark and the
 * flow flags are 0 and the security classification step is skipped.
 */
static inline struct rtable *
ip_route_output_ports(struct net *net, struct flowi4 *fl4, struct sock *sk,
		      __be32 daddr, __be32 saddr,
		      __be16 dport, __be16 sport,
		      __u8 proto, __u8 tos, int oif)
{
	__u32 mark = 0;
	__u8 flow_flags = 0;

	/* Socket-derived inputs are only read when a socket was supplied. */
	if (sk) {
		mark = sk->sk_mark;
		flow_flags = inet_sk_flowi_flags(sk);
	}

	flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, proto,
			   flow_flags, daddr, saddr, dport, sport,
			   sock_net_uid(net, sk));
	if (sk)
		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));

	return ip_route_output_flow(net, fl4, sk);
}

165
/*
 * Output route lookup for a GRE flow.  Clears the caller-provided @fl4,
 * fills in the addresses, TOS, output interface, IPPROTO_GRE and the GRE
 * key, then looks the route up via ip_route_output_key().
 */
static inline struct rtable *
ip_route_output_gre(struct net *net, struct flowi4 *fl4,
		    __be32 daddr, __be32 saddr,
		    __be32 gre_key, __u8 tos, int oif)
{
	struct rtable *rt;

	memset(fl4, 0, sizeof(*fl4));

	fl4->flowi4_proto = IPPROTO_GRE;
	fl4->fl4_gre_key = gre_key;
	fl4->flowi4_tos = tos;
	fl4->flowi4_oif = oif;
	fl4->saddr = saddr;
	fl4->daddr = daddr;

	rt = ip_route_output_key(net, fl4);
	return rt;
}
178 179 180
/* Declarations only; the implementations live outside this header.
 * ip_route_input() below wraps ip_route_input_noref().
 */
int ip_mc_validate_source(struct sk_buff *skb,
			  __be32 daddr, __be32 saddr, u8 tos,
			  struct net_device *dev, struct in_device *in_dev,
			  u32 *itag);
int ip_route_input_noref(struct sk_buff *skb,
			 __be32 dst, __be32 src, u8 tos,
			 struct net_device *devin);
int ip_route_input_rcu(struct sk_buff *skb,
		       __be32 dst, __be32 src, u8 tos,
		       struct net_device *devin, struct fib_result *res);
186 187 188 189 190 191 192 193

/*
 * Input route lookup for @skb.
 *
 * Runs ip_route_input_noref() inside an RCU read-side section.  On success
 * it calls skb_dst_force() and, if the skb has no dst afterwards, reports
 * -EINVAL.
 *
 * Returns 0 on success, otherwise the error from ip_route_input_noref()
 * or -EINVAL.
 */
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
				 u8 tos, struct net_device *devin)
{
	int err;

	rcu_read_lock();

	err = ip_route_input_noref(skb, dst, src, tos, devin);
	if (err)
		goto out_unlock;

	skb_dst_force(skb);
	if (!skb_dst(skb))
		err = -EINVAL;

out_unlock:
	rcu_read_unlock();
	return err;
}
203

204
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
205
		      u8 protocol);
206
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
207
void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol);
208 209 210 211
void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
void ip_rt_send_redirect(struct sk_buff *skb);

unsigned int inet_addr_type(struct net *net, __be32 addr);
D
David Ahern 已提交
212
unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id);
213 214
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
				__be32 addr);
D
David Ahern 已提交
215 216 217
unsigned int inet_addr_type_dev_table(struct net *net,
				      const struct net_device *dev,
				      __be32 addr);
218
void ip_rt_multicast_event(struct in_device *);
A
Al Viro 已提交
219
int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
220
void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
221 222 223
struct rtable *rt_dst_alloc(struct net_device *dev,
			     unsigned int flags, u16 type,
			     bool nopolicy, bool noxfrm, bool will_cache);
L
Linus Torvalds 已提交
224

225
struct in_ifaddr;
226 227
void fib_add_ifaddr(struct in_ifaddr *);
void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
228
void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
229

230 231 232
void rt_add_uncached_list(struct rtable *rt);
void rt_del_uncached_list(struct rtable *rt);

E
Eric Dumazet 已提交
233
static inline void ip_rt_put(struct rtable *rt)
L
Linus Torvalds 已提交
234
{
E
Eric Dumazet 已提交
235 236 237 238 239
	/* dst_release() accepts a NULL parameter.
	 * We rely on dst being first structure in struct rtable
	 */
	BUILD_BUG_ON(offsetof(struct rtable, dst) != 0);
	dst_release(&rt->dst);
L
Linus Torvalds 已提交
240 241 242 243
}

/* IPTOS_TOS_MASK with the two low-order bits cleared. */
#define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)

244
/* 16-entry lookup table indexed by IPTOS_TOS(tos) >> 1 (see
 * rt_tos2priority()); only declared here, defined outside this header.
 */
extern const __u8 ip_tos2prio[16];
L
Linus Torvalds 已提交
245 246 247 248 249 250

/* Look up the ip_tos2prio[] entry for @tos, indexed by IPTOS_TOS(tos) >> 1. */
static inline char rt_tos2priority(u8 tos)
{
	const int idx = IPTOS_TOS(tos) >> 1;

	return ip_tos2prio[idx];
}

251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
/* ip_route_connect() and ip_route_newports() work in tandem whilst
 * binding a socket for a new outgoing connection.
 *
 * In order to use IPSEC properly, we must, in the end, have a
 * route that was looked up using all available keys including source
 * and destination ports.
 *
 * However, if a source port needs to be allocated (the user specified
 * a wildcard source port) we need to obtain addressing information
 * in order to perform that allocation.
 *
 * So ip_route_connect() looks up a route using wildcarded source and
 * destination ports in the key, simply so that we can get a pair of
 * addresses to use for port allocation.
 *
 * Later, once the ports are allocated, ip_route_newports() will make
 * another route lookup if needed to make sure we catch any IPSEC
 * rules keyed on the port information.
 *
 * The callers allocate the flow key on their stack, and must pass in
 * the same flowi4 object to both the ip_route_connect() and the
 * ip_route_newports() calls.
 */

/*
 * Initialise @fl4 for an outgoing connection on @sk.
 *
 * The flow takes its mark and uid from @sk, uses RT_SCOPE_UNIVERSE, and
 * carries FLOWI_FLAG_ANYSRC when the socket's transparent flag is set.
 * @sk must not be NULL; it is dereferenced unconditionally.
 */
static inline void
ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src,
		      u32 tos, int oif, u8 protocol,
		      __be16 sport, __be16 dport,
		      struct sock *sk)
{
	const __u8 flow_flags = inet_sk(sk)->transparent ?
				FLOWI_FLAG_ANYSRC : 0;

	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
			   protocol, flow_flags, dst, src, dport, sport,
			   sk->sk_uid);
}

/*
 * Look up the route for a new outgoing connection on @sk.
 *
 * @fl4 is initialised with ip_route_connect_init().  If either @dst or
 * @src is zero, a preliminary lookup is done first; its route is released
 * again and @fl4 is refreshed from the daddr/saddr it holds afterwards.
 * The final lookup goes through ip_route_output_flow() after the flow has
 * been classified for the socket.
 *
 * Returns the route, or the error pointer from whichever lookup failed.
 */
static inline struct rtable *
ip_route_connect(struct flowi4 *fl4, __be32 dst, __be32 src, u32 tos,
		 int oif, u8 protocol, __be16 sport, __be16 dport,
		 struct sock *sk)
{
	struct net *net = sock_net(sk);

	ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
			      sport, dport, sk);

	if (!(dst && src)) {
		struct rtable *probe = __ip_route_output_key(net, fl4);

		if (IS_ERR(probe))
			return probe;
		/* Only the addresses left in fl4 are needed from this pass. */
		ip_rt_put(probe);
		flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr);
	}

	security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
	return ip_route_output_flow(net, fl4, sk);
}

313 314 315 316
/*
 * Redo the route lookup once the connection's ports are known.
 *
 * When @sport/@dport equal @orig_sport/@orig_dport, @rt is returned
 * untouched.  Otherwise the new ports are stored in @fl4, @rt is released,
 * the flow is refreshed from the socket (bound device and
 * RT_CONN_FLAGS()), and a new lookup is done via ip_route_output_flow().
 */
static inline struct rtable *
ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
		  __be16 orig_sport, __be16 orig_dport,
		  __be16 sport, __be16 dport,
		  struct sock *sk)
{
	if (sport == orig_sport && dport == orig_dport)
		return rt;

	fl4->fl4_dport = dport;
	fl4->fl4_sport = sport;
	ip_rt_put(rt);

	flowi4_update_output(fl4, sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
			     fl4->daddr, fl4->saddr);
	security_sk_classify_flow(sk, flowi4_to_flowi(fl4));

	return ip_route_output_flow(sock_net(sk), fl4, sk);
}

331 332
static inline int inet_iif(const struct sk_buff *skb)
{
333 334 335 336
	struct rtable *rt = skb_rtable(skb);

	if (rt && rt->rt_iif)
		return rt->rt_iif;
337 338

	return skb->skb_iif;
339 340
}

341 342 343
static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
{
	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
344
	struct net *net = dev_net(dst->dev);
345 346

	if (hoplimit == 0)
347
		hoplimit = net->ipv4.sysctl_ip_default_ttl;
348 349 350
	return hoplimit;
}

351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
/*
 * Find the neighbour entry for IPv4 address @daddr on @dev.  Tries
 * __ipv4_neigh_lookup_noref() first and falls back to __neigh_create() on
 * arp_tbl when nothing is found; the fallback's result is returned as is.
 */
static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
					     __be32 daddr)
{
	struct neighbour *found = __ipv4_neigh_lookup_noref(dev, daddr);

	if (likely(found))
		return found;

	return __neigh_create(&arp_tbl, &daddr, dev, false);
}

/*
 * Find the neighbour entry to use for @rt on its output device.
 *
 * - AF_INET gateway:  look up rt_gw4.
 * - AF_INET6 gateway: look up rt_gw6 and set *@is_v6gw to true.
 * - anything else:    look up the destination address from @skb's IP
 *   header.
 *
 * *@is_v6gw is written only in the AF_INET6 case, so the caller is
 * responsible for its initial value.
 */
static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
						struct sk_buff *skb,
						bool *is_v6gw)
{
	struct net_device *dev = rt->dst.dev;

	if (likely(rt->rt_gw_family == AF_INET))
		return ip_neigh_gw4(dev, rt->rt_gw4);

	if (rt->rt_gw_family == AF_INET6) {
		struct neighbour *neigh6 = ip_neigh_gw6(dev, &rt->rt_gw6);

		*is_v6gw = true;
		return neigh6;
	}

	return ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
}

L
Linus Torvalds 已提交
381
#endif	/* _ROUTE_H */