/* SPDX-License-Identifier: GPL-2.0 */
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#ifndef __MPTCP_PROTOCOL_H
#define __MPTCP_PROTOCOL_H

#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>

/* MPTCP protocol version this code declares support for.
 * NOTE(review): presumably the version nibble exchanged in MP_CAPABLE
 * (see MPTCP_VERSION_MASK) -- confirm against the option code.
 */
#define MPTCP_SUPPORTED_VERSION	1

/* MPTCP option bits: one distinct bit per option kind / handshake step.
 * NOTE(review): presumably OR-ed together into a per-packet option mask --
 * confirm against the users of these bits.
 */
#define OPTION_MPTCP_MPC_SYN	BIT(0)
#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
#define OPTION_MPTCP_MPC_ACK	BIT(2)
#define OPTION_MPTCP_MPJ_SYN	BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
#define OPTION_MPTCP_MPJ_ACK	BIT(5)
#define OPTION_MPTCP_ADD_ADDR	BIT(6)
#define OPTION_MPTCP_ADD_ADDR6	BIT(7)
#define OPTION_MPTCP_RM_ADDR	BIT(8)

/* MPTCP option subtypes.
 * All values fit in 4 bits.
 * NOTE(review): presumably passed as @subopt to mptcp_option(), which places
 * the subtype in bits 12-15 of the first option word -- confirm at call sites.
 */
#define MPTCPOPT_MP_CAPABLE	0
#define MPTCPOPT_MP_JOIN	1
#define MPTCPOPT_DSS		2
#define MPTCPOPT_ADD_ADDR	3
#define MPTCPOPT_RM_ADDR	4
#define MPTCPOPT_MP_PRIO	5
#define MPTCPOPT_MP_FAIL	6
#define MPTCPOPT_MP_FASTCLOSE	7

/* MPTCP suboption lengths.
 * NOTE(review): presumably byte counts of each option variant -- confirm
 * against the option parser/writer.
 * Every *_PORT value equals its non-port counterpart plus
 * TCPOLEN_MPTCP_PORT_LEN.
 */
#define TCPOLEN_MPTCP_MPC_SYN		4
#define TCPOLEN_MPTCP_MPC_SYNACK	12
#define TCPOLEN_MPTCP_MPC_ACK		20
#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
#define TCPOLEN_MPTCP_MPJ_SYN		12
#define TCPOLEN_MPTCP_MPJ_SYNACK	16
#define TCPOLEN_MPTCP_MPJ_ACK		24
#define TCPOLEN_MPTCP_DSS_BASE		4
#define TCPOLEN_MPTCP_DSS_ACK32		4
#define TCPOLEN_MPTCP_DSS_ACK64		8
#define TCPOLEN_MPTCP_DSS_MAP32		10
#define TCPOLEN_MPTCP_DSS_MAP64		14
#define TCPOLEN_MPTCP_DSS_CHECKSUM	2
#define TCPOLEN_MPTCP_ADD_ADDR		16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
#define TCPOLEN_MPTCP_ADD_ADDR6		28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
#define TCPOLEN_MPTCP_PORT_LEN		2
#define TCPOLEN_MPTCP_RM_ADDR_BASE	4

/* MPTCP MP_JOIN flags and HMAC sizes.
 * MPTCPOPT_HMAC_LEN also sizes mptcp_subflow_context.hmac[].
 * NOTE(review): MPTCPOPT_THMAC_LEN is presumably the truncated HMAC size and
 * both are presumably byte counts -- confirm.
 */
#define MPTCPOPT_BACKUP		BIT(0)
#define MPTCPOPT_HMAC_LEN	20
#define MPTCPOPT_THMAC_LEN	8

/* MPTCP MP_CAPABLE flags.
 * NOTE(review): MPTCP_CAP_FLAG_MASK (0x3F) does not cover bits 6 and 7, i.e.
 * MPTCP_CAP_EXTENSIBILITY and MPTCP_CAP_CHECKSUM_REQD -- confirm this is
 * intended by the users of the mask.
 */
#define MPTCP_VERSION_MASK	(0x0F)
#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
#define MPTCP_CAP_HMAC_SHA256	BIT(0)
#define MPTCP_CAP_FLAG_MASK	(0x3F)

/* MPTCP DSS flags.
 * MPTCP_DSS_FLAG_MASK (0x1F) covers exactly the five flag bits 0-4 below.
 */
#define MPTCP_DSS_DATA_FIN	BIT(4)
#define MPTCP_DSS_DSN64		BIT(3)
#define MPTCP_DSS_HAS_MAP	BIT(2)
#define MPTCP_DSS_ACK64		BIT(1)
#define MPTCP_DSS_HAS_ACK	BIT(0)
#define MPTCP_DSS_FLAG_MASK	(0x1F)

/* MPTCP ADD_ADDR flags and related constants.
 * NOTE(review): MPTCP_ADDR_IPVERSION_4/6 are presumably the IP version values
 * associated with an announced address -- confirm against the option code.
 */
#define MPTCP_ADDR_ECHO		BIT(0)
#define MPTCP_ADDR_HMAC_LEN	20
#define MPTCP_ADDR_IPVERSION_4	4
#define MPTCP_ADDR_IPVERSION_6	6

/* MPTCP socket flags.
 * These are plain indices (0..3), not BIT() masks.
 * NOTE(review): presumably bit numbers used with the bitops helpers on
 * mptcp_sock.flags -- confirm in the users.
 */
#define MPTCP_DATA_READY	0
#define MPTCP_SEND_SPACE	1
#define MPTCP_WORK_RTX		2
#define MPTCP_WORK_EOF		3

/* Build the leading 32-bit word of an MPTCP option and return it converted
 * with htonl().
 *
 * Bit layout before conversion: TCPOPT_MPTCP in bits 24-31, @len in bits
 * 16-23, @subopt starting at bit 12, the low nibble of @nib in bits 8-11 and
 * @field in bits 0-7. @subopt is not masked here, so a value wider than
 * 4 bits would overlap the @len byte.
 */
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
	const int kind_and_len = (TCPOPT_MPTCP << 24) | (len << 16);
	const int subtype_and_nib = (subopt << 12) | ((nib & 0xF) << 8);

	return htonl(kind_and_len | subtype_and_nib | field);
}

/* NOTE(review): presumably an upper bound on the addresses handled by the
 * path manager -- no user is visible in this header; confirm.
 */
#define MPTCP_PM_MAX_ADDR	4

/* Address descriptor handled by the path manager (see the local/remote
 * members of struct mptcp_pm_data and the mptcp_pm_*() prototypes).
 * Holds an address family, a port, an address id and either an IPv4 address
 * or, when CONFIG_MPTCP_IPV6 is enabled, an IPv6 address in the same storage.
 * NOTE(review): presumably @family tells which union member is valid --
 * confirm against the users.
 */
struct mptcp_addr_info {
	sa_family_t		family;
	__be16			port;
	u8			id;
	union {
		struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		struct in6_addr addr6;
#endif
	};
};

/* Path manager status identifiers (values 0, 1, 2 in declaration order).
 * NOTE(review): presumably bit numbers recorded in mptcp_pm_data.status --
 * confirm in the path manager code.
 */
enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_SUBFLOW_ESTABLISHED,
};

/* Per-connection path manager state, embedded in struct mptcp_sock as 'pm'.
 * The u8 members come in counter/limit pairs by name (e.g. subflows and
 * subflows_max).
 * NOTE(review): the exact accounting rules are not visible in this header --
 * confirm in the path manager implementation.
 */
struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;

	spinlock_t	lock;		/* protects the whole PM data */

	bool		addr_signal;	/* read locklessly by mptcp_pm_should_signal() */
	bool		server_side;
	bool		work_pending;
	bool		accept_addr;
	bool		accept_subflow;
	u8		add_addr_signaled;
	u8		add_addr_accepted;
	u8		local_addr_used;
	u8		subflows;
	u8		add_addr_signal_max;
	u8		add_addr_accept_max;
	u8		local_addr_max;
	u8		subflows_max;
	u8		status;

	struct		work_struct work;
};

144 145 146 147 148 149 150 151 152
struct mptcp_data_frag {
	struct list_head list;
	u64 data_seq;
	int data_len;
	int offset;
	int overhead;
	struct page *page;
};

M
Mat Martineau 已提交
153 154 155 156
/* MPTCP connection sock */
struct mptcp_sock {
	/* inet_connection_sock must be the first member */
	struct inet_connection_sock sk;
157 158
	u64		local_key;
	u64		remote_key;
159 160
	u64		write_seq;
	u64		ack_seq;
161
	atomic64_t	snd_una;
162
	unsigned long	timer_ival;
163
	u32		token;
164
	unsigned long	flags;
165
	bool		can_ack;
166
	spinlock_t	join_list_lock;
P
Paolo Abeni 已提交
167
	struct work_struct work;
168
	struct list_head conn_list;
169
	struct list_head rtx_queue;
170
	struct list_head join_list;
171
	struct skb_ext	*cached_ext;	/* for the next sendmsg */
M
Mat Martineau 已提交
172
	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
173
	struct sock	*first;
174
	struct mptcp_pm_data	pm;
M
Mat Martineau 已提交
175 176
};

/* Iterate over the entries linked on @__msk->conn_list.
 * @__subflow is the loop cursor; entries are linked through their 'node'
 * member, which is the list member of struct mptcp_subflow_context.
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)

/* Reinterpret @sk as the MPTCP connection socket that contains it.
 * This is a plain pointer cast; no check is made that @sk really is an
 * MPTCP socket.
 */
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	struct mptcp_sock *msk = (struct mptcp_sock *)sk;

	return msk;
}

static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (list_empty(&msk->rtx_queue))
		return NULL;

	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}

static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (list_empty(&msk->rtx_queue))
		return NULL;

	return list_first_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}

205 206
struct mptcp_subflow_request_sock {
	struct	tcp_request_sock sk;
207
	u16	mp_capable : 1,
208
		mp_join : 1,
209 210
		backup : 1,
		remote_key_valid : 1;
211
	u8	local_id;
212
	u8	remote_id;
213 214
	u64	local_key;
	u64	remote_key;
215 216
	u64	idsn;
	u32	token;
217
	u32	ssn_offset;
218 219 220
	u64	thmac;
	u32	local_nonce;
	u32	remote_nonce;
221 222 223 224 225 226 227 228
};

/* Reinterpret @rsk as the MPTCP subflow request sock that contains it.
 * This is a plain pointer cast without any runtime check.
 */
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	struct mptcp_subflow_request_sock *subflow_req;

	subflow_req = (struct mptcp_subflow_request_sock *)rsk;
	return subflow_req;
}

229 230
/* MPTCP subflow context */
struct mptcp_subflow_context {
231 232 233
	struct	list_head node;/* conn_list of subflows */
	u64	local_key;
	u64	remote_key;
234
	u64	idsn;
235
	u64	map_seq;
236
	u32	snd_isn;
237
	u32	token;
238
	u32	rel_write_seq;
239 240 241
	u32	map_subflow_seq;
	u32	ssn_offset;
	u32	map_data_len;
242
	u32	request_mptcp : 1,  /* send MP_CAPABLE */
243 244
		request_join : 1,   /* send MP_JOIN */
		request_bkup : 1,
245
		mp_capable : 1,	    /* remote is MPTCP capable */
246
		mp_join : 1,	    /* remote is JOINing */
P
Paolo Abeni 已提交
247
		fully_established : 1,	    /* path validated */
248
		pm_notified : 1,    /* PM hook called for established status */
249 250
		conn_finished : 1,
		map_valid : 1,
251
		mpc_map : 1,
252
		backup : 1,
253
		data_avail : 1,
254
		rx_eof : 1,
255
		data_fin_tx_enable : 1,
256
		can_ack : 1;	    /* only after processing the remote a key */
257
	u64	data_fin_tx_seq;
258 259 260
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
261 262
	u32	remote_token;
	u8	hmac[MPTCPOPT_HMAC_LEN];
263 264
	u8	local_id;
	u8	remote_id;
265

266 267
	struct	sock *tcp_sock;	    /* tcp sk backpointer */
	struct	sock *conn;	    /* parent mptcp_sock */
268
	const	struct inet_connection_sock_af_ops *icsk_af_ops;
269 270 271 272
	void	(*tcp_data_ready)(struct sock *sk);
	void	(*tcp_state_change)(struct sock *sk);
	void	(*tcp_write_space)(struct sock *sk);

273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
	struct	rcu_head rcu;
};

static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* Use RCU on icsk_ulp_data only for sock diag code */
	return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
}

/* Return the TCP socket backpointer recorded in @subflow. */
static inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = subflow->tcp_sock;

	return ssk;
}

/* Return the subflow TCP socket's copied_seq minus @subflow->ssn_offset minus
 * @subflow->map_subflow_seq.
 * The subtraction is performed in the operands' own width before the result
 * is widened to u64.
 */
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

	return tcp_sk(ssk)->copied_seq -
	       subflow->ssn_offset -
	       subflow->map_subflow_seq;
}

/* Return @subflow->map_seq advanced by the offset computed by
 * mptcp_subflow_get_map_offset().
 */
static inline u64
mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
{
	u64 map_offset = mptcp_subflow_get_map_offset(subflow);

	return subflow->map_seq + map_offset;
}

/* Subflow layer entry points; declarations only, the definitions are not
 * part of this header.
 */
int mptcp_is_enabled(struct net *net);
bool mptcp_subflow_data_available(struct sock *sk);
void mptcp_subflow_init(void);

/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
			    const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);

static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
					      struct mptcp_subflow_context *ctx)
{
	sk->sk_data_ready = ctx->tcp_data_ready;
	sk->sk_state_change = ctx->tcp_state_change;
	sk->sk_write_space = ctx->tcp_write_space;

	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}

/* Address-family operation tables defined outside this header; the IPv6
 * one is only declared when CONFIG_MPTCP_IPV6 is enabled.
 */
extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
extern const struct inet_connection_sock_af_ops ipv6_specific;
#endif

/* Protocol initialisation entry points (declarations only). */
void mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int mptcp_proto_v6_init(void);
#endif

P
Paolo Abeni 已提交
335
struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
336 337 338 339
void mptcp_get_options(const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx);

void mptcp_finish_connect(struct sock *sk);
340
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
341
bool mptcp_finish_join(struct sock *sk);
342
void mptcp_data_acked(struct sock *sk);
343
void mptcp_subflow_eof(struct sock *sk);
344

/* Token handling entry points (declarations only; the definitions are not
 * part of this header).
 */
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(u32 token);
int mptcp_token_new_connect(struct sock *sk);
int mptcp_token_new_accept(u32 token, struct sock *conn);
struct mptcp_sock *mptcp_token_get_sock(u32 token);
void mptcp_token_destroy(u32 token);

void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);

/* Fill *@key with random bytes, then pass that key together with @token and
 * @idsn to mptcp_crypto_key_sha().
 */
static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
{
	/* A cheaper scheme could hash data already held by the MPTCP socket
	 * to obtain the key. For now the key is purely random, which is
	 * probably the safest choice when several sockets are opened in
	 * different namespaces at the same time.
	 */
	get_random_bytes(key, sizeof(*key));

	mptcp_crypto_key_sha(*key, token, idsn);
}

P
Peter Krystad 已提交
365
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
366

367 368
void mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
369
void mptcp_pm_close(struct mptcp_sock *msk);
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
				  struct mptcp_subflow_context *subflow);
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
				const struct mptcp_addr_info *addr);

int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id);

/* Return the current value of @msk->pm.addr_signal, read with READ_ONCE(). */
static inline bool mptcp_pm_should_signal(struct mptcp_sock *msk)
{
	bool signal_pending = READ_ONCE(msk->pm.addr_signal);

	return signal_pending;
}

/* Return TCPOLEN_MPTCP_ADD_ADDR when @family is AF_INET and
 * TCPOLEN_MPTCP_ADD_ADDR6 for every other value.
 */
static inline unsigned int mptcp_add_addr_len(int family)
{
	return family == AF_INET ? TCPOLEN_MPTCP_ADD_ADDR :
				   TCPOLEN_MPTCP_ADD_ADDR6;
}

bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			  struct mptcp_addr_info *saddr);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);

/* mptcp_pm_nl_*() entry points (declarations only).
 * NOTE(review): "nl" presumably stands for the netlink path manager --
 * confirm against the implementation file.
 */
void mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);

/* Return what skb_ext_find() yields for the SKB_EXT_MPTCP extension of @skb,
 * cast to struct mptcp_ext *.
 */
static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
{
	struct mptcp_ext *ext;

	ext = (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
	return ext;
}

/* Wrap-aware comparison of two 64-bit sequence numbers: true when the
 * difference @seq1 - @seq2, taken modulo 2^64 and viewed as signed, is
 * negative. Equal values compare as false.
 */
static inline bool before64(__u64 seq1, __u64 seq2)
{
	__s64 delta = (__s64)(seq1 - seq2);

	return delta < 0;
}

/* after64(a, b) is before64(b, a). */
#define after64(seq2, seq1)	before64(seq1, seq2)

/* Declaration only; the definition is not part of this header. */
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);

#endif /* __MPTCP_PROTOCOL_H */