protocol.h 14.8 KB
Newer Older
M
Mat Martineau 已提交
1 2 3 4 5 6 7 8 9
/* SPDX-License-Identifier: GPL-2.0 */
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#ifndef __MPTCP_PROTOCOL_H
#define __MPTCP_PROTOCOL_H

10 11 12 13
#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>

14
#define MPTCP_SUPPORTED_VERSION	1
P
Peter Krystad 已提交
15 16 17 18 19

/* MPTCP option bits
 *
 * Each name below is a distinct single-bit flag (bits 0 through 8), so any
 * combination of them can be OR-ed into one mask.
 */
#define OPTION_MPTCP_MPC_SYN	BIT(0)
#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
#define OPTION_MPTCP_MPC_ACK	BIT(2)
#define OPTION_MPTCP_MPJ_SYN	BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
#define OPTION_MPTCP_MPJ_ACK	BIT(5)
#define OPTION_MPTCP_ADD_ADDR	BIT(6)
#define OPTION_MPTCP_ADD_ADDR6	BIT(7)
#define OPTION_MPTCP_RM_ADDR	BIT(8)
P
Peter Krystad 已提交
26 27 28 29 30 31 32 33 34 35 36 37

/* MPTCP option subtypes
 *
 * NOTE(review): these look like the values passed as @subopt to
 * mptcp_option(), which shifts that argument left by 12 when building the
 * option word -- confirm against the callers.
 */
#define MPTCPOPT_MP_CAPABLE	0
#define MPTCPOPT_MP_JOIN	1
#define MPTCPOPT_DSS		2
#define MPTCPOPT_ADD_ADDR	3
#define MPTCPOPT_RM_ADDR	4
#define MPTCPOPT_MP_PRIO	5
#define MPTCPOPT_MP_FAIL	6
#define MPTCPOPT_MP_FASTCLOSE	7

/* MPTCP suboption lengths
 *
 * NOTE(review): presumably byte counts as carried in the TCP option length
 * field -- confirm against the option parser/writer.
 */
#define TCPOLEN_MPTCP_MPC_SYN		4
#define TCPOLEN_MPTCP_MPC_SYNACK	12
#define TCPOLEN_MPTCP_MPC_ACK		20
#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
#define TCPOLEN_MPTCP_MPJ_SYN		12
#define TCPOLEN_MPTCP_MPJ_SYNACK	16
#define TCPOLEN_MPTCP_MPJ_ACK		24
#define TCPOLEN_MPTCP_DSS_BASE		4
#define TCPOLEN_MPTCP_DSS_ACK32		4
#define TCPOLEN_MPTCP_DSS_ACK64		8
#define TCPOLEN_MPTCP_DSS_MAP32		10
#define TCPOLEN_MPTCP_DSS_MAP64		14
#define TCPOLEN_MPTCP_DSS_CHECKSUM	2
/* Every *_PORT value below equals its non-port counterpart plus
 * TCPOLEN_MPTCP_PORT_LEN.
 */
#define TCPOLEN_MPTCP_ADD_ADDR		16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
#define TCPOLEN_MPTCP_ADD_ADDR6		28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
#define TCPOLEN_MPTCP_PORT_LEN		2
#define TCPOLEN_MPTCP_RM_ADDR_BASE	4
P
Peter Krystad 已提交
61

62
/* MPTCP MP_JOIN flags
 *
 * MPTCPOPT_HMAC_LEN sizes the hmac[] array in struct mptcp_subflow_context.
 */
#define MPTCPOPT_BACKUP		BIT(0)
#define MPTCPOPT_HMAC_LEN	20
#define MPTCPOPT_THMAC_LEN	8
66

P
Peter Krystad 已提交
67 68 69 70
/* MPTCP MP_CAPABLE flags
 *
 * MPTCP_CAP_FLAG_MASK (0x3F) covers bits 0-5 only, so it does not include
 * MPTCP_CAP_CHECKSUM_REQD (bit 7) or MPTCP_CAP_EXTENSIBILITY (bit 6).
 */
#define MPTCP_VERSION_MASK	(0x0F)
#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
#define MPTCP_CAP_HMAC_SHA256	BIT(0)
#define MPTCP_CAP_FLAG_MASK	(0x3F)

74 75 76 77 78 79
/* MPTCP DSS flags
 *
 * MPTCP_DSS_FLAG_MASK (0x1F) is exactly the union of the five flag bits
 * (bits 0-4) defined here.
 */
#define MPTCP_DSS_DATA_FIN	BIT(4)
#define MPTCP_DSS_DSN64		BIT(3)
#define MPTCP_DSS_HAS_MAP	BIT(2)
#define MPTCP_DSS_ACK64		BIT(1)
#define MPTCP_DSS_HAS_ACK	BIT(0)
#define MPTCP_DSS_FLAG_MASK	(0x1F)

P
Peter Krystad 已提交
82 83 84 85 86
/* MPTCP ADD_ADDR flags
 *
 * MPTCP_ADDR_ECHO is a single-bit flag; the two IPVERSION names are plain
 * numeric values (4 and 6), not bit masks.
 */
#define MPTCP_ADDR_ECHO		BIT(0)
#define MPTCP_ADDR_IPVERSION_4	4
#define MPTCP_ADDR_IPVERSION_6	6

87
/* MPTCP socket flags
 *
 * MPTCP_FALLBACK_DONE is used as a bit number with test_bit()/set_bit() on
 * mptcp_sock::flags; the other values are presumably bit numbers in the same
 * word -- TODO confirm against their users.
 */
#define MPTCP_DATA_READY	0
#define MPTCP_SEND_SPACE	1
#define MPTCP_WORK_RTX		2
#define MPTCP_WORK_EOF		3
#define MPTCP_FALLBACK_DONE	4
93

94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
struct mptcp_options_received {
	u64	sndr_key;
	u64	rcvr_key;
	u64	data_ack;
	u64	data_seq;
	u32	subflow_seq;
	u16	data_len;
	u16	mp_capable : 1,
		mp_join : 1,
		dss : 1,
		add_addr : 1,
		rm_addr : 1,
		family : 4,
		echo : 1,
		backup : 1;
	u32	token;
	u32	nonce;
	u64	thmac;
	u8	hmac[20];
	u8	join_id;
	u8	use_map:1,
		dsn64:1,
		data_fin:1,
		use_ack:1,
		ack64:1,
		mpc_map:1,
		__unused:2;
	u8	addr_id;
	u8	rm_id;
	union {
		struct in_addr	addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		struct in6_addr	addr6;
#endif
	};
	u64	ahmac;
	u16	port;
};

P
Peter Krystad 已提交
133 134 135 136 137 138
/* Build a 32-bit MPTCP option word and convert it with htonl().
 *
 * Before conversion the word is the OR of: TCPOPT_MPTCP shifted left by 24,
 * @len shifted left by 16, @subopt shifted left by 12, the low four bits of
 * @nib shifted left by 8, and @field in the lowest byte.
 */
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
	int kind_and_len = (TCPOPT_MPTCP << 24) | (len << 16);
	int subtype_and_nib = (subopt << 12) | ((nib & 0xF) << 8);

	return htonl(kind_and_len | subtype_and_nib | field);
}

139 140 141 142
struct mptcp_addr_info {
	sa_family_t		family;
	__be16			port;
	u8			id;
143 144
	u8			flags;
	int			ifindex;
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
	union {
		struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		struct in6_addr addr6;
#endif
	};
};

/* Path-manager status values (0, 1, 2 in declaration order).
 *
 * NOTE(review): presumably recorded in mptcp_pm_data::status -- confirm how
 * the values are encoded there (plain value vs. bit number).
 */
enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_SUBFLOW_ESTABLISHED,
};

struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;

	spinlock_t	lock;		/*protects the whole PM data */

165
	bool		add_addr_signal;
166
	bool		rm_addr_signal;
167 168 169 170 171 172 173 174 175 176 177 178 179
	bool		server_side;
	bool		work_pending;
	bool		accept_addr;
	bool		accept_subflow;
	u8		add_addr_signaled;
	u8		add_addr_accepted;
	u8		local_addr_used;
	u8		subflows;
	u8		add_addr_signal_max;
	u8		add_addr_accept_max;
	u8		local_addr_max;
	u8		subflows_max;
	u8		status;
180
	u8		rm_id;
181 182
};

183 184 185 186 187 188 189 190 191
/* One entry of mptcp_sock::rtx_queue; mptcp_rtx_head() and mptcp_rtx_tail()
 * walk that list through @list.
 */
struct mptcp_data_frag {
	struct list_head list;	/* linkage on mptcp_sock::rtx_queue */
	u64 data_seq;
	int data_len;
	int offset;
	int overhead;
	struct page *page;
};

M
Mat Martineau 已提交
192 193 194 195
/* MPTCP connection sock */
struct mptcp_sock {
	/* inet_connection_sock must be the first member */
	struct inet_connection_sock sk;
196 197
	u64		local_key;
	u64		remote_key;
198 199
	u64		write_seq;
	u64		ack_seq;
200
	u64		rcv_data_fin_seq;
201 202
	struct sock	*last_snd;
	int		snd_burst;
203
	atomic64_t	snd_una;
204
	unsigned long	timer_ival;
205
	u32		token;
206
	unsigned long	flags;
207
	bool		can_ack;
208
	bool		fully_established;
209
	bool		rcv_data_fin;
210
	bool		snd_data_fin_enable;
211
	spinlock_t	join_list_lock;
P
Paolo Abeni 已提交
212
	struct work_struct work;
213 214
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;
215
	struct list_head conn_list;
216
	struct list_head rtx_queue;
217
	struct list_head join_list;
218
	struct skb_ext	*cached_ext;	/* for the next sendmsg */
M
Mat Martineau 已提交
219
	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
220
	struct sock	*first;
221
	struct mptcp_pm_data	pm;
222 223 224 225 226 227
	struct {
		u32	space;	/* bytes copied in last measurement window */
		u32	copied; /* bytes copied in this measurement window */
		u64	time;	/* start time of measurement window */
		u64	rtt_us; /* last maximum rtt of subflows */
	} rcvq_space;
M
Mat Martineau 已提交
228 229
};

230 231 232
/* Iterate over @__msk's conn_list with list_for_each_entry(), using
 * @__subflow as the cursor; entries are linked through their "node" member
 * (struct mptcp_subflow_context has such a member).
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)

M
Mat Martineau 已提交
233 234 235 236 237
/* Reinterpret @sk as the struct mptcp_sock it is embedded in.
 *
 * This is a plain pointer cast (it also drops the const qualifier); it
 * depends on the socket living at offset 0 of struct mptcp_sock.
 */
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	struct mptcp_sock *msk = (struct mptcp_sock *)sk;

	return msk;
}

238 239 240 241 242 243 244 245 246 247
static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (list_empty(&msk->rtx_queue))
		return NULL;

	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}

248 249 250 251
static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

252
	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
253 254
}

255 256
struct mptcp_subflow_request_sock {
	struct	tcp_request_sock sk;
257
	u16	mp_capable : 1,
258
		mp_join : 1,
259
		backup : 1;
260
	u8	local_id;
261
	u8	remote_id;
262
	u64	local_key;
263 264
	u64	idsn;
	u32	token;
265
	u32	ssn_offset;
266 267 268
	u64	thmac;
	u32	local_nonce;
	u32	remote_nonce;
269
	struct mptcp_sock	*msk;
P
Paolo Abeni 已提交
270
	struct hlist_nulls_node token_node;
271 272 273 274 275 276 277 278
};

/* Reinterpret @rsk as a struct mptcp_subflow_request_sock.
 *
 * Plain pointer cast; it also drops the const qualifier of @rsk.
 */
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	struct mptcp_subflow_request_sock *subflow_req;

	subflow_req = (struct mptcp_subflow_request_sock *)rsk;
	return subflow_req;
}

279 280 281 282 283 284
/* Values stored in mptcp_subflow_context::data_avail (0, 1, 2 in
 * declaration order).
 */
enum mptcp_data_avail {
	MPTCP_SUBFLOW_NODATA,
	MPTCP_SUBFLOW_DATA_AVAIL,
	MPTCP_SUBFLOW_OOO_DATA
};

285 286
/* MPTCP subflow context */
struct mptcp_subflow_context {
287 288 289
	struct	list_head node;/* conn_list of subflows */
	u64	local_key;
	u64	remote_key;
290
	u64	idsn;
291
	u64	map_seq;
292
	u32	snd_isn;
293
	u32	token;
294
	u32	rel_write_seq;
295 296 297
	u32	map_subflow_seq;
	u32	ssn_offset;
	u32	map_data_len;
298
	u32	request_mptcp : 1,  /* send MP_CAPABLE */
299 300
		request_join : 1,   /* send MP_JOIN */
		request_bkup : 1,
301
		mp_capable : 1,	    /* remote is MPTCP capable */
302
		mp_join : 1,	    /* remote is JOINing */
P
Paolo Abeni 已提交
303
		fully_established : 1,	    /* path validated */
304
		pm_notified : 1,    /* PM hook called for established status */
305 306
		conn_finished : 1,
		map_valid : 1,
307
		mpc_map : 1,
308
		backup : 1,
309
		rx_eof : 1,
310
		use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
311
		can_ack : 1;	    /* only after processing the remote a key */
312
	enum mptcp_data_avail data_avail;
313 314 315
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
316 317
	u32	remote_token;
	u8	hmac[MPTCPOPT_HMAC_LEN];
318 319
	u8	local_id;
	u8	remote_id;
320

321 322
	struct	sock *tcp_sock;	    /* tcp sk backpointer */
	struct	sock *conn;	    /* parent mptcp_sock */
323
	const	struct inet_connection_sock_af_ops *icsk_af_ops;
324 325 326 327
	void	(*tcp_data_ready)(struct sock *sk);
	void	(*tcp_state_change)(struct sock *sk);
	void	(*tcp_write_space)(struct sock *sk);

328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
	struct	rcu_head rcu;
};

static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* Use RCU on icsk_ulp_data only for sock diag code */
	return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
}

/* Return the TCP socket backpointer (tcp_sock) stored in @subflow. */
static inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = subflow->tcp_sock;

	return ssk;
}

346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
/* Return the subflow TCP socket's copied_seq minus @subflow's ssn_offset and
 * map_subflow_seq, widened to u64.
 */
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

	return tcp_sk(ssk)->copied_seq - subflow->ssn_offset -
	       subflow->map_subflow_seq;
}

/* Return @subflow's map_seq advanced by the current map offset, as computed
 * by mptcp_subflow_get_map_offset().
 */
static inline u64
mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
{
	u64 map_offset = mptcp_subflow_get_map_offset(subflow);

	return subflow->map_seq + map_offset;
}

int mptcp_is_enabled(struct net *net);
361 362
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt);
363
bool mptcp_subflow_data_available(struct sock *sk);
364
void __init mptcp_subflow_init(void);
365 366

/* called with sk socket lock held */
367
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
368
			    const struct mptcp_addr_info *remote);
369 370
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);

371 372 373 374 375 376 377 378 379 380
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
					      struct mptcp_subflow_context *ctx)
{
	sk->sk_data_ready = ctx->tcp_data_ready;
	sk->sk_state_change = ctx->tcp_state_change;
	sk->sk_write_space = ctx->tcp_write_space;

	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}

381
void __init mptcp_proto_init(void);
382
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
383
int __init mptcp_proto_v6_init(void);
384
#endif
385

386
struct sock *mptcp_sk_clone(const struct sock *sk,
387
			    const struct mptcp_options_received *mp_opt,
388
			    struct request_sock *req);
389
void mptcp_get_options(const struct sk_buff *skb,
390
		       struct mptcp_options_received *mp_opt);
391 392

void mptcp_finish_connect(struct sock *sk);
393 394 395 396 397
static inline bool mptcp_is_fully_established(struct sock *sk)
{
	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
	       READ_ONCE(mptcp_sk(sk)->fully_established);
}
398
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
399
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
400
bool mptcp_finish_join(struct sock *sk);
401
void mptcp_data_acked(struct sock *sk);
402
void mptcp_subflow_eof(struct sock *sk);
403
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq);
404

P
Paolo Abeni 已提交
405 406 407 408 409 410
void __init mptcp_token_init(void);
static inline void mptcp_token_init_request(struct request_sock *req)
{
	mptcp_subflow_rsk(req)->token_node.pprev = NULL;
}

411
int mptcp_token_new_request(struct request_sock *req);
P
Paolo Abeni 已提交
412
void mptcp_token_destroy_request(struct request_sock *req);
413
int mptcp_token_new_connect(struct sock *sk);
P
Paolo Abeni 已提交
414 415
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
			struct mptcp_sock *msk);
416
bool mptcp_token_exists(u32 token);
417
struct mptcp_sock *mptcp_token_get_sock(u32 token);
P
Paolo Abeni 已提交
418 419
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
					 long *s_num);
P
Paolo Abeni 已提交
420
void mptcp_token_destroy(struct mptcp_sock *msk);
421 422 423

void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);

P
Peter Krystad 已提交
424
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
425

426
void __init mptcp_pm_init(void);
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
				  struct mptcp_subflow_context *subflow);
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
				const struct mptcp_addr_info *addr);

int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id);

443
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
444
{
445
	return READ_ONCE(msk->pm.add_addr_signal);
446 447
}

448 449 450 451 452
static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.rm_addr_signal);
}

453 454 455 456 457 458 459
/* Return TCPOLEN_MPTCP_ADD_ADDR when @family is AF_INET, and
 * TCPOLEN_MPTCP_ADD_ADDR6 for every other value of @family.
 */
static inline unsigned int mptcp_add_addr_len(int family)
{
	return family == AF_INET ? TCPOLEN_MPTCP_ADD_ADDR :
				   TCPOLEN_MPTCP_ADD_ADDR6;
}

460 461
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			      struct mptcp_addr_info *saddr);
462 463
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			     u8 *rm_id);
464 465
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);

466
void __init mptcp_pm_nl_init(void);
P
Paolo Abeni 已提交
467 468 469 470 471 472
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);

473 474 475 476 477
/* Return the result of skb_ext_find(@skb, SKB_EXT_MPTCP), cast to
 * struct mptcp_ext *.
 */
static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
{
	void *ext = skb_ext_find(skb, SKB_EXT_MPTCP);

	return (struct mptcp_ext *)ext;
}

478 479 480 481 482 483 484
/* Wrap-around aware "seq1 < seq2" for 64-bit sequence numbers: the unsigned
 * difference seq1 - seq2 is reinterpreted as signed, and a negative result
 * means seq1 comes first.
 */
static inline bool before64(__u64 seq1, __u64 seq2)
{
	__s64 delta = (__s64)(seq1 - seq2);

	return delta < 0;
}

/* after64(a, b) is before64(b, a). */
#define after64(seq2, seq1)	before64(seq1, seq2)

485 486
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);

487
static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
488 489 490 491
{
	return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

492
static inline bool mptcp_check_fallback(const struct sock *sk)
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	return __mptcp_check_fallback(msk);
}

/* Set MPTCP_FALLBACK_DONE in msk->flags.
 *
 * If the bit is already set, only a debug message is emitted and the flags
 * are left untouched.
 */
static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
		/* terminate the record with '\n' so it is flushed as one line */
		pr_debug("TCP fallback already done (msk=%p)\n", msk);
		return;
	}
	set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline void mptcp_do_fallback(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	__mptcp_do_fallback(msk);
}

/* Debug-log that the calling function falls back to TCP for msk @a; the
 * message is newline-terminated so it forms a complete log record.
 */
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, (a))

519 520 521 522 523 524 525 526 527 528
static inline bool subflow_simultaneous_connect(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	return sk->sk_state == TCP_ESTABLISHED &&
	       !mptcp_sk(parent)->pm.server_side &&
	       !subflow->conn_finished;
}

529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
/* Join-cookie helpers.
 *
 * With CONFIG_SYN_COOKIES the three functions are only declared here and
 * implemented elsewhere. Without it they are replaced by the inline stubs
 * below: the two void functions do nothing, and
 * mptcp_token_join_cookie_init_state() always returns false.
 */
#ifdef CONFIG_SYN_COOKIES
void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				       struct sk_buff *skb);
bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
					struct sk_buff *skb);
void __init mptcp_join_cookie_init(void);
#else
/* stub: nothing is saved when SYN cookies are compiled out */
static inline void
subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				  struct sk_buff *skb) {}
/* stub: always reports false */
static inline bool
mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
				   struct sk_buff *skb)
{
	return false;
}

/* stub: no initialisation needed */
static inline void mptcp_join_cookie_init(void) {}
#endif

M
Mat Martineau 已提交
549
#endif /* __MPTCP_PROTOCOL_H */