/* SPDX-License-Identifier: GPL-2.0 */
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#ifndef __MPTCP_PROTOCOL_H
#define __MPTCP_PROTOCOL_H

#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>

#define MPTCP_SUPPORTED_VERSION	1

/* MPTCP option bits */
#define OPTION_MPTCP_MPC_SYN	BIT(0)
#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
#define OPTION_MPTCP_MPC_ACK	BIT(2)
#define OPTION_MPTCP_MPJ_SYN	BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
#define OPTION_MPTCP_MPJ_ACK	BIT(5)
#define OPTION_MPTCP_ADD_ADDR	BIT(6)
#define OPTION_MPTCP_RM_ADDR	BIT(7)
#define OPTION_MPTCP_FASTCLOSE	BIT(8)
#define OPTION_MPTCP_PRIO	BIT(9)
#define OPTION_MPTCP_RST	BIT(10)

/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE	0
#define MPTCPOPT_MP_JOIN	1
#define MPTCPOPT_DSS		2
#define MPTCPOPT_ADD_ADDR	3
#define MPTCPOPT_RM_ADDR	4
#define MPTCPOPT_MP_PRIO	5
#define MPTCPOPT_MP_FAIL	6
#define MPTCPOPT_MP_FASTCLOSE	7
#define MPTCPOPT_RST		8

/* MPTCP suboption lengths */
#define TCPOLEN_MPTCP_MPC_SYN		4
#define TCPOLEN_MPTCP_MPC_SYNACK	12
#define TCPOLEN_MPTCP_MPC_ACK		20
#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
#define TCPOLEN_MPTCP_MPJ_SYN		12
#define TCPOLEN_MPTCP_MPJ_SYNACK	16
#define TCPOLEN_MPTCP_MPJ_ACK		24
#define TCPOLEN_MPTCP_DSS_BASE		4
#define TCPOLEN_MPTCP_DSS_ACK32		4
#define TCPOLEN_MPTCP_DSS_ACK64		8
#define TCPOLEN_MPTCP_DSS_MAP32		10
#define TCPOLEN_MPTCP_DSS_MAP64		14
#define TCPOLEN_MPTCP_DSS_CHECKSUM	2
#define TCPOLEN_MPTCP_ADD_ADDR		16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
#define TCPOLEN_MPTCP_ADD_ADDR6		28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
#define TCPOLEN_MPTCP_PORT_LEN		2
#define TCPOLEN_MPTCP_PORT_ALIGN	2
#define TCPOLEN_MPTCP_RM_ADDR_BASE	3
#define TCPOLEN_MPTCP_PRIO		3
#define TCPOLEN_MPTCP_PRIO_ALIGN	4
#define TCPOLEN_MPTCP_FASTCLOSE		12
#define TCPOLEN_MPTCP_RST		4

#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)

/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP		BIT(0)
#define MPTCPOPT_HMAC_LEN	20
#define MPTCPOPT_THMAC_LEN	8

/* MPTCP MP_CAPABLE flags */
#define MPTCP_VERSION_MASK	(0x0F)
#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
#define MPTCP_CAP_DENY_JOIN_ID0	BIT(5)
#define MPTCP_CAP_HMAC_SHA256	BIT(0)
#define MPTCP_CAP_FLAG_MASK	(0x1F)

/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN	BIT(4)
#define MPTCP_DSS_DSN64		BIT(3)
#define MPTCP_DSS_HAS_MAP	BIT(2)
#define MPTCP_DSS_ACK64		BIT(1)
#define MPTCP_DSS_HAS_ACK	BIT(0)
#define MPTCP_DSS_FLAG_MASK	(0x1F)

/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO		BIT(0)

/* MPTCP MP_PRIO flags */
#define MPTCP_PRIO_BKUP		BIT(0)

/* MPTCP TCPRST flags */
#define MPTCP_RST_TRANSIENT	BIT(0)

/* MPTCP socket flags */
#define MPTCP_DATA_READY	0
#define MPTCP_NOSPACE		1
#define MPTCP_WORK_RTX		2
#define MPTCP_WORK_EOF		3
#define MPTCP_FALLBACK_DONE	4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
#define MPTCP_PUSH_PENDING	6
#define MPTCP_CLEAN_UNA		7
#define MPTCP_ERROR_REPORT	8
#define MPTCP_RETRANSMIT	9
#define MPTCP_WORK_SYNC_SETSOCKOPT 10
#define MPTCP_CONNECTED		11

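/* Wrap-around safe comparison of 64-bit data sequence numbers, mirroring
 * TCP's before()/after() helpers: the difference is evaluated in signed
 * space.
 */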
static inline bool before64(__u64 seq1, __u64 seq2)
{
	return (__s64)(seq1 - seq2) < 0;
}

#define after64(seq2, seq1)	before64(seq1, seq2)

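/* Parsed representation of the MPTCP options carried by an incoming packet,
 * filled in by mptcp_get_options().
 */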
struct mptcp_options_received {
	u64	sndr_key;
	u64	rcvr_key;
	u64	data_ack;
	u64	data_seq;
	u32	subflow_seq;
	u16	data_len;
	__sum16	csum;
	u16	mp_capable : 1,
		mp_join : 1,
		fastclose : 1,
		reset : 1,
		dss : 1,
		add_addr : 1,
		rm_addr : 1,
		mp_prio : 1,
		echo : 1,
		csum_reqd : 1,
		backup : 1,
		deny_join_id0 : 1;
	u32	token;
	u32	nonce;
	u64	thmac;
	u8	hmac[MPTCPOPT_HMAC_LEN];
	u8	join_id;
	u8	use_map:1,
		dsn64:1,
		data_fin:1,
		use_ack:1,
		ack64:1,
		mpc_map:1,
		__unused:2;
	struct mptcp_addr_info addr;
	struct mptcp_rm_list rm_list;
	u64	ahmac;
	u8	reset_reason:4;
	u8	reset_transient:1;
};

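/* Build the leading 32 bits of an MPTCP TCP option: kind and length, then
 * the 4-bit subtype, a 4-bit nibble and the first flag/field byte, all in
 * network byte order. For example, a v1 MP_CAPABLE option could start with
 * something like (illustrative only):
 *
 *	mptcp_option(MPTCPOPT_MP_CAPABLE, TCPOLEN_MPTCP_MPC_SYN,
 *		     MPTCP_SUPPORTED_VERSION, MPTCP_CAP_HMAC_SHA256);
 */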
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
		     ((nib & 0xF) << 8) | field);
}

enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ADD_ADDR_SEND_ACK,
	MPTCP_PM_RM_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_ALREADY_ESTABLISHED,	/* persistent status, set after ESTABLISHED event */
	MPTCP_PM_SUBFLOW_ESTABLISHED,
};

enum mptcp_addr_signal_status {
	MPTCP_ADD_ADDR_SIGNAL,
	MPTCP_ADD_ADDR_ECHO,
	MPTCP_ADD_ADDR_IPV6,
	MPTCP_ADD_ADDR_PORT,
	MPTCP_RM_ADDR_SIGNAL,
};

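/* Per-connection path-manager state, embedded in struct mptcp_sock */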
struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;
	struct list_head anno_list;

	spinlock_t	lock;		/* protects the whole PM data */

	u8		addr_signal;
	bool		server_side;
	bool		work_pending;
	bool		accept_addr;
	bool		accept_subflow;
	bool		remote_deny_join_id0;
	u8		add_addr_signaled;
	u8		add_addr_accepted;
	u8		local_addr_used;
	u8		subflows;
	u8		status;
	struct mptcp_rm_list rm_list_tx;
	struct mptcp_rm_list rm_list_rx;
};

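/* A chunk of data queued at the MPTCP level; kept on msk->rtx_queue until
 * acknowledged at the data sequence level.
 */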
struct mptcp_data_frag {
	struct list_head list;
	u64 data_seq;
	u16 data_len;
	u16 offset;
	u16 overhead;
	u16 already_sent;
	struct page *page;
};

/* MPTCP connection sock */
struct mptcp_sock {
	/* inet_connection_sock must be the first member */
	struct inet_connection_sock sk;
	u64		local_key;
	u64		remote_key;
	u64		write_seq;
	u64		snd_nxt;
	u64		ack_seq;
	u64		rcv_wnd_sent;
	u64		rcv_data_fin_seq;
	int		wmem_reserved;
	struct sock	*last_snd;
	int		snd_burst;
	int		old_wspace;
	u64		recovery_snd_nxt;	/* in recovery mode accept up to this seq;
						 * recovery related fields are under data_lock
						 * protection
						 */
	u64		snd_una;
	u64		wnd_end;
	unsigned long	timer_ival;
	u32		token;
	int		rmem_released;
	unsigned long	flags;
	bool		recovery;		/* closing subflow write queue reinjected */
	bool		can_ack;
	bool		fully_established;
	bool		rcv_data_fin;
	bool		snd_data_fin_enable;
	bool		rcv_fastclose;
	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
	bool		csum_enabled;
	spinlock_t	join_list_lock;
	struct work_struct work;
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;
	struct sk_buff_head receive_queue;
	int		tx_pending_data;
	struct list_head conn_list;
	struct list_head rtx_queue;
	struct mptcp_data_frag *first_pending;
	struct list_head join_list;
	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
	struct sock	*first;
	struct mptcp_pm_data	pm;
	struct {
		u32	space;	/* bytes copied in last measurement window */
		u32	copied; /* bytes copied in this measurement window */
		u64	time;	/* start time of measurement window */
		u64	rtt_us; /* last maximum rtt of subflows */
	} rcvq_space;

	u32 setsockopt_seq;
	char		ca_name[TCP_CA_NAME_MAX];
};

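/* Like lock_sock(), but runs 'cb' with the owner spinlock still held, after
 * waiting for any previous owner and before taking ownership itself.
 */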
#define mptcp_lock_sock(___sk, cb) do {					\
	struct sock *__sk = (___sk); /* silence macro reuse warning */	\
	might_sleep();							\
	spin_lock_bh(&__sk->sk_lock.slock);				\
	if (__sk->sk_lock.owned)					\
		__lock_sock(__sk);					\
	cb;								\
	__sk->sk_lock.owned = 1;					\
	spin_unlock(&__sk->sk_lock.slock);				\
	mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_);		\
	local_bh_enable();						\
} while (0)

#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)

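/* Walk the subflows attached to the given msk. Usage sketch (the caller is
 * expected to hold the msk socket lock):
 *
 *	struct mptcp_subflow_context *subflow;
 *
 *	mptcp_for_each_subflow(msk, subflow)
 *		do_something(mptcp_subflow_tcp_sock(subflow));
 */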
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)

static inline void msk_owned_by_me(const struct mptcp_sock *msk)
{
	sock_owned_by_me((const struct sock *)msk);
}

static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	return (struct mptcp_sock *)sk;
}

/* the msk socket doesn't use the backlog; also account for the memory
 * pending bulk release (rmem_released)
 */
static inline int __mptcp_rmem(const struct sock *sk)
{
	return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}

static inline int __mptcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}

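/* The portion of msk->rtx_queue starting at msk->first_pending has been
 * queued by the application but not yet pushed to any subflow; the helpers
 * below walk that portion.
 */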
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);

	return READ_ONCE(msk->first_pending);
}

static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_data_frag *cur;

	cur = msk->first_pending;
	return list_is_last(&cur->list, &msk->rtx_queue) ? NULL :
						     list_next_entry(cur, list);
}

static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (!msk->first_pending)
		return NULL;

	if (WARN_ON_ONCE(list_empty(&msk->rtx_queue)))
		return NULL;

	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}

static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
		return NULL;

	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}

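/* Pseudo-header fed to the DSS checksum: data sequence number, subflow
 * sequence number, data-level length and the checksum field itself.
 */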
struct csum_pseudo_header {
	__be64 data_seq;
	__be32 subflow_seq;
	__be16 data_len;
	__sum16 csum;
};

struct mptcp_subflow_request_sock {
	struct	tcp_request_sock sk;
	u16	mp_capable : 1,
		mp_join : 1,
		backup : 1,
		csum_reqd : 1,
		allow_join_id0 : 1;
	u8	local_id;
	u8	remote_id;
	u64	local_key;
	u64	idsn;
	u32	token;
	u32	ssn_offset;
	u64	thmac;
	u32	local_nonce;
	u32	remote_nonce;
	struct mptcp_sock	*msk;
	struct hlist_nulls_node token_node;
};

static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	return (struct mptcp_subflow_request_sock *)rsk;
}

enum mptcp_data_avail {
	MPTCP_SUBFLOW_NODATA,
	MPTCP_SUBFLOW_DATA_AVAIL,
};

struct mptcp_delegated_action {
	struct napi_struct napi;
	struct list_head head;
};

DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);

#define MPTCP_DELEGATE_SEND		0

/* MPTCP subflow context */
struct mptcp_subflow_context {
	struct	list_head node;/* conn_list of subflows */
	u64	local_key;
	u64	remote_key;
	u64	idsn;
	u64	map_seq;
	u32	snd_isn;
	u32	token;
	u32	rel_write_seq;
	u32	map_subflow_seq;
	u32	ssn_offset;
	u32	map_data_len;
	__wsum	map_data_csum;
	u32	map_csum_len;
	u32	request_mptcp : 1,  /* send MP_CAPABLE */
		request_join : 1,   /* send MP_JOIN */
		request_bkup : 1,
		mp_capable : 1,	    /* remote is MPTCP capable */
		mp_join : 1,	    /* remote is JOINing */
		fully_established : 1,	    /* path validated */
		pm_notified : 1,    /* PM hook called for established status */
		conn_finished : 1,
		map_valid : 1,
		map_csum_reqd : 1,
		map_data_fin : 1,
		mpc_map : 1,
		backup : 1,
		send_mp_prio : 1,
		rx_eof : 1,
		can_ack : 1,        /* only after processing the remote key */
		disposable : 1;	    /* ctx can be freed at ulp release time */
	enum mptcp_data_avail data_avail;
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
	u32	remote_token;
	u8	hmac[MPTCPOPT_HMAC_LEN];
	u8	local_id;
	u8	remote_id;
	u8	reset_seen:1;
	u8	reset_transient:1;
	u8	reset_reason:4;
	u8	stale_count;

	long	delegated_status;
	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */

	u32	setsockopt_seq;
	u32	stale_rcv_tstamp;

	struct	sock *tcp_sock;	    /* tcp sk backpointer */
	struct	sock *conn;	    /* parent mptcp_sock */
	const	struct inet_connection_sock_af_ops *icsk_af_ops;
	void	(*tcp_data_ready)(struct sock *sk);
	void	(*tcp_state_change)(struct sock *sk);
	void	(*tcp_write_space)(struct sock *sk);
	void	(*tcp_error_report)(struct sock *sk);

	struct	rcu_head rcu;
};

static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* Use RCU on icsk_ulp_data only for sock diag code */
	return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
}

static inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
	return subflow->tcp_sock;
}

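/* Offset of the subflow's current read position inside the active DSS
 * mapping; mptcp_subflow_get_mapped_dsn() translates it into the data
 * sequence space.
 */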
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
	return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq -
		      subflow->ssn_offset -
		      subflow->map_subflow_seq;
}

static inline u64
mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
{
	return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}

static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
					     struct mptcp_subflow_context *subflow)
{
	sock_hold(mptcp_subflow_tcp_sock(subflow));
	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);
}

void mptcp_subflow_process_delegated(struct sock *ssk);

static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
{
	struct mptcp_delegated_action *delegated;
	bool schedule;

	/* The implied barrier pairs with mptcp_subflow_delegated_done(), and
	 * ensures the below list check sees list updates done prior to status
	 * bit changes
	 */
	if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
		/* still on delegated list from previous scheduling */
		if (!list_empty(&subflow->delegated_node))
			return;

		/* the caller held the subflow bh socket lock */
		lockdep_assert_in_softirq();

		delegated = this_cpu_ptr(&mptcp_delegated_actions);
		schedule = list_empty(&delegated->head);
		list_add_tail(&subflow->delegated_node, &delegated->head);
		sock_hold(mptcp_subflow_tcp_sock(subflow));
		if (schedule)
			napi_schedule(&delegated->napi);
	}
}

static inline struct mptcp_subflow_context *
mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
{
	struct mptcp_subflow_context *ret;

	if (list_empty(&delegated->head))
		return NULL;

	ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node);
	list_del_init(&ret->delegated_node);
	return ret;
}

static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
{
	return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
}

static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
{
	/* pairs with mptcp_subflow_delegate(), ensures delegated_node is updated before
	 * touching the status bit
	 */
	smp_wmb();
	clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
}

int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
int mptcp_is_checksum_enabled(struct net *net);
int mptcp_allow_join_id0(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
		     struct mptcp_subflow_context *subflow);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);

/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote,
			    u8 flags, int ifindex);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
			 struct sockaddr_storage *addr,
			 unsigned short family);

static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

	/* can't send if the JOIN hasn't completed yet, i.e. the subflow is not
	 * yet usable for MPTCP
	 */
	if (subflow->request_join && !subflow->fully_established)
		return false;

	/* only send if our side has not closed yet */
	return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
}

static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
					      struct mptcp_subflow_context *ctx)
{
	sk->sk_data_ready = ctx->tcp_data_ready;
	sk->sk_state_change = ctx->tcp_state_change;
	sk->sk_write_space = ctx->tcp_write_space;
	sk->sk_error_report = ctx->tcp_error_report;

	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}

void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
#endif

struct sock *mptcp_sk_clone(const struct sock *sk,
			    const struct mptcp_options_received *mp_opt,
			    struct request_sock *req);
void mptcp_get_options(const struct sock *sk,
		       const struct sk_buff *skb,
		       struct mptcp_options_received *mp_opt);

void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
static inline bool mptcp_is_fully_established(struct sock *sk)
{
	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
	       READ_ONCE(mptcp_sk(sk)->fully_established);
}
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk);
int mptcp_setsockopt(struct sock *sk, int level, int optname,
		     sockptr_t optval, unsigned int optlen);
int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option);

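/* Expand a 32-bit sequence number carried by a DSS option to the full
 * 64-bit space, using old_seq as reference; 64-bit values are returned
 * as-is.
 */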
u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq);
static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
{
	if (use_64bit)
		return cur_seq;

	return __mptcp_expand_seq(old_seq, cur_seq);
}
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
void __mptcp_flush_join_list(struct mptcp_sock *msk);
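/* DATA_FIN can be sent only once every byte already queued has been pushed,
 * i.e. when write_seq has caught up with snd_nxt.
 */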
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->snd_data_fin_enable) &&
	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}

static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
{
	if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
		return false;

	WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
	return true;
}

static inline void mptcp_write_space(struct sock *sk)
{
	if (sk_stream_is_writeable(sk)) {
		/* pairs with memory barrier in mptcp_poll */
		smp_mb();
		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
			sk_stream_write_space(sk);
	}
}

void mptcp_destroy_common(struct mptcp_sock *msk);

#define MPTCP_TOKEN_MAX_RETRIES	4

void __init mptcp_token_init(void);
static inline void mptcp_token_init_request(struct request_sock *req)
{
	mptcp_subflow_rsk(req)->token_node.pprev = NULL;
}

int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(struct request_sock *req);
int mptcp_token_new_connect(struct sock *sk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
			struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
struct mptcp_sock *mptcp_token_get_sock(u32 token);
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
					 long *s_num);
void mptcp_token_destroy(struct mptcp_sock *msk);

void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);

void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);

void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
				const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
			      struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
			       const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
				 struct mptcp_addr_info *addr,
				 u8 bkup);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
		       struct mptcp_addr_info *addr, bool check_id);
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
				struct mptcp_addr_info *addr);

int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);

void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
		 const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);

static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
}

static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
}

static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6);
}

static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT);
}

static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
}

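/* TCP option space consumed by an ADD_ADDR for the given family, echo flag
 * and optional port, including the padding required when a port is present.
 */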
static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{
	u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;

	if (family == AF_INET6)
		len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
	if (!echo)
		len += MPTCPOPT_THMAC_LEN;
	/* account for 2 trailing 'nop' options */
	if (port)
		len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;

	return len;
}

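/* Option length for an RM_ADDR carrying rm_list->nr address ids, padded to
 * a 4-byte boundary; -EINVAL if the list is empty or too long.
 */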
static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
{
	if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX)
		return -EINVAL;

	return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}

bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			      struct mptcp_addr_info *saddr, bool *echo, bool *port);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			     struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);

void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
				     const struct mptcp_rm_list *rm_list);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);

void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_all(struct mptcp_sock *msk);

static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
}

void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);

static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
{
	return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline bool mptcp_check_fallback(const struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	return __mptcp_check_fallback(msk);
}

static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
		pr_debug("TCP fallback already done (msk=%p)", msk);
		return;
	}
	set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline void mptcp_do_fallback(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	__mptcp_do_fallback(msk);
}

#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)

static inline bool subflow_simultaneous_connect(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	return sk->sk_state == TCP_ESTABLISHED &&
	       !mptcp_sk(parent)->pm.server_side &&
	       !subflow->conn_finished;
}

#ifdef CONFIG_SYN_COOKIES
void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				       struct sk_buff *skb);
bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
					struct sk_buff *skb);
void __init mptcp_join_cookie_init(void);
#else
static inline void
subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				  struct sk_buff *skb) {}
static inline bool
mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
				   struct sk_buff *skb)
{
	return false;
}

static inline void mptcp_join_cookie_init(void) {}
#endif

#endif /* __MPTCP_PROTOCOL_H */