/*
 *  net/dccp/options.c
 *
 *  An implementation of the DCCP protocol
 *  Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>

#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
#include "feat.h"

/*
 * Initial values for the negotiable features of a DCCP socket.  Each one
 * starts out as the corresponding DCCPF_INITIAL_* constant and is copied
 * into a minisock by dccp_minisock_init().
 */
int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
int sysctl_dccp_feat_ack_ratio	      = DCCPF_INITIAL_ACK_RATIO;
int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;

void dccp_minisock_init(struct dccp_minisock *dmsk)
33
{
34 35 36 37 38 39
	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
	dmsk->dccpms_rx_ccid	     = sysctl_dccp_feat_rx_ccid;
	dmsk->dccpms_tx_ccid	     = sysctl_dccp_feat_tx_ccid;
	dmsk->dccpms_ack_ratio	     = sysctl_dccp_feat_ack_ratio;
	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
}

/**
 * dccp_decode_value_var  -  Decode a big-endian value of up to four bytes
 * @bf:  first byte of the encoded value (most significant byte first)
 * @len: number of encoded bytes; at most four bytes are consumed
 *
 * Returns the decoded value, or 0 when @len is 0.  The buffer is read one
 * byte at a time, so @bf does not need any particular alignment.
 */
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
{
	u32 value = 0;

	/*
	 * Widen each byte to u32 before shifting: a bare unsigned char is
	 * promoted to (signed) int, and shifting a byte >= 0x80 left by 24
	 * would then shift into the sign bit.
	 */
	if (len > 3)
		value |= (u32)*bf++ << 24;
	if (len > 2)
		value |= (u32)*bf++ << 16;
	if (len > 1)
		value |= (u32)*bf++ << 8;
	if (len > 0)
		value |= (u32)*bf;

	return value;
}

int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_hdr *dh = dccp_hdr(skb);
	const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
63
	u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
64 65
	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
	unsigned char *opt_ptr = options;
66 67
	const unsigned char *opt_end = (unsigned char *)dh +
					(dh->dccph_doff * 4);
68 69 70
	struct dccp_options_received *opt_recv = &dp->dccps_options_received;
	unsigned char opt, len;
	unsigned char *value;
71
	u32 elapsed_time;
72 73
	int rc;
	int mandatory = 0;
74 75 76

	memset(opt_recv, 0, sizeof(*opt_recv));

77
	opt = len = 0;
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
	while (opt_ptr != opt_end) {
		opt   = *opt_ptr++;
		len   = 0;
		value = NULL;

		/* Check if this isn't a single byte option */
		if (opt > DCCPO_MAX_RESERVED) {
			if (opt_ptr == opt_end)
				goto out_invalid_option;

			len = *opt_ptr++;
			if (len < 3)
				goto out_invalid_option;
			/*
			 * Remove the type and len fields, leaving
			 * just the value size
			 */
			len	-= 2;
			value	= opt_ptr;
			opt_ptr += len;

			if (opt_ptr > opt_end)
				goto out_invalid_option;
		}

		switch (opt) {
		case DCCPO_PADDING:
			break;
106 107 108
		case DCCPO_MANDATORY:
			if (mandatory)
				goto out_invalid_option;
109 110
			if (pkt_type != DCCP_PKT_DATA)
				mandatory = 1;
111
			break;
112 113 114 115 116
		case DCCPO_NDP_COUNT:
			if (len > 3)
				goto out_invalid_option;

			opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
117
			dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk),
118
				      opt_recv->dccpor_ndp);
119
			break;
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
		case DCCPO_CHANGE_L:
			/* fall through */
		case DCCPO_CHANGE_R:
			if (len < 2)
				goto out_invalid_option;
			rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
						   len - 1);
			/*
			 * When there is a change error, change_recv is
			 * responsible for dealing with it.  i.e. reply with an
			 * empty confirm.
			 * If the change was mandatory, then we need to die.
			 */
			if (rc && mandatory)
				goto out_invalid_option;
			break;
		case DCCPO_CONFIRM_L:
			/* fall through */
		case DCCPO_CONFIRM_R:
			if (len < 2)
				goto out_invalid_option;
			if (dccp_feat_confirm_recv(sk, opt, *value,
						   value + 1, len - 1))
				goto out_invalid_option;
			break;
145
		case DCCPO_ACK_VECTOR_0:
146
		case DCCPO_ACK_VECTOR_1:
147
			if (pkt_type == DCCP_PKT_DATA)
148
				break;
149

150
			if (dccp_msk(sk)->dccpms_send_ack_vector &&
151
			    dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
152
				goto out_invalid_option;
153 154 155 156 157
			break;
		case DCCPO_TIMESTAMP:
			if (len != 4)
				goto out_invalid_option;

158
			opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value);
159 160

			dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
161
			dccp_timestamp(sk, &dp->dccps_timestamp_time);
162

163 164
			dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
				      dccp_role(sk), opt_recv->dccpor_timestamp,
165
				      (unsigned long long)
166 167 168
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
			break;
		case DCCPO_TIMESTAMP_ECHO:
I
Ian McDonald 已提交
169
			if (len != 4 && len != 6 && len != 8)
170 171
				goto out_invalid_option;

172
			opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
173

174 175
			dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
				      "ackno=%llu, ",  dccp_role(sk),
176
				      opt_recv->dccpor_timestamp_echo,
177 178
				      len + 2,
				      (unsigned long long)
I
Ian McDonald 已提交
179 180 181
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);


182 183 184 185
			if (len == 4)
				break;

			if (len == 6)
186
				elapsed_time = ntohs(*(__be16 *)(value + 4));
187
			else
188
				elapsed_time = ntohl(*(__be32 *)(value + 4));
189 190 191 192

			/* Give precedence to the biggest ELAPSED_TIME */
			if (elapsed_time > opt_recv->dccpor_elapsed_time)
				opt_recv->dccpor_elapsed_time = elapsed_time;
193 194
			break;
		case DCCPO_ELAPSED_TIME:
I
Ian McDonald 已提交
195
			if (len != 2 && len != 4)
196 197 198 199
				goto out_invalid_option;

			if (pkt_type == DCCP_PKT_DATA)
				continue;
I
Ian McDonald 已提交
200 201

			if (len == 2)
202
				elapsed_time = ntohs(*(__be16 *)value);
I
Ian McDonald 已提交
203
			else
204
				elapsed_time = ntohl(*(__be32 *)value);
205 206 207

			if (elapsed_time > opt_recv->dccpor_elapsed_time)
				opt_recv->dccpor_elapsed_time = elapsed_time;
I
Ian McDonald 已提交
208

209 210
			dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
				      dccp_role(sk), elapsed_time);
211 212
			break;
			/*
213
			 * From RFC 4340, sec. 10.3:
214
			 *
215 216 217 218 219
			 *	Option numbers 128 through 191 are for
			 *	options sent from the HC-Sender to the
			 *	HC-Receiver; option numbers 192 through 255
			 *	are for options sent from the HC-Receiver to
			 *	the HC-Sender.
220 221 222 223
			 */
		case 128 ... 191: {
			const u16 idx = value - options;

224 225 226
			if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
						     opt, len, idx,
						     value) != 0)
227 228 229 230 231 232
				goto out_invalid_option;
		}
			break;
		case 192 ... 255: {
			const u16 idx = value - options;

233 234 235
			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
						     opt, len, idx,
						     value) != 0)
236 237 238 239
				goto out_invalid_option;
		}
			break;
		default:
240 241
			DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
				  "implemented, ignoring", sk, opt, len);
242
			break;
243
		}
244 245 246

		if (opt != DCCPO_MANDATORY)
			mandatory = 0;
247 248
	}

249 250 251 252
	/* mandatory was the last byte in option list -> reset connection */
	if (mandatory)
		goto out_invalid_option;

253 254 255 256 257
	return 0;

out_invalid_option:
	DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
258
	DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
259 260 261
	return -1;
}

262 263
EXPORT_SYMBOL_GPL(dccp_parse_options);

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
/**
 * dccp_encode_value_var  -  Store the low bytes of a value in big-endian order
 * @value: value to encode
 * @to:    destination buffer
 * @len:   number of bytes to emit; at most four bytes are written
 *
 * Writes the min(@len, 4) least significant bytes of @value to @to, most
 * significant of those bytes first.  Nothing is written when @len is 0.
 */
static void dccp_encode_value_var(const u32 value, unsigned char *to,
				  const unsigned int len)
{
	unsigned int remaining = len > 4 ? 4 : len;

	while (remaining > 0) {
		remaining--;
		*to++ = (value >> (8 * remaining)) & 0xFF;
	}
}

/*
 * Number of value bytes used to encode @ndp: 1 for values up to 0xFF
 * (marked as the likely case), 2 for values up to 0xFFFF, 3 otherwise.
 */
static inline int dccp_ndp_len(const int ndp)
{
	if (likely(ndp <= 0xFF))
		return 1;
	if (ndp <= 0xFFFF)
		return 2;
	return 3;
}

282
int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
283 284 285 286 287
			const unsigned char option,
			const void *value, const unsigned char len)
{
	unsigned char *to;

288 289
	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN)
		return -1;
290 291 292 293 294 295 296 297

	DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;

	to    = skb_push(skb, len + 2);
	*to++ = option;
	*to++ = len + 2;

	memcpy(to, value, len);
298
	return 0;
299 300 301 302
}

EXPORT_SYMBOL_GPL(dccp_insert_option);

303
static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
{
	struct dccp_sock *dp = dccp_sk(sk);
	int ndp = dp->dccps_ndp_count;

	if (dccp_non_data_packet(skb))
		++dp->dccps_ndp_count;
	else
		dp->dccps_ndp_count = 0;

	if (ndp > 0) {
		unsigned char *ptr;
		const int ndp_len = dccp_ndp_len(ndp);
		const int len = ndp_len + 2;

		if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
319
			return -1;
320 321 322 323 324 325 326 327

		DCCP_SKB_CB(skb)->dccpd_opt_len += len;

		ptr = skb_push(skb, len);
		*ptr++ = DCCPO_NDP_COUNT;
		*ptr++ = len;
		dccp_encode_value_var(ndp, ptr, ndp_len);
	}
328 329

	return 0;
330 331 332 333
}

/*
 * Number of bytes used to encode @elapsed_time: 0 when the value is zero
 * (nothing to encode), 2 when it fits into 16 bits, 4 otherwise.
 */
static inline int dccp_elapsed_time_len(const u32 elapsed_time)
{
	return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
}

/**
 * dccp_insert_option_elapsed_time  -  Prepend an Elapsed Time option
 * @sk:           socket the packet belongs to (not used by this function)
 * @skb:          packet under construction
 * @elapsed_time: value to send
 *
 * Nothing is inserted when @elapsed_time is 0.  Otherwise the value is
 * written in network byte order using 2 bytes if it fits into 16 bits and
 * 4 bytes if not.
 *
 * Returns 0 on success, -1 if the option would make the accumulated options
 * exceed DCCP_MAX_OPT_LEN.
 */
int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
				    u32 elapsed_time)
{
	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
	/* type byte + length byte + value */
	const int len = 2 + elapsed_time_len;
	unsigned char *to;

	if (elapsed_time_len == 0)
		return 0;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
		return -1;

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to    = skb_push(skb, len);
	*to++ = DCCPO_ELAPSED_TIME;
	*to++ = len;

	/* memcpy: the destination may sit at any byte offset */
	if (elapsed_time_len == 2) {
		const __be16 var16 = htons((u16)elapsed_time);
		memcpy(to, &var16, 2);
	} else {
		const __be32 var32 = htonl(elapsed_time);
		memcpy(to, &var32, 4);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);

/**
 * dccp_timestamp  -  Current time relative to the socket's epoch
 * @sk: socket whose dccps_epoch is used as reference
 * @tv: filled in with the result
 *
 * Reads the time of day into @tv, subtracts dccps_epoch field by field and
 * then borrows from tv_sec until tv_usec is no longer negative.
 */
void dccp_timestamp(const struct sock *sk, struct timeval *tv)
{
	const struct dccp_sock *dccp = dccp_sk(sk);

	do_gettimeofday(tv);
	tv->tv_usec -= dccp->dccps_epoch.tv_usec;
	tv->tv_sec  -= dccp->dccps_epoch.tv_sec;

	/* Borrow one second for each time tv_usec is found negative */
	for (; tv->tv_usec < 0; tv->tv_usec += USEC_PER_SEC)
		tv->tv_sec--;
}

EXPORT_SYMBOL_GPL(dccp_timestamp);

385
int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
386
{
I
Ian McDonald 已提交
387
	struct timeval tv;
388
	__be32 now;
389

390
	dccp_timestamp(sk, &tv);
391
	now = htonl(timeval_usecs(&tv) / 10);
I
Ian McDonald 已提交
392 393 394
	/* yes this will overflow but that is the point as we want a
	 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */

395
	return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
396 397
}

398 399
EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);

400 401
static int dccp_insert_option_timestamp_echo(struct sock *sk,
					     struct sk_buff *skb)
402 403
{
	struct dccp_sock *dp = dccp_sk(sk);
404
	struct timeval now;
405
	__be32 tstamp_echo;
406 407
	u32 elapsed_time;
	int len, elapsed_time_len;
408 409
	unsigned char *to;

410 411 412 413 414
	dccp_timestamp(sk, &now);
	elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10;
	elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
	len = 6 + elapsed_time_len;

415 416
	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
		return -1;
417 418 419 420 421 422 423 424 425 426

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to    = skb_push(skb, len);
	*to++ = DCCPO_TIMESTAMP_ECHO;
	*to++ = len;

	tstamp_echo = htonl(dp->dccps_timestamp_echo);
	memcpy(to, &tstamp_echo, 4);
	to += 4;
427

I
Ian McDonald 已提交
428
	if (elapsed_time_len == 2) {
429
		const __be16 var16 = htons((u16)elapsed_time);
I
Ian McDonald 已提交
430 431
		memcpy(to, &var16, 2);
	} else if (elapsed_time_len == 4) {
432
		const __be32 var32 = htonl(elapsed_time);
I
Ian McDonald 已提交
433 434
		memcpy(to, &var32, 4);
	}
435 436

	dp->dccps_timestamp_echo = 0;
I
Ian McDonald 已提交
437 438
	dp->dccps_timestamp_time.tv_sec = 0;
	dp->dccps_timestamp_time.tv_usec = 0;
439
	return 0;
440 441
}

442
/**
 * dccp_insert_feat_opt  -  Prepend one feature negotiation option
 * @skb:  packet under construction
 * @type: option type byte
 * @feat: feature number
 * @val:  feature value, @len bytes; not read when @len is 0
 * @len:  length of @val
 *
 * The option is laid out as type, length (@len + 3), feature number, value.
 *
 * Returns 0 on success, -1 if the option cannot be encoded or would make
 * the accumulated options exceed DCCP_MAX_OPT_LEN.
 */
static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
				u8 *val, u8 len)
{
	u8 *to;

	/*
	 * The length byte written below counts type + length + feature +
	 * value; a value longer than 252 bytes would make it wrap around.
	 */
	if (len > 0xFF - 3 ||
	    DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
		DCCP_WARN("packet too small for feature %d option!\n", feat);
		return -1;
	}

	DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3;

	to    = skb_push(skb, len + 3);
	*to++ = type;
	*to++ = len + 3;
	*to++ = feat;

	if (len)
		memcpy(to, val, len);

	dccp_pr_debug("%s(%s (%d), ...), length %d\n",
		      dccp_feat_typename(type),
		      dccp_feat_name(feat), feat, len);
	return 0;
}

static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
469 470
{
	struct dccp_sock *dp = dccp_sk(sk);
471
	struct dccp_minisock *dmsk = dccp_msk(sk);
472 473 474 475
	struct dccp_opt_pend *opt, *next;
	int change = 0;

	/* confirm any options [NN opts] */
476
	list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
477 478 479 480 481 482 483 484
		dccp_insert_feat_opt(skb, opt->dccpop_type,
				     opt->dccpop_feat, opt->dccpop_val,
				     opt->dccpop_len);
		/* fear empty confirms */
		if (opt->dccpop_val)
			kfree(opt->dccpop_val);
		kfree(opt);
	}
485
	INIT_LIST_HEAD(&dmsk->dccpms_conf);
486 487

	/* see which features we need to send */
488
	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
		/* see if we need to send any confirm */
		if (opt->dccpop_sc) {
			dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
					     opt->dccpop_feat,
					     opt->dccpop_sc->dccpoc_val,
					     opt->dccpop_sc->dccpoc_len);

			BUG_ON(!opt->dccpop_sc->dccpoc_val);
			kfree(opt->dccpop_sc->dccpoc_val);
			kfree(opt->dccpop_sc);
			opt->dccpop_sc = NULL;
		}

		/* any option not confirmed, re-send it */
		if (!opt->dccpop_conf) {
			dccp_insert_feat_opt(skb, opt->dccpop_type,
					     opt->dccpop_feat, opt->dccpop_val,
					     opt->dccpop_len);
			change++;
		}
	}

	/* Retransmit timer.
	 * If this is the master listening sock, we don't set a timer on it.  It
	 * should be fine because if the dude doesn't receive our RESPONSE
	 * [which will contain the CHANGE] he will send another REQUEST which
	 * will "retrnasmit" the change.
	 */
	if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
		dccp_pr_debug("reset feat negotiation timer %p\n", sk);

		/* XXX don't reset the timer on re-transmissions.  I.e. reset it
		 * only when sending new stuff i guess.  Currently the timer
		 * never backs off because on re-transmission it just resets it!
		 */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
	}
527 528

	return 0;
529 530
}

531
int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
532 533
{
	struct dccp_sock *dp = dccp_sk(sk);
534
	struct dccp_minisock *dmsk = dccp_msk(sk);
535 536 537

	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;

538
	if (dmsk->dccpms_send_ndp_count &&
539 540
	    dccp_insert_option_ndp(sk, skb))
		return -1;
541 542

	if (!dccp_packet_without_ack(skb)) {
543
		if (dmsk->dccpms_send_ack_vector &&
544 545 546 547 548 549 550
		    dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
		    dccp_insert_option_ackvec(sk, skb))
			return -1;

		if (dp->dccps_timestamp_echo != 0 &&
		    dccp_insert_option_timestamp_echo(sk, skb))
			return -1;
551 552
	}

553
	if (dp->dccps_hc_rx_insert_options) {
554 555
		if (ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb))
			return -1;
556 557
		dp->dccps_hc_rx_insert_options = 0;
	}
558

559
	/* Feature negotiation */
560 561 562 563 564
	/* Data packets can't do feat negotiation */
	if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
	    DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
	    dccp_insert_options_feat(sk, skb))
		return -1;
565

566 567 568 569 570 571 572 573
	/*
	 * Obtain RTT sample from Request/Response exchange.
	 * This is currently used in CCID 3 initialisation.
	 */
	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
	    dccp_insert_option_timestamp(sk, skb))
		return -1;

574 575 576 577 578 579 580 581 582 583 584 585
	/* XXX: insert other options when appropriate */

	if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
		/* The length of all options has to be a multiple of 4 */
		int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;

		if (padding != 0) {
			padding = 4 - padding;
			memset(skb_push(skb, padding), 0, padding);
			DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
		}
	}
586 587

	return 0;
588
}