/*
 * net/tipc/msg.c: TIPC message header routines
 *
 * Copyright (c) 2000-2006, 2014, Ericsson AB
 * Copyright (c) 2005, 2010-2011, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <net/sock.h>
P
Per Liden 已提交
38 39
#include "core.h"
#include "msg.h"
40 41
#include "addr.h"
#include "name_table.h"
P
Per Liden 已提交
42

43 44
/* Byte limit on message data: tipc_msg_reverse() keeps at most this much
 * data in a reversed message, and tipc_msg_eval() refuses to reroute a
 * message whose data is larger than this.
 */
#define MAX_FORWARD_SIZE 1024

45
/* Round @i up to the nearest multiple of four; aligned values are unchanged */
static unsigned int align(unsigned int i)
{
	return (i + 3) & ~3u;
}

Y
Ying Xue 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
/**
 * tipc_buf_acquire - allocate a buffer for a TIPC message
 * @size: number of message bytes the buffer must hold (TIPC header included)
 *
 * Returns the new buffer with @size bytes of data already reserved, or NULL
 * if the atomic allocation fails.
 *
 * NOTE: BUF_HEADROOM bytes are kept free in front of the message so that a
 *       data link header can be prepended later. The allocation is rounded
 *       up to a multiple of four, so extra tailroom may be present.
 */
struct sk_buff *tipc_buf_acquire(u32 size)
{
	/* Headroom plus payload, rounded up to a four-byte multiple */
	unsigned int alloc_len = (BUF_HEADROOM + size + 3) & ~3u;
	struct sk_buff *buf = alloc_skb_fclone(alloc_len, GFP_ATOMIC);

	if (!buf)
		return NULL;

	skb_reserve(buf, BUF_HEADROOM);
	skb_put(buf, size);
	buf->next = NULL;
	return buf;
}

73 74
/**
 * tipc_msg_init - initialize a TIPC message header
 * @net: network namespace; its own node address is written into the header
 * @m: header to initialize; the first @hsize bytes are zeroed beforehand
 * @user: value for the header's user field
 * @type: value for the header's type field
 * @hsize: header size in bytes; also stored as the initial message size
 * @destnode: destination node; only stored when @hsize > SHORT_H_SIZE
 *
 * The previous-node field is always set to the own address. The originating
 * node (own address) and destination node fields are filled in only for
 * headers larger than SHORT_H_SIZE.
 */
void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type,
		   u32 hsize, u32 destnode)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	memset(m, 0, hsize);
	msg_set_version(m);
	msg_set_user(m, user);
	msg_set_hdr_sz(m, hsize);
	msg_set_size(m, hsize);
	msg_set_prevnode(m, tn->own_addr);
	msg_set_type(m, type);
	if (hsize > SHORT_H_SIZE) {
		msg_set_orignode(m, tn->own_addr);
		msg_set_destnode(m, destnode);
	}
}

91 92 93
/**
 * tipc_msg_create - allocate a buffer and fill in its TIPC header
 * @net: network namespace, handed to tipc_msg_init()
 * @user: value for the header's user field
 * @type: value for the header's type field
 * @hdr_sz: header size in bytes
 * @data_sz: size in bytes of the data area after the header; this function
 *           allocates room for it but does not write to it
 * @dnode: destination node
 * @onode: node stored as previous node and, for headers larger than
 *         SHORT_H_SIZE, as originating node
 * @dport: destination port
 * @oport: originating port
 * @errcode: value for the header's error code field
 *
 * Returns the new buffer, or NULL if the buffer could not be allocated.
 */
struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type,
				uint hdr_sz, uint data_sz, u32 dnode,
				u32 onode, u32 dport, u32 oport, int errcode)
{
	struct tipc_msg *msg;
	struct sk_buff *buf;

	buf = tipc_buf_acquire(hdr_sz + data_sz);
	if (unlikely(!buf))
		return NULL;

	msg = buf_msg(buf);
	tipc_msg_init(net, msg, user, type, hdr_sz, dnode);
	/* tipc_msg_init() stamped the own address and a header-only size;
	 * override them with the caller's values
	 */
	msg_set_size(msg, hdr_sz + data_sz);
	msg_set_prevnode(msg, onode);
	msg_set_origport(msg, oport);
	msg_set_destport(msg, dport);
	msg_set_errcode(msg, errcode);
	if (hdr_sz > SHORT_H_SIZE) {
		msg_set_orignode(msg, onode);
		msg_set_destnode(msg, dnode);
	}
	return buf;
}

116
/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
117 118 119
 * @*headbuf: in:  NULL for first frag, otherwise value returned from prev call
 *            out: set when successful non-complete reassembly, otherwise NULL
 * @*buf:     in:  the buffer to append. Always defined
S
stephen hemminger 已提交
120
 *            out: head buf after successful complete reassembly, otherwise NULL
121
 * Returns 1 when reassembly complete, otherwise 0
122 123 124 125 126 127
 */
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
{
	struct sk_buff *head = *headbuf;
	struct sk_buff *frag = *buf;
	struct sk_buff *tail;
128 129
	struct tipc_msg *msg;
	u32 fragid;
130
	int delta;
131
	bool headstolen;
132

133 134 135 136 137 138
	if (!frag)
		goto err;

	msg = buf_msg(frag);
	fragid = msg_type(msg);
	frag->next = NULL;
139 140 141
	skb_pull(frag, msg_hdr_sz(msg));

	if (fragid == FIRST_FRAGMENT) {
142 143 144 145
		if (unlikely(head))
			goto err;
		if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
			goto err;
146 147
		head = *headbuf = frag;
		skb_frag_list_init(head);
148
		TIPC_SKB_CB(head)->tail = NULL;
149
		*buf = NULL;
150 151
		return 0;
	}
152

153
	if (!head)
154 155
		goto err;

156 157 158
	if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
		kfree_skb_partial(frag, headstolen);
	} else {
159
		tail = TIPC_SKB_CB(head)->tail;
160 161 162 163 164 165 166 167 168
		if (!skb_has_frag_list(head))
			skb_shinfo(head)->frag_list = frag;
		else
			tail->next = frag;
		head->truesize += frag->truesize;
		head->data_len += frag->len;
		head->len += frag->len;
		TIPC_SKB_CB(head)->tail = frag;
	}
169

170 171 172 173 174 175 176 177
	if (fragid == LAST_FRAGMENT) {
		*buf = head;
		TIPC_SKB_CB(head)->tail = NULL;
		*headbuf = NULL;
		return 1;
	}
	*buf = NULL;
	return 0;
178 179

err:
180 181
	pr_warn_ratelimited("Unable to build fragment list\n");
	kfree_skb(*buf);
182 183
	kfree_skb(*headbuf);
	*buf = *headbuf = NULL;
184 185
	return 0;
}
186

187 188

/**
189
 * tipc_msg_build - create buffer chain containing specified header and data
190
 * @mhdr: Message header, to be prepended to data
191
 * @m: User message
192 193 194
 * @offset: Posision in iov to start copying from
 * @dsz: Total length of user data
 * @pktmax: Max packet size that can be used
195 196
 * @list: Buffer or chain of buffers to be returned to caller
 *
197 198
 * Returns message data size or errno: -ENOMEM, -EFAULT
 */
199 200
int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m,
		   int offset, int dsz, int pktmax, struct sk_buff_head *list)
201 202 203 204 205 206 207 208
{
	int mhsz = msg_hdr_sz(mhdr);
	int msz = mhsz + dsz;
	int pktno = 1;
	int pktsz;
	int pktrem = pktmax;
	int drem = dsz;
	struct tipc_msg pkthdr;
209
	struct sk_buff *skb;
210 211
	char *pktpos;
	int rc;
212

213 214 215 216
	msg_set_size(mhdr, msz);

	/* No fragmentation needed? */
	if (likely(msz <= pktmax)) {
217 218
		skb = tipc_buf_acquire(msz);
		if (unlikely(!skb))
219
			return -ENOMEM;
220
		skb_orphan(skb);
221 222 223
		__skb_queue_tail(list, skb);
		skb_copy_to_linear_data(skb, mhdr, mhsz);
		pktpos = skb->data + mhsz;
A
Al Viro 已提交
224
		if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset,
225
						 dsz))
226 227 228 229 230 231
			return dsz;
		rc = -EFAULT;
		goto error;
	}

	/* Prepare reusable fragment header */
232 233
	tipc_msg_init(net, &pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE,
		      msg_destnode(mhdr));
234 235 236 237
	msg_set_size(&pkthdr, pktmax);
	msg_set_fragm_no(&pkthdr, pktno);

	/* Prepare first fragment */
238 239
	skb = tipc_buf_acquire(pktmax);
	if (!skb)
240
		return -ENOMEM;
241
	skb_orphan(skb);
242 243 244
	__skb_queue_tail(list, skb);
	pktpos = skb->data;
	skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
245 246
	pktpos += INT_H_SIZE;
	pktrem -= INT_H_SIZE;
247
	skb_copy_to_linear_data_offset(skb, INT_H_SIZE, mhdr, mhsz);
248 249 250 251 252 253 254
	pktpos += mhsz;
	pktrem -= mhsz;

	do {
		if (drem < pktrem)
			pktrem = drem;

A
Al Viro 已提交
255
		if (memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, pktrem)) {
256 257 258 259 260 261 262 263 264 265 266 267 268 269
			rc = -EFAULT;
			goto error;
		}
		drem -= pktrem;
		offset += pktrem;

		if (!drem)
			break;

		/* Prepare new fragment: */
		if (drem < (pktmax - INT_H_SIZE))
			pktsz = drem + INT_H_SIZE;
		else
			pktsz = pktmax;
270 271
		skb = tipc_buf_acquire(pktsz);
		if (!skb) {
272 273 274
			rc = -ENOMEM;
			goto error;
		}
275
		skb_orphan(skb);
276
		__skb_queue_tail(list, skb);
277 278 279
		msg_set_type(&pkthdr, FRAGMENT);
		msg_set_size(&pkthdr, pktsz);
		msg_set_fragm_no(&pkthdr, ++pktno);
280 281
		skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
		pktpos = skb->data + INT_H_SIZE;
282 283 284
		pktrem = pktsz - INT_H_SIZE;

	} while (1);
285
	msg_set_type(buf_msg(skb), LAST_FRAGMENT);
286 287
	return dsz;
error:
288 289
	__skb_queue_purge(list);
	__skb_queue_head_init(list);
290 291 292
	return rc;
}

293 294
/**
 * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
295 296
 * @list: the buffer chain of the existing buffer ("bundle")
 * @skb:  buffer to be appended
297 298 299 300
 * @mtu:  max allowable size for the bundle buffer
 * Consumes buffer if successful
 * Returns true if bundling could be performed, otherwise false
 */
301
bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu)
302
{
303 304 305
	struct sk_buff *bskb = skb_peek_tail(list);
	struct tipc_msg *bmsg = buf_msg(bskb);
	struct tipc_msg *msg = buf_msg(skb);
306 307 308 309 310 311 312 313 314 315 316 317 318 319
	unsigned int bsz = msg_size(bmsg);
	unsigned int msz = msg_size(msg);
	u32 start = align(bsz);
	u32 max = mtu - INT_H_SIZE;
	u32 pad = start - bsz;

	if (likely(msg_user(msg) == MSG_FRAGMENTER))
		return false;
	if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL))
		return false;
	if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
		return false;
	if (likely(msg_user(bmsg) != MSG_BUNDLER))
		return false;
320
	if (likely(!TIPC_SKB_CB(bskb)->bundling))
321
		return false;
322
	if (unlikely(skb_tailroom(bskb) < (pad + msz)))
323 324 325 326
		return false;
	if (unlikely(max < (start + msz)))
		return false;

327 328
	skb_put(bskb, pad + msz);
	skb_copy_to_linear_data_offset(bskb, start, skb->data, msz);
329 330
	msg_set_size(bmsg, start + msz);
	msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
331
	kfree_skb(skb);
332 333 334 335 336
	return true;
}

/**
 * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
337 338 339
 * @list: the buffer chain
 * @skb: buffer to be appended and replaced
 * @mtu: max allowable size for the bundle buffer, inclusive header
340 341
 * @dnode: destination node for message. (Not always present in header)
 * Replaces buffer if successful
S
stephen hemminger 已提交
342
 * Returns true if success, otherwise false
343
 */
344 345
bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list,
			  struct sk_buff *skb, u32 mtu, u32 dnode)
346
{
347
	struct sk_buff *bskb;
348
	struct tipc_msg *bmsg;
349
	struct tipc_msg *msg = buf_msg(skb);
350 351 352 353 354 355 356 357 358 359 360 361
	u32 msz = msg_size(msg);
	u32 max = mtu - INT_H_SIZE;

	if (msg_user(msg) == MSG_FRAGMENTER)
		return false;
	if (msg_user(msg) == CHANGEOVER_PROTOCOL)
		return false;
	if (msg_user(msg) == BCAST_PROTOCOL)
		return false;
	if (msz > (max / 2))
		return false;

362 363
	bskb = tipc_buf_acquire(max);
	if (!bskb)
364 365
		return false;

366 367
	skb_trim(bskb, INT_H_SIZE);
	bmsg = buf_msg(bskb);
368
	tipc_msg_init(net, bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode);
369 370 371
	msg_set_seqno(bmsg, msg_seqno(msg));
	msg_set_ack(bmsg, msg_ack(msg));
	msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
372 373 374
	TIPC_SKB_CB(bskb)->bundling = true;
	__skb_queue_tail(list, bskb);
	return tipc_msg_bundle(list, skb, mtu);
375
}
376 377 378 379 380 381 382 383 384

/**
 * tipc_msg_reverse(): swap source and destination addresses and add error code
 * @buf:  buffer containing message to be reversed
 * @dnode: return value: node where to send message after reversal
 * @err:  error code to be set in message
 * Consumes buffer if failure
 * Returns true if success, otherwise false
 */
385 386
bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode,
		      int err)
387
{
388
	struct tipc_net *tn = net_generic(net, tipc_net_id);
389 390 391 392 393
	struct tipc_msg *msg = buf_msg(buf);
	uint imp = msg_importance(msg);
	struct tipc_msg ohdr;
	uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE);

394
	if (skb_linearize(buf))
395
		goto exit;
396 397 398
	if (msg_dest_droppable(msg))
		goto exit;
	if (msg_errcode(msg))
399 400 401
		goto exit;

	memcpy(&ohdr, msg, msg_hdr_sz(msg));
402 403 404
	imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE);
	if (msg_isdata(msg))
		msg_set_importance(msg, imp);
405 406 407
	msg_set_errcode(msg, err);
	msg_set_origport(msg, msg_destport(&ohdr));
	msg_set_destport(msg, msg_origport(&ohdr));
408
	msg_set_prevnode(msg, tn->own_addr);
409 410 411 412 413 414 415 416 417 418 419 420 421
	if (!msg_short(msg)) {
		msg_set_orignode(msg, msg_destnode(&ohdr));
		msg_set_destnode(msg, msg_orignode(&ohdr));
	}
	msg_set_size(msg, msg_hdr_sz(msg) + rdsz);
	skb_trim(buf, msg_size(msg));
	skb_orphan(buf);
	*dnode = msg_orignode(&ohdr);
	return true;
exit:
	kfree_skb(buf);
	return false;
}
422 423 424 425 426 427 428 429 430 431 432

/**
 * tipc_msg_eval: determine fate of message that found no destination
 * @buf: the buffer containing the message.
 * @dnode: return value: next-hop node, if message to be forwarded
 * @err: error code to use, if message to be rejected
 *
 * Does not consume buffer
 * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error
 * code if message to be rejected
 */
433
int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode)
434 435 436 437 438 439 440 441 442 443 444 445 446
{
	struct tipc_msg *msg = buf_msg(buf);
	u32 dport;

	if (msg_type(msg) != TIPC_NAMED_MSG)
		return -TIPC_ERR_NO_PORT;
	if (skb_linearize(buf))
		return -TIPC_ERR_NO_NAME;
	if (msg_data_sz(msg) > MAX_FORWARD_SIZE)
		return -TIPC_ERR_NO_NAME;
	if (msg_reroute_cnt(msg) > 0)
		return -TIPC_ERR_NO_NAME;

447
	*dnode = addr_domain(net, msg_lookup_scope(msg));
448
	dport = tipc_nametbl_translate(net, msg_nametype(msg),
449 450 451 452 453 454 455 456 457
				       msg_nameinst(msg),
				       dnode);
	if (!dport)
		return -TIPC_ERR_NO_NAME;
	msg_incr_reroute_cnt(msg);
	msg_set_destnode(msg, *dnode);
	msg_set_destport(msg, dport);
	return TIPC_OK;
}
458 459 460 461

/* tipc_msg_reassemble() - clone a buffer chain of fragments and
 *                         reassemble the clones into one message
 */
462
struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list)
463
{
464 465
	struct sk_buff *skb;
	struct sk_buff *frag = NULL;
466 467 468 469
	struct sk_buff *head = NULL;
	int hdr_sz;

	/* Copy header if single buffer */
470 471 472 473
	if (skb_queue_len(list) == 1) {
		skb = skb_peek(list);
		hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb));
		return __pskb_copy(skb, hdr_sz, GFP_ATOMIC);
474 475 476
	}

	/* Clone all fragments and reassemble */
477 478
	skb_queue_walk(list, skb) {
		frag = skb_clone(skb, GFP_ATOMIC);
479 480 481 482 483 484 485 486 487 488 489 490 491 492
		if (!frag)
			goto error;
		frag->next = NULL;
		if (tipc_buf_append(&head, &frag))
			break;
		if (!head)
			goto error;
	}
	return frag;
error:
	pr_warn("Failed do clone local mcast rcv buffer\n");
	kfree_skb(head);
	return NULL;
}