/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hyperv.h>
#include <linux/uio.h>

#include "hyperv_vmbus.h"

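/* Number of pages spanned by a buffer of @len bytes starting at @addr. */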
#define NUM_PAGES_SPANNED(addr, len) \
((PAGE_ALIGN(addr + len) >> PAGE_SHIFT) - (addr >> PAGE_SHIFT))

/*
 * vmbus_setevent - Trigger an event notification on the specified
 * channel.
 */
static void vmbus_setevent(struct vmbus_channel *channel)
{
	struct hv_monitor_page *monitorpage;

	if (channel->offermsg.monitor_allocated) {
		/* Each u32 represents 32 channels */
		sync_set_bit(channel->offermsg.child_relid & 31,
			(unsigned long *) vmbus_connection.send_int_page +
			(channel->offermsg.child_relid >> 5));

		/* Get the child to parent monitor page */
		monitorpage = vmbus_connection.monitor_pages[1];

		sync_set_bit(channel->monitor_bit,
			(unsigned long *)&monitorpage->trigger_group
					[channel->monitor_grp].pending);

	} else {
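		/*
		 * The channel has no monitor page; signal the host
		 * directly.
		 */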
		vmbus_set_event(channel);
	}
}

/*
 * vmbus_open - Open the specified channel.
 */
int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
		     u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
		     void (*onchannelcallback)(void *context), void *context)
{
	struct vmbus_channel_open_channel *open_msg;
	struct vmbus_channel_msginfo *open_info = NULL;
	void *in, *out;
	unsigned long flags;
	int ret, err = 0;
	unsigned long t;
	struct page *page;

	spin_lock_irqsave(&newchannel->lock, flags);
	if (newchannel->state == CHANNEL_OPEN_STATE) {
		newchannel->state = CHANNEL_OPENING_STATE;
	} else {
		spin_unlock_irqrestore(&newchannel->lock, flags);
		return -EINVAL;
	}
	spin_unlock_irqrestore(&newchannel->lock, flags);

	newchannel->onchannel_callback = onchannelcallback;
	newchannel->channel_callback_context = context;

	/* Allocate the ring buffer */
	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
				GFP_KERNEL|__GFP_ZERO,
				get_order(send_ringbuffer_size +
				recv_ringbuffer_size));

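	/*
	 * Fall back to an allocation from any NUMA node if the
	 * node-local allocation above failed.
	 */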
	if (!page)
		out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
					       get_order(send_ringbuffer_size +
					       recv_ringbuffer_size));
	else
		out = (void *)page_address(page);

	if (!out) {
		err = -ENOMEM;
		goto error0;
	}

	in = (void *)((unsigned long)out + send_ringbuffer_size);

	newchannel->ringbuffer_pages = out;
	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
					   recv_ringbuffer_size) >> PAGE_SHIFT;

	ret = hv_ringbuffer_init(
		&newchannel->outbound, out, send_ringbuffer_size);

	if (ret != 0) {
		err = ret;
		goto error0;
	}

	ret = hv_ringbuffer_init(
		&newchannel->inbound, in, recv_ringbuffer_size);
	if (ret != 0) {
		err = ret;
		goto error0;
	}

	/* Establish the gpadl for the ring buffer */
	newchannel->ringbuffer_gpadlhandle = 0;

	ret = vmbus_establish_gpadl(newchannel,
					 newchannel->outbound.ring_buffer,
					 send_ringbuffer_size +
					 recv_ringbuffer_size,
					 &newchannel->ringbuffer_gpadlhandle);

	if (ret != 0) {
		err = ret;
		goto error0;
	}

	/* Create and init the channel open message */
	open_info = kmalloc(sizeof(*open_info) +
			   sizeof(struct vmbus_channel_open_channel),
			   GFP_KERNEL);
	if (!open_info) {
		err = -ENOMEM;
		goto error_gpadl;
	}

	init_completion(&open_info->waitevent);

	open_msg = (struct vmbus_channel_open_channel *)open_info->msg;
	open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL;
	open_msg->openid = newchannel->offermsg.child_relid;
	open_msg->child_relid = newchannel->offermsg.child_relid;
	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
	open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >>
						  PAGE_SHIFT;
	open_msg->target_vp = newchannel->target_vp;

	if (userdatalen > MAX_USER_DEFINED_BYTES) {
		err = -EINVAL;
		goto error_gpadl;
	}

	if (userdatalen)
		memcpy(open_msg->userdata, userdata, userdatalen);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&open_info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	ret = vmbus_post_msg(open_msg,
			       sizeof(struct vmbus_channel_open_channel));

	if (ret != 0) {
		err = ret;
		goto error1;
	}

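	/* Give the host up to five seconds to respond to the open request. */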
	t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
	if (t == 0) {
		err = -ETIMEDOUT;
		goto error1;
	}

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	if (open_info->response.open_result.status) {
		err = -EAGAIN;
		goto error_gpadl;
	}

	newchannel->state = CHANNEL_OPENED_STATE;
	kfree(open_info);
	return 0;

error1:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

error_gpadl:
	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);

error0:
	free_pages((unsigned long)out,
		get_order(send_ringbuffer_size + recv_ringbuffer_size));
	kfree(open_info);
	newchannel->state = CHANNEL_OPEN_STATE;
	return err;
}
EXPORT_SYMBOL_GPL(vmbus_open);

/*
 * create_gpadl_header - Create the GPADL header and body messages that
 * describe the specified buffer
 */
static int create_gpadl_header(void *kbuffer, u32 size,
					 struct vmbus_channel_msginfo **msginfo,
					 u32 *messagecount)
{
	int i;
	int pagecount;
	struct vmbus_channel_gpadl_header *gpadl_header;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msgheader;
	struct vmbus_channel_msginfo *msgbody = NULL;
	u32 msgsize;

	int pfnsum, pfncount, pfnleft, pfncurr, pfnsize;

	pagecount = size >> PAGE_SHIFT;

	/* do we need a gpadl body msg */
	pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
		  sizeof(struct vmbus_channel_gpadl_header) -
		  sizeof(struct gpa_range);
	pfncount = pfnsize / sizeof(u64);

	if (pagecount > pfncount) {
		/* we need a gpadl body */
		/* fill in the header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pfncount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (!msgheader)
			goto nomem;

		INIT_LIST_HEAD(&msgheader->submsglist);
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = size;
		for (i = 0; i < pfncount; i++)
			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
		*msginfo = msgheader;
		*messagecount = 1;

		pfnsum = pfncount;
		pfnleft = pagecount - pfncount;

		/* how many pfns can we fit */
		pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
			  sizeof(struct vmbus_channel_gpadl_body);
		pfncount = pfnsize / sizeof(u64);

		/* fill in the body */
		while (pfnleft) {
			if (pfnleft > pfncount)
				pfncurr = pfncount;
			else
				pfncurr = pfnleft;

			msgsize = sizeof(struct vmbus_channel_msginfo) +
				  sizeof(struct vmbus_channel_gpadl_body) +
				  pfncurr * sizeof(u64);
			msgbody = kzalloc(msgsize, GFP_KERNEL);

			if (!msgbody) {
				struct vmbus_channel_msginfo *pos = NULL;
				struct vmbus_channel_msginfo *tmp = NULL;
				/*
				 * Free up all the allocated messages.
				 */
				list_for_each_entry_safe(pos, tmp,
					&msgheader->submsglist,
					msglistentry) {

					list_del(&pos->msglistentry);
					kfree(pos);
				}

				goto nomem;
			}

			msgbody->msgsize = msgsize;
			(*messagecount)++;
			gpadl_body =
				(struct vmbus_channel_gpadl_body *)msgbody->msg;

			/*
			 * Gpadl is u32 and we are using a pointer which could
			 * be 64-bit. This is governed by the guest/host
			 * protocol and so the hypervisor guarantees that
			 * this is OK.
			 */
			for (i = 0; i < pfncurr; i++)
				gpadl_body->pfn[i] = slow_virt_to_phys(
					kbuffer + PAGE_SIZE * (pfnsum + i)) >>
					PAGE_SHIFT;

			/* add to msg header */
			list_add_tail(&msgbody->msglistentry,
				      &msgheader->submsglist);
			pfnsum += pfncurr;
			pfnleft -= pfncurr;
		}
	} else {
		/* everything fits in a header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pagecount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (msgheader == NULL)
			goto nomem;
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = size;
		for (i = 0; i < pagecount; i++)
			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;

		*msginfo = msgheader;
		*messagecount = 1;
	}

	return 0;
nomem:
	kfree(msgheader);
	kfree(msgbody);
	return -ENOMEM;
}

/*
 * vmbus_establish_gpadl - Establish a GPADL for the specified buffer
 *
 * @channel: a channel
 * @kbuffer: from kmalloc or vmalloc
 * @size: page-size multiple
 * @gpadl_handle: on success, the handle of the newly created GPADL
 */
int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
			       u32 size, u32 *gpadl_handle)
{
	struct vmbus_channel_gpadl_header *gpadlmsg;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msginfo = NULL;
	struct vmbus_channel_msginfo *submsginfo;
	u32 msgcount;
	struct list_head *curr;
	u32 next_gpadl_handle;
	unsigned long flags;
	int ret = 0;

	next_gpadl_handle =
		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);

	ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
	if (ret)
		return ret;

	init_completion(&msginfo->waitevent);

	gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg;
	gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER;
	gpadlmsg->child_relid = channel->offermsg.child_relid;
	gpadlmsg->gpadl = next_gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&msginfo->msglistentry,
		      &vmbus_connection.chn_msg_list);

	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize -
			       sizeof(*msginfo));
	if (ret != 0)
		goto cleanup;

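	/*
	 * Post any GPADL body messages carrying the pfns that did not
	 * fit in the header message.
	 */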
	if (msgcount > 1) {
		list_for_each(curr, &msginfo->submsglist) {

			submsginfo = (struct vmbus_channel_msginfo *)curr;
			gpadl_body =
			     (struct vmbus_channel_gpadl_body *)submsginfo->msg;

			gpadl_body->header.msgtype =
				CHANNELMSG_GPADL_BODY;
			gpadl_body->gpadl = next_gpadl_handle;

			ret = vmbus_post_msg(gpadl_body,
					       submsginfo->msgsize -
					       sizeof(*submsginfo));
			if (ret != 0)
				goto cleanup;

		}
	}
	wait_for_completion(&msginfo->waitevent);

	/* At this point, we received the gpadl created msg */
	*gpadl_handle = gpadlmsg->gpadl;

cleanup:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&msginfo->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	kfree(msginfo);
	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);

/*
 * vmbus_teardown_gpadl - Teardown the specified GPADL handle
 */
int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
{
	struct vmbus_channel_gpadl_teardown *msg;
	struct vmbus_channel_msginfo *info;
	unsigned long flags;
	int ret;

	info = kmalloc(sizeof(*info) +
		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	init_completion(&info->waitevent);

	msg = (struct vmbus_channel_gpadl_teardown *)info->msg;

	msg->header.msgtype = CHANNELMSG_GPADL_TEARDOWN;
	msg->child_relid = channel->offermsg.child_relid;
	msg->gpadl = gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
	ret = vmbus_post_msg(msg,
			       sizeof(struct vmbus_channel_gpadl_teardown));

	if (ret)
		goto post_msg_err;

	wait_for_completion(&info->waitevent);

post_msg_err:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	kfree(info);
	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);

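/*
 * reset_channel_cb - Clear the channel's callback pointer; run on the
 * channel's target CPU from vmbus_close_internal() to stop further
 * callback delivery.
 */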
static void reset_channel_cb(void *arg)
{
	struct vmbus_channel *channel = arg;

	channel->onchannel_callback = NULL;
}

static int vmbus_close_internal(struct vmbus_channel *channel)
{
	struct vmbus_channel_close_channel *msg;
	int ret;

	channel->state = CHANNEL_OPEN_STATE;
	channel->sc_creation_callback = NULL;
	/* Stop callback and cancel the timer asap */
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu, reset_channel_cb,
					 channel, true);
	} else {
		reset_channel_cb(channel);
		put_cpu();
	}

	/* Send a closing message */

	msg = &channel->close_msg.msg;

	msg->header.msgtype = CHANNELMSG_CLOSECHANNEL;
	msg->child_relid = channel->offermsg.child_relid;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel));

	if (ret) {
		pr_err("Close failed: close post msg return is %d\n", ret);
		/*
		 * If we failed to post the close msg,
		 * it is perhaps better to leak memory.
		 */
		return ret;
	}

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ringbuffer_gpadlhandle) {
		ret = vmbus_teardown_gpadl(channel,
					   channel->ringbuffer_gpadlhandle);
		if (ret) {
			pr_err("Close failed: teardown gpadl return %d\n", ret);
			/*
			 * If we failed to teardown gpadl,
			 * it is perhaps better to leak memory.
			 */
			return ret;
		}
	}

	/* Cleanup the ring buffers for this channel */
	hv_ringbuffer_cleanup(&channel->outbound);
	hv_ringbuffer_cleanup(&channel->inbound);

	free_pages((unsigned long)channel->ringbuffer_pages,
		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));

	/*
	 * If the channel has been rescinded, process device removal.
	 */
	if (channel->rescind)
		hv_process_channel_removal(channel,
					   channel->offermsg.child_relid);
	return ret;
}

/*
 * vmbus_close - Close the specified channel
 */
void vmbus_close(struct vmbus_channel *channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (channel->primary_channel != NULL) {
		/*
		 * We will only close sub-channels when
		 * the primary is closed.
		 */
		return;
	}
	/*
	 * Close all the sub-channels first and then close the
	 * primary channel.
	 */
	list_for_each_safe(cur, tmp, &channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;
		vmbus_close_internal(cur_channel);
	}
	/*
	 * Now close the primary.
	 */
	vmbus_close_internal(channel);
}
EXPORT_SYMBOL_GPL(vmbus_close);

int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
			   u32 bufferlen, u64 requestid,
			   enum vmbus_packet_type type, u32 flags, bool kick_q)
{
	struct vmpacket_descriptor desc;
	u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
	u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	int ret;
	bool signal = false;
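	/* A packet with no payload needs only the descriptor iovec. */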
	int num_vecs = ((bufferlen != 0) ? 3 : 1);

	/* Setup the descriptor */
	desc.type = type; /* VmbusPacketTypeDataInBand; */
	desc.flags = flags; /* VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; */
	/* in 8-bytes granularity */
	desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3;
	desc.len8 = (u16)(packetlen_aligned >> 3);
	desc.trans_id = requestid;

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
				  &signal);

	/*
	 * Signalling the host is conditional on many factors:
	 * 1. The ring state changed from being empty to non-empty.
	 *    This is tracked by the variable "signal".
	 * 2. The variable kick_q tracks if more data will be placed
	 *    on the ring. We will not signal if more data is
	 *    to be placed.
	 *
	 * If we cannot write to the ring-buffer, signal the host
	 * even if we may not have written anything. This is a rare
	 * enough condition that it should not matter.
	 */
	if (((ret == 0) && kick_q && signal) || (ret))
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL(vmbus_sendpacket_ctl);

/**
 * vmbus_sendpacket() - Send the specified buffer on the given channel
 * @channel: Pointer to vmbus_channel structure.
 * @buffer: Pointer to the buffer containing the data to send.
 * @bufferlen: Size of the data in the buffer.
 * @requestid: Identifier of the request
 * @type: Type of packet that is being sent e.g. negotiate, time
 * packet etc.
 * @flags: 0 or VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
 *
 * Sends data in @buffer directly to hyper-v via the vmbus.
 * This will send the data unparsed to hyper-v.
 *
 * Mainly used by Hyper-V drivers.
 */
int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
			   u32 bufferlen, u64 requestid,
			   enum vmbus_packet_type type, u32 flags)
{
	return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid,
				    type, flags, true);
}
EXPORT_SYMBOL(vmbus_sendpacket);

/*
 * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer
 * packets using a GPADL Direct packet type. This interface allows you
 * to control notifying the host. This will be useful for sending
 * batched data. Also the sender can control the send flags
 * explicitly.
 */
int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
				     struct hv_page_buffer pagebuffers[],
				     u32 pagecount, void *buffer, u32 bufferlen,
				     u64 requestid,
				     u32 flags,
				     bool kick_q)
{
	int ret;
	int i;
	struct vmbus_channel_packet_page_buffer desc;
	u32 descsize;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;

	if (pagecount > MAX_PAGE_BUFFER_COUNT)
		return -EINVAL;

	/*
	 * Adjust the size down since vmbus_channel_packet_page_buffer is the
	 * largest size we support
	 */
	descsize = sizeof(struct vmbus_channel_packet_page_buffer) -
			  ((MAX_PAGE_BUFFER_COUNT - pagecount) *
			  sizeof(struct hv_page_buffer));
	packetlen = descsize + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc.flags = flags;
	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
	desc.length8 = (u16)(packetlen_aligned >> 3);
	desc.transactionid = requestid;
	desc.rangecount = pagecount;

	for (i = 0; i < pagecount; i++) {
		desc.range[i].len = pagebuffers[i].len;
		desc.range[i].offset = pagebuffers[i].offset;
		desc.range[i].pfn = pagebuffers[i].pfn;
	}

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = descsize;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);

	/*
	 * Signalling the host is conditional on many factors:
	 * 1. The ring state changed from being empty to non-empty.
	 *    This is tracked by the variable "signal".
	 * 2. The variable kick_q tracks if more data will be placed
	 *    on the ring. We will not signal if more data is
	 *    to be placed.
	 *
	 * If we cannot write to the ring-buffer, signal the host
	 * even if we may not have written anything. This is a rare
	 * enough condition that it should not matter.
	 */
	if (((ret == 0) && kick_q && signal) || (ret))
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);

/*
 * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
 * packets using a GPADL Direct packet type.
 */
int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
				     struct hv_page_buffer pagebuffers[],
				     u32 pagecount, void *buffer, u32 bufferlen,
				     u64 requestid)
{
	u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount,
					       buffer, bufferlen, requestid,
					       flags, true);

}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);

/*
 * vmbus_sendpacket_mpb_desc - Send a multi-page buffer packet
 * using a GPADL Direct packet type.
 * The buffer includes the vmbus descriptor.
 */
int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
			      struct vmbus_packet_mpb_array *desc,
			      u32 desc_size,
			      void *buffer, u32 bufferlen, u64 requestid)
{
	int ret;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;

	packetlen = desc_size + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
	desc->length8 = (u16)(packetlen_aligned >> 3);
	desc->transactionid = requestid;
	desc->rangecount = 1;

	bufferlist[0].iov_base = desc;
	bufferlist[0].iov_len = desc_size;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);

	if (ret == 0 && signal)
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);

/*
 * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
 * using a GPADL Direct packet type.
 */
int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
				struct hv_multipage_buffer *multi_pagebuffer,
				void *buffer, u32 bufferlen, u64 requestid)
{
	int ret;
	struct vmbus_channel_packet_multipage_buffer desc;
	u32 descsize;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;
	u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
					 multi_pagebuffer->len);

	if (pfncount > MAX_MULTIPAGE_BUFFER_COUNT)
		return -EINVAL;

	/*
	 * Adjust the size down since vmbus_channel_packet_multipage_buffer is
	 * the largest size we support
	 */
	descsize = sizeof(struct vmbus_channel_packet_multipage_buffer) -
			  ((MAX_MULTIPAGE_BUFFER_COUNT - pfncount) *
			  sizeof(u64));
	packetlen = descsize + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
	desc.length8 = (u16)(packetlen_aligned >> 3);
	desc.transactionid = requestid;
	desc.rangecount = 1;

	desc.range.len = multi_pagebuffer->len;
	desc.range.offset = multi_pagebuffer->offset;

	memcpy(desc.range.pfn_array, multi_pagebuffer->pfn_array,
	       pfncount * sizeof(u64));

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = descsize;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);

	if (ret == 0 && signal)
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);

/**
 * vmbus_recvpacket() - Retrieve the user packet on the specified channel
 * @channel: Pointer to vmbus_channel structure.
 * @buffer: Pointer to the buffer you want to receive the data into.
 * @bufferlen: Maximum size of what the buffer will hold
 * @buffer_actual_len: The actual size of the data after it was received
 * @requestid: Identifier of the request
 *
 * Receives directly from the hyper-v vmbus and puts the data it received
 * into Buffer. This will receive the data unparsed from hyper-v.
 *
 * Mainly used by Hyper-V drivers.
 */
int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
			u32 bufferlen, u32 *buffer_actual_len, u64 *requestid)
{
	struct vmpacket_descriptor desc;
	u32 packetlen;
	u32 userlen;
	int ret;
	bool signal = false;

	*buffer_actual_len = 0;
	*requestid = 0;

	ret = hv_ringbuffer_peek(&channel->inbound, &desc,
			     sizeof(struct vmpacket_descriptor));
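	/* No packet available in the ring; this is not an error. */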
	if (ret != 0)
		return 0;

	packetlen = desc.len8 << 3;
	userlen = packetlen - (desc.offset8 << 3);

	*buffer_actual_len = userlen;

	if (userlen > bufferlen) {
		pr_err("Buffer too small - got %d needs %d\n",
			   bufferlen, userlen);
		return -ETOOSMALL;
	}

	*requestid = desc.trans_id;

	/* Copy over the packet to the user buffer */
	ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen,
			     (desc.offset8 << 3), &signal);

	if (signal)
		vmbus_setevent(channel);

	return 0;
}
EXPORT_SYMBOL(vmbus_recvpacket);

/*
 * vmbus_recvpacket_raw - Retrieve the raw packet on the specified channel
 */
int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
			      u32 bufferlen, u32 *buffer_actual_len,
			      u64 *requestid)
{
	struct vmpacket_descriptor desc;
	u32 packetlen;
	int ret;
	bool signal = false;

	*buffer_actual_len = 0;
	*requestid = 0;

	ret = hv_ringbuffer_peek(&channel->inbound, &desc,
			     sizeof(struct vmpacket_descriptor));
	if (ret != 0)
		return 0;

	packetlen = desc.len8 << 3;

	*buffer_actual_len = packetlen;

	if (packetlen > bufferlen)
		return -ENOBUFS;

	*requestid = desc.trans_id;

	/* Copy over the entire packet to the user buffer */
	ret = hv_ringbuffer_read(&channel->inbound, buffer, packetlen, 0,
				 &signal);

	if (signal)
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);