/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
#include <linux/interrupt.h>

#include "hyperv_vmbus.h"

#define NUM_PAGES_SPANNED(addr, len) \
((PAGE_ALIGN(addr + len) >> PAGE_SHIFT) - (addr >> PAGE_SHIFT))
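
/*
 * Worked example (illustrative, 4 KiB pages): an 8-byte buffer at
 * addr = 0x0ff8 gives PAGE_ALIGN(0x1000) >> PAGE_SHIFT = 1 and
 * addr >> PAGE_SHIFT = 0, so it spans 1 page; the same 8 bytes at
 * addr = 0x0ffc align up to 0x2000 and span 2 pages.
 */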

/*
 * vmbus_setevent - Trigger an event notification on the specified
 * channel.
 */
static void vmbus_setevent(struct vmbus_channel *channel)
{
	struct hv_monitor_page *monitorpage;

	if (channel->offermsg.monitor_allocated) {
		/* Each u32 represents 32 channels */
		sync_set_bit(channel->offermsg.child_relid & 31,
			(unsigned long *) vmbus_connection.send_int_page +
			(channel->offermsg.child_relid >> 5));

		/* Get the child to parent monitor page */
		monitorpage = vmbus_connection.monitor_pages[1];

		sync_set_bit(channel->monitor_bit,
			(unsigned long *)&monitorpage->trigger_group
					[channel->monitor_grp].pending);

	} else {
		vmbus_set_event(channel);
	}
}
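
/*
 * Worked example for the interrupt-page math above (illustrative): each
 * u32 covers 32 channels, so child_relid >> 5 picks the word and
 * child_relid & 31 the bit within it; e.g. child_relid 40 maps to bit 8
 * of word 1.
 */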

/*
 * vmbus_open - Open the specified channel.
 */
int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
		     u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
		     void (*onchannelcallback)(void *context), void *context)
{
	struct vmbus_channel_open_channel *open_msg;
	struct vmbus_channel_msginfo *open_info = NULL;
	void *in, *out;
	unsigned long flags;
	int ret, err = 0;
	unsigned long t;
	struct page *page;

	spin_lock_irqsave(&newchannel->lock, flags);
	if (newchannel->state == CHANNEL_OPEN_STATE) {
		newchannel->state = CHANNEL_OPENING_STATE;
	} else {
		spin_unlock_irqrestore(&newchannel->lock, flags);
		return -EINVAL;
	}
	spin_unlock_irqrestore(&newchannel->lock, flags);

	newchannel->onchannel_callback = onchannelcallback;
	newchannel->channel_callback_context = context;

	/* Allocate the ring buffer */
	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
				GFP_KERNEL|__GFP_ZERO,
				get_order(send_ringbuffer_size +
				recv_ringbuffer_size));

	if (!page)
		out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
					       get_order(send_ringbuffer_size +
					       recv_ringbuffer_size));
	else
		out = (void *)page_address(page);

	if (!out) {
		err = -ENOMEM;
		goto error0;
	}

	in = (void *)((unsigned long)out + send_ringbuffer_size);

	newchannel->ringbuffer_pages = out;
	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
					   recv_ringbuffer_size) >> PAGE_SHIFT;

	ret = hv_ringbuffer_init(
		&newchannel->outbound, out, send_ringbuffer_size);

	if (ret != 0) {
		err = ret;
		goto error0;
	}

	ret = hv_ringbuffer_init(
		&newchannel->inbound, in, recv_ringbuffer_size);
	if (ret != 0) {
		err = ret;
		goto error0;
	}

	/* Establish the gpadl for the ring buffer */
	newchannel->ringbuffer_gpadlhandle = 0;

	ret = vmbus_establish_gpadl(newchannel,
					 newchannel->outbound.ring_buffer,
					 send_ringbuffer_size +
					 recv_ringbuffer_size,
					 &newchannel->ringbuffer_gpadlhandle);

	if (ret != 0) {
		err = ret;
		goto error0;
	}

	/* Create and init the channel open message */
	open_info = kmalloc(sizeof(*open_info) +
			   sizeof(struct vmbus_channel_open_channel),
			   GFP_KERNEL);
	if (!open_info) {
		err = -ENOMEM;
		goto error_gpadl;
	}

	init_completion(&open_info->waitevent);

	open_msg = (struct vmbus_channel_open_channel *)open_info->msg;
	open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL;
	open_msg->openid = newchannel->offermsg.child_relid;
	open_msg->child_relid = newchannel->offermsg.child_relid;
	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
	open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >>
						  PAGE_SHIFT;
	open_msg->target_vp = newchannel->target_vp;

	if (userdatalen > MAX_USER_DEFINED_BYTES) {
		err = -EINVAL;
		goto error_gpadl;
	}

	if (userdatalen)
		memcpy(open_msg->userdata, userdata, userdatalen);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&open_info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	ret = vmbus_post_msg(open_msg,
			       sizeof(struct vmbus_channel_open_channel));

	if (ret != 0) {
		err = ret;
		goto error1;
	}

	t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
	if (t == 0) {
		err = -ETIMEDOUT;
		goto error1;
	}

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	if (open_info->response.open_result.status) {
		err = -EAGAIN;
		goto error_gpadl;
	}

	newchannel->state = CHANNEL_OPENED_STATE;
	kfree(open_info);
	return 0;

error1:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

error_gpadl:
	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);

error0:
	free_pages((unsigned long)out,
		get_order(send_ringbuffer_size + recv_ringbuffer_size));
	kfree(open_info);
	newchannel->state = CHANNEL_OPEN_STATE;
	return err;
}
EXPORT_SYMBOL_GPL(vmbus_open);
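
/*
 * Usage sketch (illustrative only, not part of this file): how a VSC
 * driver's probe() might open its channel. The callback name and the
 * 4-page ring sizes are assumptions for the example.
 *
 *	static void sample_onchannelcallback(void *context)
 *	{
 *		struct vmbus_channel *chan = context;
 *
 *		// drain the inbound ring with vmbus_recvpacket() here
 *	}
 *
 *	static int sample_probe(struct hv_device *dev)
 *	{
 *		return vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
 *				  NULL, 0, sample_onchannelcallback,
 *				  dev->channel);
 *	}
 */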

/*
 * create_gpadl_header - Creates a gpadl for the specified buffer
 */
static int create_gpadl_header(void *kbuffer, u32 size,
					 struct vmbus_channel_msginfo **msginfo,
					 u32 *messagecount)
{
	int i;
	int pagecount;
	struct vmbus_channel_gpadl_header *gpadl_header;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msgheader;
	struct vmbus_channel_msginfo *msgbody = NULL;
	u32 msgsize;

	int pfnsum, pfncount, pfnleft, pfncurr, pfnsize;

	pagecount = size >> PAGE_SHIFT;

	/* do we need a gpadl body msg */
	pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
		  sizeof(struct vmbus_channel_gpadl_header) -
		  sizeof(struct gpa_range);
	pfncount = pfnsize / sizeof(u64);

	if (pagecount > pfncount) {
		/* we need a gpadl body */
		/* fill in the header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pfncount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (!msgheader)
			goto nomem;

		INIT_LIST_HEAD(&msgheader->submsglist);
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = size;
		for (i = 0; i < pfncount; i++)
			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
		*msginfo = msgheader;
		*messagecount = 1;

		pfnsum = pfncount;
		pfnleft = pagecount - pfncount;

		/* how many pfns can we fit */
		pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
			  sizeof(struct vmbus_channel_gpadl_body);
		pfncount = pfnsize / sizeof(u64);

		/* fill in the body */
		while (pfnleft) {
			if (pfnleft > pfncount)
				pfncurr = pfncount;
			else
				pfncurr = pfnleft;

			msgsize = sizeof(struct vmbus_channel_msginfo) +
				  sizeof(struct vmbus_channel_gpadl_body) +
				  pfncurr * sizeof(u64);
			msgbody = kzalloc(msgsize, GFP_KERNEL);

			if (!msgbody) {
				struct vmbus_channel_msginfo *pos = NULL;
				struct vmbus_channel_msginfo *tmp = NULL;
				/*
				 * Free up all the allocated messages.
				 */
				list_for_each_entry_safe(pos, tmp,
					&msgheader->submsglist,
					msglistentry) {

					list_del(&pos->msglistentry);
					kfree(pos);
				}

				goto nomem;
			}

			msgbody->msgsize = msgsize;
			(*messagecount)++;
			gpadl_body =
				(struct vmbus_channel_gpadl_body *)msgbody->msg;

			/*
			 * Gpadl is u32 and we are using a pointer which could
			 * be 64-bit.
			 * This is governed by the guest/host protocol and
			 * so the hypervisor guarantees that this is ok.
			 */
			for (i = 0; i < pfncurr; i++)
				gpadl_body->pfn[i] = slow_virt_to_phys(
					kbuffer + PAGE_SIZE * (pfnsum + i)) >>
					PAGE_SHIFT;

			/* add to msg header */
			list_add_tail(&msgbody->msglistentry,
				      &msgheader->submsglist);
			pfnsum += pfncurr;
			pfnleft -= pfncurr;
		}
	} else {
		/* everything fits in a header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pagecount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (msgheader == NULL)
			goto nomem;
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = size;
		for (i = 0; i < pagecount; i++)
			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;

		*msginfo = msgheader;
		*messagecount = 1;
	}

	return 0;
nomem:
	kfree(msgheader);
	kfree(msgbody);
	return -ENOMEM;
}
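
/*
 * Worked example (illustrative numbers): suppose the header message has
 * room for 26 PFNs after the gpadl header and gpa_range, and each body
 * message has room for 28 PFNs. A 100-page buffer is then described by
 * one CHANNELMSG_GPADL_HEADER carrying 26 PFNs followed by three
 * CHANNELMSG_GPADL_BODY messages carrying 28 + 28 + 18 PFNs.
 */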

/*
 * vmbus_establish_gpadl - Establish a GPADL for the specified buffer
 *
 * @channel: a channel
 * @kbuffer: from kmalloc or vmalloc
 * @size: page-size multiple
 * @gpadl_handle: on success, filled in with the handle of the new GPADL
 */
int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
			       u32 size, u32 *gpadl_handle)
{
	struct vmbus_channel_gpadl_header *gpadlmsg;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msginfo = NULL;
	struct vmbus_channel_msginfo *submsginfo;
	u32 msgcount;
	struct list_head *curr;
	u32 next_gpadl_handle;
	unsigned long flags;
	int ret = 0;

	next_gpadl_handle =
		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);

	ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
	if (ret)
		return ret;

	init_completion(&msginfo->waitevent);

	gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg;
	gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER;
	gpadlmsg->child_relid = channel->offermsg.child_relid;
	gpadlmsg->gpadl = next_gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&msginfo->msglistentry,
		      &vmbus_connection.chn_msg_list);

	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize -
			       sizeof(*msginfo));
	if (ret != 0)
		goto cleanup;

	if (msgcount > 1) {
		list_for_each(curr, &msginfo->submsglist) {

			submsginfo = (struct vmbus_channel_msginfo *)curr;
			gpadl_body =
			     (struct vmbus_channel_gpadl_body *)submsginfo->msg;

			gpadl_body->header.msgtype =
				CHANNELMSG_GPADL_BODY;
			gpadl_body->gpadl = next_gpadl_handle;

			ret = vmbus_post_msg(gpadl_body,
					       submsginfo->msgsize -
					       sizeof(*submsginfo));
			if (ret != 0)
				goto cleanup;

		}
	}
	wait_for_completion(&msginfo->waitevent);

	/* At this point, we received the gpadl created msg */
	*gpadl_handle = gpadlmsg->gpadl;

cleanup:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&msginfo->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	kfree(msginfo);
	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);
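
/*
 * Usage sketch (illustrative): mapping a guest buffer into the host via a
 * GPADL and tearing it down again. The two-page buffer is an assumption;
 * the size passed in must be a page-size multiple.
 *
 *	u32 gpadl = 0;
 *	void *buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
 *	int ret;
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	ret = vmbus_establish_gpadl(channel, buf, 2 * PAGE_SIZE, &gpadl);
 *	if (ret)
 *		goto free_buf;
 *	// ... hand "gpadl" to the host in a channel-specific message ...
 *	vmbus_teardown_gpadl(channel, gpadl);
 */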

/*
 * vmbus_teardown_gpadl - Teardown the specified GPADL handle
 */
int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
{
	struct vmbus_channel_gpadl_teardown *msg;
	struct vmbus_channel_msginfo *info;
	unsigned long flags;
	int ret;

	info = kmalloc(sizeof(*info) +
		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	init_completion(&info->waitevent);

	msg = (struct vmbus_channel_gpadl_teardown *)info->msg;

	msg->header.msgtype = CHANNELMSG_GPADL_TEARDOWN;
	msg->child_relid = channel->offermsg.child_relid;
	msg->gpadl = gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
	ret = vmbus_post_msg(msg,
			       sizeof(struct vmbus_channel_gpadl_teardown));

	if (ret)
		goto post_msg_err;

	wait_for_completion(&info->waitevent);

post_msg_err:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	kfree(info);
	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);

static void reset_channel_cb(void *arg)
{
	struct vmbus_channel *channel = arg;

	channel->onchannel_callback = NULL;
}

static int vmbus_close_internal(struct vmbus_channel *channel)
{
	struct vmbus_channel_close_channel *msg;
	struct tasklet_struct *tasklet;
	int ret;

	/*
	 * process_chn_event(), running in the tasklet, can race
	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
	 * the former is accessing channel->inbound.ring_buffer, the latter
	 * could be freeing the ring_buffer pages.
	 *
	 * To resolve the race, we can serialize them by disabling the
	 * tasklet when the latter is running here.
	 */
	tasklet = hv_context.event_dpc[channel->target_cpu];
	tasklet_disable(tasklet);

	/*
	 * In case a device driver's probe() fails (e.g.,
	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
	 * rescinded later (e.g., we dynamically disable an Integrated Service
	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
	 * here we should skip most of the below cleanup work.
	 */
	if (channel->state != CHANNEL_OPENED_STATE) {
		ret = -EINVAL;
		goto out;
	}

	channel->state = CHANNEL_OPEN_STATE;
	channel->sc_creation_callback = NULL;
	/* Stop callback and cancel the timer asap */
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu, reset_channel_cb,
					 channel, true);
	} else {
		reset_channel_cb(channel);
		put_cpu();
	}

	/* Send a closing message */

	msg = &channel->close_msg.msg;

	msg->header.msgtype = CHANNELMSG_CLOSECHANNEL;
	msg->child_relid = channel->offermsg.child_relid;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel));

	if (ret) {
		pr_err("Close failed: close post msg return is %d\n", ret);
		/*
		 * If we failed to post the close msg,
		 * it is perhaps better to leak memory.
		 */
		goto out;
	}

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ringbuffer_gpadlhandle) {
		ret = vmbus_teardown_gpadl(channel,
					   channel->ringbuffer_gpadlhandle);
		if (ret) {
			pr_err("Close failed: teardown gpadl return %d\n", ret);
			/*
			 * If we failed to teardown gpadl,
			 * it is perhaps better to leak memory.
			 */
			goto out;
		}
	}

	/* Cleanup the ring buffers for this channel */
	hv_ringbuffer_cleanup(&channel->outbound);
	hv_ringbuffer_cleanup(&channel->inbound);

	free_pages((unsigned long)channel->ringbuffer_pages,
		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));

out:
	tasklet_enable(tasklet);

	return ret;
}

/*
 * vmbus_close - Close the specified channel
 */
void vmbus_close(struct vmbus_channel *channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (channel->primary_channel != NULL) {
		/*
		 * We will only close sub-channels when
		 * the primary is closed.
		 */
		return;
	}
	/*
	 * Close all the sub-channels first and then close the
	 * primary channel.
	 */
	list_for_each_safe(cur, tmp, &channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;
		vmbus_close_internal(cur_channel);
	}
	/*
	 * Now close the primary.
	 */
	vmbus_close_internal(channel);
}
EXPORT_SYMBOL_GPL(vmbus_close);
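
/*
 * Usage sketch (illustrative): the mirror of the open sequence, as a
 * hypothetical driver's remove() might issue it.
 *
 *	static int sample_remove(struct hv_device *dev)
 *	{
 *		vmbus_close(dev->channel);
 *		// free any per-device state allocated in probe() here
 *		return 0;
 *	}
 */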

int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
			   u32 bufferlen, u64 requestid,
			   enum vmbus_packet_type type, u32 flags, bool kick_q)
{
	struct vmpacket_descriptor desc;
	u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
	u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	int ret;
	bool signal = false;
	int num_vecs = ((bufferlen != 0) ? 3 : 1);

	/* Setup the descriptor */
	desc.type = type; /* VmbusPacketTypeDataInBand; */
	desc.flags = flags; /* VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; */
	/* in 8-bytes granularity */
	desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3;
	desc.len8 = (u16)(packetlen_aligned >> 3);
	desc.trans_id = requestid;

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
				  &signal);

	/*
	 * Signalling the host is conditional on many factors:
	 * 1. The ring state changed from being empty to non-empty.
	 *    This is tracked by the variable "signal".
	 * 2. The variable kick_q tracks if more data will be placed
	 *    on the ring. We will not signal if more data is
	 *    to be placed.
	 *
	 * Based on the channel signal state, we will decide
	 * which signaling policy will be applied.
	 *
	 * If we cannot write to the ring-buffer; signal the host
	 * even if we may not have written anything. This is a rare
	 * enough condition that it should not matter.
	 */

	if (channel->signal_policy)
		signal = true;
	else
		kick_q = true;

	if (((ret == 0) && kick_q && signal) || (ret))
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL(vmbus_sendpacket_ctl);

/**
 * vmbus_sendpacket() - Send the specified buffer on the given channel
 * @channel: Pointer to vmbus_channel structure.
 * @buffer: Pointer to the buffer containing the data to send.
 * @bufferlen: Length of the data in the buffer
 * @requestid: Identifier of the request
 * @type: Type of packet that is being sent e.g. negotiate, time
 * packet etc.
 *
 * Sends data in @buffer directly to hyper-v via the vmbus
 * This will send the data unparsed to hyper-v.
 *
 * Mainly used by Hyper-V drivers.
 */
int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
			   u32 bufferlen, u64 requestid,
			   enum vmbus_packet_type type, u32 flags)
{
	return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid,
				    type, flags, true);
}
EXPORT_SYMBOL(vmbus_sendpacket);
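
/*
 * Usage sketch (illustrative): sending a small in-band request and asking
 * the host for a completion. "struct sample_req" and the use of the
 * request's address as the transaction id are assumptions.
 *
 *	struct sample_req req = { .op = 1 };
 *
 *	ret = vmbus_sendpacket(channel, &req, sizeof(req),
 *			       (u64)(unsigned long)&req, VM_PKT_DATA_INBAND,
 *			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 */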

/*
 * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer
 * packets using a GPADL Direct packet type. This interface allows you
 * to control notifying the host. This will be useful for sending
 * batched data. Also the sender can control the send flags
 * explicitly.
 */
int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
				     struct hv_page_buffer pagebuffers[],
				     u32 pagecount, void *buffer, u32 bufferlen,
				     u64 requestid,
				     u32 flags,
				     bool kick_q)
{
	int ret;
	int i;
	struct vmbus_channel_packet_page_buffer desc;
	u32 descsize;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;

	if (pagecount > MAX_PAGE_BUFFER_COUNT)
		return -EINVAL;

	/*
	 * Adjust the size down since vmbus_channel_packet_page_buffer is the
	 * largest size we support
	 */
	descsize = sizeof(struct vmbus_channel_packet_page_buffer) -
			  ((MAX_PAGE_BUFFER_COUNT - pagecount) *
			  sizeof(struct hv_page_buffer));
	packetlen = descsize + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc.flags = flags;
	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
	desc.length8 = (u16)(packetlen_aligned >> 3);
	desc.transactionid = requestid;
	desc.rangecount = pagecount;

	for (i = 0; i < pagecount; i++) {
		desc.range[i].len = pagebuffers[i].len;
		desc.range[i].offset = pagebuffers[i].offset;
		desc.range[i].pfn = pagebuffers[i].pfn;
	}

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = descsize;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);

	/*
	 * Signalling the host is conditional on many factors:
	 * 1. The ring state changed from being empty to non-empty.
	 *    This is tracked by the variable "signal".
	 * 2. The variable kick_q tracks if more data will be placed
	 *    on the ring. We will not signal if more data is
	 *    to be placed.
	 *
	 * Based on the channel signal state, we will decide
	 * which signaling policy will be applied.
	 *
	 * If we cannot write to the ring-buffer; signal the host
	 * even if we may not have written anything. This is a rare
	 * enough condition that it should not matter.
	 */

	if (channel->signal_policy)
		signal = true;
	else
		kick_q = true;

	if (((ret == 0) && kick_q && signal) || (ret))
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);
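
/*
 * Usage sketch (illustrative): describing one page of payload with an
 * hv_page_buffer and sending it GPA-direct via the wrapper below. The
 * request struct and "data_page" are assumptions for the example.
 *
 *	struct hv_page_buffer pb;
 *	struct sample_req req = { .op = 1 };
 *
 *	pb.pfn = page_to_pfn(data_page);
 *	pb.offset = 0;
 *	pb.len = PAGE_SIZE;
 *	ret = vmbus_sendpacket_pagebuffer(channel, &pb, 1, &req, sizeof(req),
 *					  (u64)(unsigned long)&req);
 */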

/*
 * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
 * packets using a GPADL Direct packet type.
 */
int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
				     struct hv_page_buffer pagebuffers[],
				     u32 pagecount, void *buffer, u32 bufferlen,
				     u64 requestid)
{
	u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;

	return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount,
					       buffer, bufferlen, requestid,
					       flags, true);
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);

/*
 * vmbus_sendpacket_mpb_desc - Send a multi-page buffer packet
 * using a GPADL Direct packet type.
 * The buffer includes the vmbus descriptor.
 */
int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
			      struct vmbus_packet_mpb_array *desc,
			      u32 desc_size,
			      void *buffer, u32 bufferlen, u64 requestid)
{
	int ret;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;

	packetlen = desc_size + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
	desc->length8 = (u16)(packetlen_aligned >> 3);
	desc->transactionid = requestid;
	desc->rangecount = 1;

	bufferlist[0].iov_base = desc;
	bufferlist[0].iov_len = desc_size;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);

	if (ret == 0 && signal)
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);

/*
 * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
 * using a GPADL Direct packet type.
 */
int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
				struct hv_multipage_buffer *multi_pagebuffer,
				void *buffer, u32 bufferlen, u64 requestid)
{
	int ret;
	struct vmbus_channel_packet_multipage_buffer desc;
	u32 descsize;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;
	u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
					 multi_pagebuffer->len);

	if (pfncount > MAX_MULTIPAGE_BUFFER_COUNT)
		return -EINVAL;

	/*
	 * Adjust the size down since vmbus_channel_packet_multipage_buffer is
	 * the largest size we support
	 */
	descsize = sizeof(struct vmbus_channel_packet_multipage_buffer) -
			  ((MAX_MULTIPAGE_BUFFER_COUNT - pfncount) *
			  sizeof(u64));
	packetlen = descsize + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
	desc.length8 = (u16)(packetlen_aligned >> 3);
	desc.transactionid = requestid;
	desc.rangecount = 1;

	desc.range.len = multi_pagebuffer->len;
	desc.range.offset = multi_pagebuffer->offset;

	memcpy(desc.range.pfn_array, multi_pagebuffer->pfn_array,
	       pfncount * sizeof(u64));

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = descsize;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);

	if (ret == 0 && signal)
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);

/**
 * vmbus_recvpacket() - Retrieve the user packet on the specified channel
 * @channel: Pointer to vmbus_channel structure.
 * @buffer: Pointer to the buffer you want to receive the data into.
 * @bufferlen: Maximum size of what the buffer will hold
 * @buffer_actual_len: The actual size of the data after it was received
 * @requestid: Identifier of the request
 *
 * Receives directly from the hyper-v vmbus and puts the data it received
 * into Buffer. This will receive the data unparsed from hyper-v.
 *
 * Mainly used by Hyper-V drivers.
 */
static inline int
__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
		   u32 bufferlen, u32 *buffer_actual_len, u64 *requestid,
		   bool raw)
{
	struct vmpacket_descriptor desc;
	u32 packetlen;
	u32 userlen;
	int ret;
	bool signal = false;

	*buffer_actual_len = 0;
	*requestid = 0;

	ret = hv_ringbuffer_peek(&channel->inbound, &desc,
			     sizeof(struct vmpacket_descriptor));
	if (ret != 0)
		return 0;

	packetlen = desc.len8 << 3;
	if (!raw)
		userlen = packetlen - (desc.offset8 << 3);
	else
		userlen = packetlen;

	*buffer_actual_len = userlen;

	if (userlen > bufferlen)
		return -ENOBUFS;

	*requestid = desc.trans_id;

	/* Copy over the packet to the user buffer */
	ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen,
				 raw ? 0 : desc.offset8 << 3, &signal);

	if (signal)
		vmbus_setevent(channel);

	return ret;
}

int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
		     u32 bufferlen, u32 *buffer_actual_len,
		     u64 *requestid)
{
	return __vmbus_recvpacket(channel, buffer, bufferlen,
				  buffer_actual_len, requestid, false);
}
EXPORT_SYMBOL(vmbus_recvpacket);
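
/*
 * Usage sketch (illustrative): a channel callback draining the inbound
 * ring. The 256-byte buffer and the request handling are assumptions.
 *
 *	static void sample_onchannelcallback(void *context)
 *	{
 *		struct vmbus_channel *chan = context;
 *		u8 buf[256];
 *		u32 len;
 *		u64 req;
 *
 *		while (vmbus_recvpacket(chan, buf, sizeof(buf),
 *					&len, &req) == 0 && len)
 *			;	// process "len" bytes of buf for request "req"
 *	}
 */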

/*
 * vmbus_recvpacket_raw - Retrieve the raw packet on the specified channel
 */
int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
			      u32 bufferlen, u32 *buffer_actual_len,
			      u64 *requestid)
{
	return __vmbus_recvpacket(channel, buffer, bufferlen,
				  buffer_actual_len, requestid, true);
}
EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);