/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
#include <linux/interrupt.h>

#include "hyperv_vmbus.h"

#define NUM_PAGES_SPANNED(addr, len) \
((PAGE_ALIGN(addr + len) >> PAGE_SHIFT) - (addr >> PAGE_SHIFT))
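
/*
 * For example, with 4 KiB pages NUM_PAGES_SPANNED(0x1800, 0x1000)
 * evaluates to 2: the range straddles a page boundary, so a buffer of
 * exactly one page length can still span two pages.
 */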

/*
 * vmbus_setevent - Trigger an event notification on the specified
 * channel.
 */
static void vmbus_setevent(struct vmbus_channel *channel)
{
	struct hv_monitor_page *monitorpage;

	if (channel->offermsg.monitor_allocated) {
		/* Each u32 represents 32 channels */
		sync_set_bit(channel->offermsg.child_relid & 31,
			(unsigned long *)vmbus_connection.send_int_page +
			(channel->offermsg.child_relid >> 5));

		/* Get the child to parent monitor page */
		monitorpage = vmbus_connection.monitor_pages[1];

		sync_set_bit(channel->monitor_bit,
			(unsigned long *)&monitorpage->trigger_group
					[channel->monitor_grp].pending);
	} else {
		vmbus_set_event(channel);
	}
}

/*
 * vmbus_open - Open the specified channel.
 */
int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
		     u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
		     void (*onchannelcallback)(void *context), void *context)
{
	struct vmbus_channel_open_channel *open_msg;
	struct vmbus_channel_msginfo *open_info = NULL;
	void *in, *out;
	unsigned long flags;
	int ret, err = 0;
	unsigned long t;
	struct page *page;

	spin_lock_irqsave(&newchannel->lock, flags);
	if (newchannel->state == CHANNEL_OPEN_STATE) {
		newchannel->state = CHANNEL_OPENING_STATE;
	} else {
		spin_unlock_irqrestore(&newchannel->lock, flags);
		return -EINVAL;
	}
	spin_unlock_irqrestore(&newchannel->lock, flags);

	newchannel->onchannel_callback = onchannelcallback;
	newchannel->channel_callback_context = context;

	/* Allocate the ring buffer */
	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
				GFP_KERNEL|__GFP_ZERO,
				get_order(send_ringbuffer_size +
				recv_ringbuffer_size));

	if (!page)
		out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
					       get_order(send_ringbuffer_size +
					       recv_ringbuffer_size));
	else
		out = (void *)page_address(page);

	if (!out) {
		err = -ENOMEM;
		goto error0;
	}

	in = (void *)((unsigned long)out + send_ringbuffer_size);

	newchannel->ringbuffer_pages = out;
	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
					   recv_ringbuffer_size) >> PAGE_SHIFT;

	ret = hv_ringbuffer_init(
		&newchannel->outbound, out, send_ringbuffer_size);

	if (ret != 0) {
		err = ret;
		goto error0;
	}

	ret = hv_ringbuffer_init(
		&newchannel->inbound, in, recv_ringbuffer_size);
	if (ret != 0) {
		err = ret;
		goto error0;
	}

	/* Establish the gpadl for the ring buffer */
	newchannel->ringbuffer_gpadlhandle = 0;

	ret = vmbus_establish_gpadl(newchannel,
					 newchannel->outbound.ring_buffer,
					 send_ringbuffer_size +
					 recv_ringbuffer_size,
					 &newchannel->ringbuffer_gpadlhandle);

	if (ret != 0) {
		err = ret;
		goto error0;
	}

	/* Create and init the channel open message */
	open_info = kmalloc(sizeof(*open_info) +
			   sizeof(struct vmbus_channel_open_channel),
			   GFP_KERNEL);
	if (!open_info) {
		err = -ENOMEM;
		goto error_gpadl;
	}

	init_completion(&open_info->waitevent);

	open_msg = (struct vmbus_channel_open_channel *)open_info->msg;
	open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL;
	open_msg->openid = newchannel->offermsg.child_relid;
	open_msg->child_relid = newchannel->offermsg.child_relid;
	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
	open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >>
						  PAGE_SHIFT;
	open_msg->target_vp = newchannel->target_vp;

	if (userdatalen > MAX_USER_DEFINED_BYTES) {
		err = -EINVAL;
		goto error_gpadl;
	}

	if (userdatalen)
		memcpy(open_msg->userdata, userdata, userdatalen);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&open_info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	ret = vmbus_post_msg(open_msg,
			       sizeof(struct vmbus_channel_open_channel));

	if (ret != 0) {
		err = ret;
		goto error1;
	}

	t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
	if (t == 0) {
		err = -ETIMEDOUT;
		goto error1;
	}

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	if (open_info->response.open_result.status) {
		err = -EAGAIN;
		goto error_gpadl;
	}

	newchannel->state = CHANNEL_OPENED_STATE;
	kfree(open_info);
	return 0;

error1:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&open_info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

error_gpadl:
	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);

error0:
	free_pages((unsigned long)out,
		get_order(send_ringbuffer_size + recv_ringbuffer_size));
	kfree(open_info);
	newchannel->state = CHANNEL_OPEN_STATE;
	return err;
}
EXPORT_SYMBOL_GPL(vmbus_open);
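
/*
 * A minimal usage sketch (hypothetical driver code, not part of this
 * file): a VSC typically opens its channel from its probe() routine
 * and supplies a callback that drains the inbound ring.
 *
 *	ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
 *			 NULL, 0, my_onchannelcallback, dev->channel);
 *	if (ret)
 *		return ret;
 */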

/* Used for Hyper-V Socket: a guest client's connect() to the host */
int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
				  const uuid_le *shv_host_servie_id)
{
	struct vmbus_channel_tl_connect_request conn_msg;

	memset(&conn_msg, 0, sizeof(conn_msg));
	conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST;
	conn_msg.guest_endpoint_id = *shv_guest_servie_id;
	conn_msg.host_service_id = *shv_host_servie_id;

	return vmbus_post_msg(&conn_msg, sizeof(conn_msg));
}
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
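
/*
 * Hypothetical caller sketch: an hv_sock-style connect supplies the
 * guest endpoint GUID and the well-known host service GUID:
 *
 *	ret = vmbus_send_tl_connect_request(&guest_endpoint_guid,
 *					    &host_service_guid);
 */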

/*
 * create_gpadl_header - Creates a gpadl for the specified buffer
 */
static int create_gpadl_header(void *kbuffer, u32 size,
					 struct vmbus_channel_msginfo **msginfo,
					 u32 *messagecount)
{
	int i;
	int pagecount;
	struct vmbus_channel_gpadl_header *gpadl_header;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msgheader;
	struct vmbus_channel_msginfo *msgbody = NULL;
	u32 msgsize;

	int pfnsum, pfncount, pfnleft, pfncurr, pfnsize;

	pagecount = size >> PAGE_SHIFT;

	/* do we need a gpadl body msg */
	pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
		  sizeof(struct vmbus_channel_gpadl_header) -
		  sizeof(struct gpa_range);
	pfncount = pfnsize / sizeof(u64);

	if (pagecount > pfncount) {
		/* we need a gpadl body */
		/* fill in the header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pfncount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (!msgheader)
			goto nomem;

		INIT_LIST_HEAD(&msgheader->submsglist);
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = size;
		for (i = 0; i < pfncount; i++)
			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
		*msginfo = msgheader;
		*messagecount = 1;

		pfnsum = pfncount;
		pfnleft = pagecount - pfncount;

		/* how many pfns can we fit */
		pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
			  sizeof(struct vmbus_channel_gpadl_body);
		pfncount = pfnsize / sizeof(u64);

		/* fill in the body */
		while (pfnleft) {
			if (pfnleft > pfncount)
				pfncurr = pfncount;
			else
				pfncurr = pfnleft;

			msgsize = sizeof(struct vmbus_channel_msginfo) +
				  sizeof(struct vmbus_channel_gpadl_body) +
				  pfncurr * sizeof(u64);
			msgbody = kzalloc(msgsize, GFP_KERNEL);

			if (!msgbody) {
				struct vmbus_channel_msginfo *pos = NULL;
				struct vmbus_channel_msginfo *tmp = NULL;
				/*
				 * Free up all the allocated messages.
				 */
				list_for_each_entry_safe(pos, tmp,
					&msgheader->submsglist,
					msglistentry) {

					list_del(&pos->msglistentry);
					kfree(pos);
				}

				goto nomem;
			}

			msgbody->msgsize = msgsize;
			(*messagecount)++;
			gpadl_body =
				(struct vmbus_channel_gpadl_body *)msgbody->msg;

			/*
			 * Gpadl is u32 and we are using a pointer which could
			 * be 64-bit.
			 * This is governed by the guest/host protocol and
			 * so the hypervisor guarantees that this is ok.
			 */
			for (i = 0; i < pfncurr; i++)
				gpadl_body->pfn[i] = slow_virt_to_phys(
					kbuffer + PAGE_SIZE * (pfnsum + i)) >>
					PAGE_SHIFT;

			/* add to msg header */
			list_add_tail(&msgbody->msglistentry,
				      &msgheader->submsglist);
			pfnsum += pfncurr;
			pfnleft -= pfncurr;
		}
	} else {
		/* everything fits in a header */
		msgsize = sizeof(struct vmbus_channel_msginfo) +
			  sizeof(struct vmbus_channel_gpadl_header) +
			  sizeof(struct gpa_range) + pagecount * sizeof(u64);
		msgheader = kzalloc(msgsize, GFP_KERNEL);
		if (msgheader == NULL)
			goto nomem;
		msgheader->msgsize = msgsize;

		gpadl_header = (struct vmbus_channel_gpadl_header *)
			msgheader->msg;
		gpadl_header->rangecount = 1;
		gpadl_header->range_buflen = sizeof(struct gpa_range) +
					 pagecount * sizeof(u64);
		gpadl_header->range[0].byte_offset = 0;
		gpadl_header->range[0].byte_count = size;
		for (i = 0; i < pagecount; i++)
			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;

		*msginfo = msgheader;
		*messagecount = 1;
	}

	return 0;
nomem:
	kfree(msgheader);
	kfree(msgbody);
	return -ENOMEM;
}
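
/*
 * Sizing note: the header/body split above follows directly from the
 * message-size cap. A GPADL_HEADER message carries at most
 *
 *	(MAX_SIZE_CHANNEL_MESSAGE - sizeof(struct vmbus_channel_gpadl_header)
 *	 - sizeof(struct gpa_range)) / sizeof(u64)
 *
 * PFNs; any remaining pages are spread over GPADL_BODY messages, each
 * holding up to (MAX_SIZE_CHANNEL_MESSAGE -
 * sizeof(struct vmbus_channel_gpadl_body)) / sizeof(u64) PFNs.
 */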

/*
 * vmbus_establish_gpadl - Establish a GPADL for the specified buffer
 *
 * @channel: a channel
 * @kbuffer: from kmalloc or vmalloc
 * @size: page-size multiple
 * @gpadl_handle: on success, receives the handle for the new GPADL
 */
int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
			       u32 size, u32 *gpadl_handle)
{
	struct vmbus_channel_gpadl_header *gpadlmsg;
	struct vmbus_channel_gpadl_body *gpadl_body;
	struct vmbus_channel_msginfo *msginfo = NULL;
	struct vmbus_channel_msginfo *submsginfo;
	u32 msgcount;
	struct list_head *curr;
	u32 next_gpadl_handle;
	unsigned long flags;
	int ret = 0;

	next_gpadl_handle =
		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);

	ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
	if (ret)
		return ret;

	init_completion(&msginfo->waitevent);

	gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg;
	gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER;
	gpadlmsg->child_relid = channel->offermsg.child_relid;
	gpadlmsg->gpadl = next_gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&msginfo->msglistentry,
		      &vmbus_connection.chn_msg_list);

	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize -
			       sizeof(*msginfo));
	if (ret != 0)
		goto cleanup;

	if (msgcount > 1) {
		list_for_each(curr, &msginfo->submsglist) {

			submsginfo = (struct vmbus_channel_msginfo *)curr;
			gpadl_body =
			     (struct vmbus_channel_gpadl_body *)submsginfo->msg;

			gpadl_body->header.msgtype =
				CHANNELMSG_GPADL_BODY;
			gpadl_body->gpadl = next_gpadl_handle;

			ret = vmbus_post_msg(gpadl_body,
					       submsginfo->msgsize -
					       sizeof(*submsginfo));
			if (ret != 0)
				goto cleanup;

		}
	}
	wait_for_completion(&msginfo->waitevent);

	/* At this point, we received the gpadl created msg */
	*gpadl_handle = gpadlmsg->gpadl;

cleanup:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&msginfo->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	kfree(msginfo);
	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);
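
/*
 * A minimal pairing sketch (hypothetical caller): establish a GPADL
 * over a page-multiple buffer, hand the handle to the host, and tear
 * it down when done.
 *
 *	u32 gpadl = 0;
 *
 *	ret = vmbus_establish_gpadl(channel, buf, buf_size, &gpadl);
 *	if (ret)
 *		return ret;
 *	// ... pass 'gpadl' to the host, use the buffer ...
 *	vmbus_teardown_gpadl(channel, gpadl);
 */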

/*
 * vmbus_teardown_gpadl - Teardown the specified GPADL handle
 */
int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
{
	struct vmbus_channel_gpadl_teardown *msg;
	struct vmbus_channel_msginfo *info;
	unsigned long flags;
	int ret;

	info = kmalloc(sizeof(*info) +
		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	init_completion(&info->waitevent);

	msg = (struct vmbus_channel_gpadl_teardown *)info->msg;

	msg->header.msgtype = CHANNELMSG_GPADL_TEARDOWN;
	msg->child_relid = channel->offermsg.child_relid;
	msg->gpadl = gpadl_handle;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&info->msglistentry,
		      &vmbus_connection.chn_msg_list);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
	ret = vmbus_post_msg(msg,
			       sizeof(struct vmbus_channel_gpadl_teardown));

	if (ret)
		goto post_msg_err;

	wait_for_completion(&info->waitevent);

post_msg_err:
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&info->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	kfree(info);
	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);

static void reset_channel_cb(void *arg)
{
	struct vmbus_channel *channel = arg;

	channel->onchannel_callback = NULL;
}

static int vmbus_close_internal(struct vmbus_channel *channel)
{
	struct vmbus_channel_close_channel *msg;
	struct tasklet_struct *tasklet;
	int ret;

	/*
	 * process_chn_event(), running in the tasklet, can race
	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
	 * the former is accessing channel->inbound.ring_buffer, the latter
	 * could be freeing the ring_buffer pages.
	 *
	 * To resolve the race, we can serialize them by disabling the
	 * tasklet when the latter is running here.
	 */
	tasklet = hv_context.event_dpc[channel->target_cpu];
	tasklet_disable(tasklet);

	/*
	 * In case a device driver's probe() fails (e.g.,
	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
	 * rescinded later (e.g., we dynamically disable an Integrated Service
	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
	 * here we should skip most of the below cleanup work.
	 */
	if (channel->state != CHANNEL_OPENED_STATE) {
		ret = -EINVAL;
		goto out;
	}

	channel->state = CHANNEL_OPEN_STATE;
	channel->sc_creation_callback = NULL;
	/* Stop callback and cancel the timer asap */
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu, reset_channel_cb,
					 channel, true);
	} else {
		reset_channel_cb(channel);
		put_cpu();
	}

	/* Send a closing message */

	msg = &channel->close_msg.msg;

	msg->header.msgtype = CHANNELMSG_CLOSECHANNEL;
	msg->child_relid = channel->offermsg.child_relid;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel));

	if (ret) {
		pr_err("Close failed: close post msg return is %d\n", ret);
		/*
		 * If we failed to post the close msg,
		 * it is perhaps better to leak memory.
		 */
		goto out;
	}

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ringbuffer_gpadlhandle) {
		ret = vmbus_teardown_gpadl(channel,
					   channel->ringbuffer_gpadlhandle);
		if (ret) {
			pr_err("Close failed: teardown gpadl return %d\n", ret);
			/*
			 * If we failed to teardown gpadl,
			 * it is perhaps better to leak memory.
			 */
			goto out;
		}
	}

	/* Cleanup the ring buffers for this channel */
	hv_ringbuffer_cleanup(&channel->outbound);
	hv_ringbuffer_cleanup(&channel->inbound);

	free_pages((unsigned long)channel->ringbuffer_pages,
		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));

out:
	tasklet_enable(tasklet);

	return ret;
}

/*
 * vmbus_close - Close the specified channel
 */
void vmbus_close(struct vmbus_channel *channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (channel->primary_channel != NULL) {
		/*
		 * We will only close sub-channels when
		 * the primary is closed.
		 */
		return;
	}
	/*
	 * Close all the sub-channels first and then close the
	 * primary channel.
	 */
	list_for_each_safe(cur, tmp, &channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;
		vmbus_close_internal(cur_channel);
	}
	/*
	 * Now close the primary.
	 */
	vmbus_close_internal(channel);
}
EXPORT_SYMBOL_GPL(vmbus_close);

int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
			   u32 bufferlen, u64 requestid,
			   enum vmbus_packet_type type, u32 flags, bool kick_q)
{
	struct vmpacket_descriptor desc;
	u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
	u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	int ret;
	bool signal = false;
	bool lock = channel->acquire_ring_lock;
	int num_vecs = ((bufferlen != 0) ? 3 : 1);

	/* Setup the descriptor */
	desc.type = type; /* VmbusPacketTypeDataInBand; */
	desc.flags = flags; /* VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; */
	/* in 8-bytes granularity */
	desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3;
	desc.len8 = (u16)(packetlen_aligned >> 3);
	desc.trans_id = requestid;

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
				  &signal, lock);

	/*
	 * Signalling the host is conditional on many factors:
	 * 1. The ring state changed from being empty to non-empty.
	 *    This is tracked by the variable "signal".
	 * 2. The variable kick_q tracks if more data will be placed
	 *    on the ring. We will not signal if more data is
	 *    to be placed.
	 *
	 * Based on the channel signal state, we will decide
	 * which signaling policy will be applied.
	 *
	 * If we cannot write to the ring-buffer, signal the host
	 * even if we may not have written anything. This is a rare
	 * enough condition that it should not matter.
	 * NOTE: in this case, the hvsock channel is an exception, because
	 * it looks like the host side's hvsock implementation has a
	 * throttling mechanism which can hurt performance otherwise.
	 */

	if (channel->signal_policy)
		signal = true;
	else
		kick_q = true;

	if (((ret == 0) && kick_q && signal) ||
	    (ret && !is_hvsock_channel(channel)))
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL(vmbus_sendpacket_ctl);

/**
 * vmbus_sendpacket() - Send the specified buffer on the given channel
 * @channel: Pointer to vmbus_channel structure.
 * @buffer: Pointer to the buffer containing the data to send.
 * @bufferlen: Length of the data in @buffer
 * @requestid: Identifier of the request
 * @type: Type of packet that is being sent e.g. negotiate, time
 * packet etc.
 *
 * Sends data in @buffer directly to Hyper-V via the vmbus.
 * This will send the data unparsed to Hyper-V.
 *
 * Mainly used by Hyper-V drivers.
 */
int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
			   u32 bufferlen, u64 requestid,
			   enum vmbus_packet_type type, u32 flags)
{
	return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid,
				    type, flags, true);
}
EXPORT_SYMBOL(vmbus_sendpacket);
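
/*
 * Worked example of the on-ring layout vmbus_sendpacket_ctl() builds:
 * with sizeof(struct vmpacket_descriptor) == 16 and bufferlen == 30,
 * packetlen is 46 and packetlen_aligned is 48, so desc.offset8 == 2,
 * desc.len8 == 6, and two bytes of zero padding (from aligned_data)
 * keep the next descriptor 8-byte aligned on the ring.
 */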

/*
 * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer
 * packets using a GPADL Direct packet type. This interface allows you
 * to control notifying the host. This will be useful for sending
 * batched data. Also the sender can control the send flags
 * explicitly.
 */
int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
				     struct hv_page_buffer pagebuffers[],
				     u32 pagecount, void *buffer, u32 bufferlen,
				     u64 requestid,
				     u32 flags,
				     bool kick_q)
{
	int ret;
	int i;
	struct vmbus_channel_packet_page_buffer desc;
	u32 descsize;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;
	bool lock = channel->acquire_ring_lock;

	if (pagecount > MAX_PAGE_BUFFER_COUNT)
		return -EINVAL;

	/*
	 * Adjust the size down since vmbus_channel_packet_page_buffer is the
	 * largest size we support
	 */
	descsize = sizeof(struct vmbus_channel_packet_page_buffer) -
			  ((MAX_PAGE_BUFFER_COUNT - pagecount) *
			  sizeof(struct hv_page_buffer));
	packetlen = descsize + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc.flags = flags;
	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
	desc.length8 = (u16)(packetlen_aligned >> 3);
	desc.transactionid = requestid;
	desc.rangecount = pagecount;

	for (i = 0; i < pagecount; i++) {
		desc.range[i].len = pagebuffers[i].len;
		desc.range[i].offset = pagebuffers[i].offset;
		desc.range[i].pfn = pagebuffers[i].pfn;
	}

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = descsize;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
				  &signal, lock);

	/*
	 * Signalling the host is conditional on many factors:
	 * 1. The ring state changed from being empty to non-empty.
	 *    This is tracked by the variable "signal".
	 * 2. The variable kick_q tracks if more data will be placed
	 *    on the ring. We will not signal if more data is
	 *    to be placed.
	 *
	 * Based on the channel signal state, we will decide
	 * which signaling policy will be applied.
	 *
	 * If we cannot write to the ring-buffer, signal the host
	 * even if we may not have written anything. This is a rare
	 * enough condition that it should not matter.
	 */

	if (channel->signal_policy)
		signal = true;
	else
		kick_q = true;

	if (((ret == 0) && kick_q && signal) || (ret))
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);

/*
 * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
 * packets using a GPADL Direct packet type.
 */
int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
				     struct hv_page_buffer pagebuffers[],
				     u32 pagecount, void *buffer, u32 bufferlen,
				     u64 requestid)
{
	u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;

	return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount,
					       buffer, bufferlen, requestid,
					       flags, true);
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
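
/*
 * A minimal sketch of filling the page-buffer array (hypothetical
 * caller; 'data' is a kernel linear-mapping address and the range
 * must not cross a page boundary):
 *
 *	struct hv_page_buffer pb[1];
 *
 *	pb[0].pfn = virt_to_phys(data) >> PAGE_SHIFT;
 *	pb[0].offset = offset_in_page(data);
 *	pb[0].len = datalen;
 *
 *	ret = vmbus_sendpacket_pagebuffer(channel, pb, 1,
 *					  &hdr, sizeof(hdr), req_id);
 */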

/*
 * vmbus_sendpacket_mpb_desc - Send a multi-page buffer packet
 * using a GPADL Direct packet type.
 * The buffer includes the vmbus descriptor.
 */
int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
			      struct vmbus_packet_mpb_array *desc,
			      u32 desc_size,
			      void *buffer, u32 bufferlen, u64 requestid)
{
	int ret;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;
	bool lock = channel->acquire_ring_lock;

	packetlen = desc_size + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
	desc->length8 = (u16)(packetlen_aligned >> 3);
	desc->transactionid = requestid;
	desc->rangecount = 1;

	bufferlist[0].iov_base = desc;
	bufferlist[0].iov_len = desc_size;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
				  &signal, lock);

	if (ret == 0 && signal)
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);

/*
 * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
 * using a GPADL Direct packet type.
 */
int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
				struct hv_multipage_buffer *multi_pagebuffer,
				void *buffer, u32 bufferlen, u64 requestid)
{
	int ret;
	struct vmbus_channel_packet_multipage_buffer desc;
	u32 descsize;
	u32 packetlen;
	u32 packetlen_aligned;
	struct kvec bufferlist[3];
	u64 aligned_data = 0;
	bool signal = false;
	bool lock = channel->acquire_ring_lock;
	u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
					 multi_pagebuffer->len);

	if (pfncount > MAX_MULTIPAGE_BUFFER_COUNT)
		return -EINVAL;

	/*
	 * Adjust the size down since vmbus_channel_packet_multipage_buffer is
	 * the largest size we support
	 */
	descsize = sizeof(struct vmbus_channel_packet_multipage_buffer) -
			  ((MAX_MULTIPAGE_BUFFER_COUNT - pfncount) *
			  sizeof(u64));
	packetlen = descsize + bufferlen;
	packetlen_aligned = ALIGN(packetlen, sizeof(u64));

	/* Setup the descriptor */
	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
	desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
	desc.length8 = (u16)(packetlen_aligned >> 3);
	desc.transactionid = requestid;
	desc.rangecount = 1;

	desc.range.len = multi_pagebuffer->len;
	desc.range.offset = multi_pagebuffer->offset;

	memcpy(desc.range.pfn_array, multi_pagebuffer->pfn_array,
	       pfncount * sizeof(u64));

	bufferlist[0].iov_base = &desc;
	bufferlist[0].iov_len = descsize;
	bufferlist[1].iov_base = buffer;
	bufferlist[1].iov_len = bufferlen;
	bufferlist[2].iov_base = &aligned_data;
	bufferlist[2].iov_len = (packetlen_aligned - packetlen);

	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
				  &signal, lock);

	if (ret == 0 && signal)
		vmbus_setevent(channel);

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);

/**
 * vmbus_recvpacket() - Retrieve the user packet on the specified channel
 * @channel: Pointer to vmbus_channel structure.
 * @buffer: Pointer to the buffer you want to receive the data into.
 * @bufferlen: Maximum size of what the buffer will hold
 * @buffer_actual_len: The actual size of the data after it was received
 * @requestid: Identifier of the request
 *
 * Receives directly from the Hyper-V vmbus and puts the data it received
 * into @buffer. This will receive the data unparsed from Hyper-V.
 *
 * Mainly used by Hyper-V drivers.
 */
static inline int
__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
		   u32 bufferlen, u32 *buffer_actual_len, u64 *requestid,
		   bool raw)
{
	int ret;
	bool signal = false;

	ret = hv_ringbuffer_read(&channel->inbound, buffer, bufferlen,
				 buffer_actual_len, requestid, &signal, raw);

	if (signal)
		vmbus_setevent(channel);

	return ret;
}

int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
		     u32 bufferlen, u32 *buffer_actual_len,
		     u64 *requestid)
{
	return __vmbus_recvpacket(channel, buffer, bufferlen,
				  buffer_actual_len, requestid, false);
}
EXPORT_SYMBOL(vmbus_recvpacket);

/*
 * vmbus_recvpacket_raw - Retrieve the raw packet on the specified channel
 */
int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
			      u32 bufferlen, u32 *buffer_actual_len,
			      u64 *requestid)
{
	return __vmbus_recvpacket(channel, buffer, bufferlen,
				  buffer_actual_len, requestid, true);
}
EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
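
/*
 * A minimal receive-loop sketch (hypothetical channel callback, as
 * registered via vmbus_open()):
 *
 *	static void my_onchannelcallback(void *context)
 *	{
 *		struct vmbus_channel *chan = context;
 *		u8 buf[256];
 *		u32 len;
 *		u64 req;
 *
 *		while (vmbus_recvpacket(chan, buf, sizeof(buf),
 *					&len, &req) == 0 && len != 0)
 *			consume(buf, len);	// hypothetical helper
 *	}
 */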