hv_util.c 15.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Copyright (c) 2010, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

23 24 25 26 27
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
28
#include <linux/reboot.h>
29
#include <linux/hyperv.h>
30 31
#include <linux/clockchips.h>
#include <linux/ptp_clock_kernel.h>
32
#include <asm/mshyperv.h>
33

34
#include "hyperv_vmbus.h"
35

36 37 38
/*
 * Protocol versions of the integration services handled in this file.
 * A version is encoded as (major << 16) | minor; the callbacks below print
 * it back as "major.minor" using ">> 16" and "& 0xFFFF".
 */

/* Shutdown service */
#define SD_MAJOR	3
#define SD_MINOR	0
#define SD_MAJOR_1	1
#define SD_VERSION	((SD_MAJOR << 16) | SD_MINOR)
#define SD_VERSION_1	((SD_MAJOR_1 << 16) | SD_MINOR)

/* TimeSync service */
#define TS_MAJOR	4
#define TS_MINOR	0
#define TS_MAJOR_3	3
#define TS_MAJOR_1	1
#define TS_VERSION	((TS_MAJOR << 16) | TS_MINOR)
#define TS_VERSION_3	((TS_MAJOR_3 << 16) | TS_MINOR)
#define TS_VERSION_1	((TS_MAJOR_1 << 16) | TS_MINOR)

/* Heartbeat service */
#define HB_MAJOR	3
#define HB_MINOR	0
#define HB_MAJOR_1	1
#define HB_VERSION	((HB_MAJOR << 16) | HB_MINOR)
#define HB_VERSION_1	((HB_MAJOR_1 << 16) | HB_MINOR)

/*
 * Service versions in use; each one is filled in through
 * vmbus_prep_negotiate_resp() by the corresponding channel callback.
 */
static int sd_srv_version;
static int ts_srv_version;
static int hb_srv_version;
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87

#define SD_VER_COUNT 2
static const int sd_versions[] = {
	SD_VERSION,
	SD_VERSION_1
};

#define TS_VER_COUNT 3
static const int ts_versions[] = {
	TS_VERSION,
	TS_VERSION_3,
	TS_VERSION_1
};

#define HB_VER_COUNT 2
static const int hb_versions[] = {
	HB_VERSION,
	HB_VERSION_1
};

#define FW_VER_COUNT 2
static const int fw_versions[] = {
	UTIL_FW_VERSION,
	UTIL_WS2K8_FW_VERSION
};
88

89 90 91 92 93
static void shutdown_onchannelcallback(void *context);
static struct hv_util_service util_shutdown = {
	.util_cb = shutdown_onchannelcallback,
};

94 95 96
static int hv_timesync_init(struct hv_util_service *srv);
static void hv_timesync_deinit(void);

97 98 99
static void timesync_onchannelcallback(void *context);
static struct hv_util_service util_timesynch = {
	.util_cb = timesync_onchannelcallback,
100 101
	.util_init = hv_timesync_init,
	.util_deinit = hv_timesync_deinit,
102 103 104 105 106 107 108 109 110 111 112 113
};

static void heartbeat_onchannelcallback(void *context);
static struct hv_util_service util_heartbeat = {
	.util_cb = heartbeat_onchannelcallback,
};

static struct hv_util_service util_kvp = {
	.util_cb = hv_kvp_onchannelcallback,
	.util_init = hv_kvp_init,
	.util_deinit = hv_kvp_deinit,
};
114

115 116 117 118 119 120
static struct hv_util_service util_vss = {
	.util_cb = hv_vss_onchannelcallback,
	.util_init = hv_vss_init,
	.util_deinit = hv_vss_deinit,
};

121 122 123 124 125 126
static struct hv_util_service util_fcopy = {
	.util_cb = hv_fcopy_onchannelcallback,
	.util_init = hv_fcopy_init,
	.util_deinit = hv_fcopy_deinit,
};

127 128 129 130 131 132 133 134 135 136
/*
 * Work handler: request an orderly power-off of the guest. The work item
 * argument is not used.
 */
static void perform_shutdown(struct work_struct *unused)
{
	orderly_poweroff(true);
}

/*
 * Work item that lets shutdown_onchannelcallback() defer the shutdown
 * operation to a thread context.
 */
static DECLARE_WORK(shutdown_work, perform_shutdown);

137
static void shutdown_onchannelcallback(void *context)
138 139
{
	struct vmbus_channel *channel = context;
140
	u32 recvlen;
141
	u64 requestid;
142
	bool execute_shutdown = false;
143
	u8  *shut_txf_buf = util_shutdown.recv_buffer;
144 145 146 147 148

	struct shutdown_msg_data *shutdown_msg;

	struct icmsg_hdr *icmsghdrp;

149 150
	vmbus_recvpacket(channel, shut_txf_buf,
			 PAGE_SIZE, &recvlen, &requestid);
151 152

	if (recvlen > 0) {
153
		icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[
154 155 156
			sizeof(struct vmbuspipe_hdr)];

		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
157 158 159 160 161 162 163 164
			if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf,
					fw_versions, FW_VER_COUNT,
					sd_versions, SD_VER_COUNT,
					NULL, &sd_srv_version)) {
				pr_info("Shutdown IC version %d.%d\n",
					sd_srv_version >> 16,
					sd_srv_version & 0xFFFF);
			}
165
		} else {
166 167 168 169
			shutdown_msg =
				(struct shutdown_msg_data *)&shut_txf_buf[
					sizeof(struct vmbuspipe_hdr) +
					sizeof(struct icmsg_hdr)];
170 171 172 173 174 175 176

			switch (shutdown_msg->flags) {
			case 0:
			case 1:
				icmsghdrp->status = HV_S_OK;
				execute_shutdown = true;

177
				pr_info("Shutdown request received -"
178
					    " graceful shutdown initiated\n");
179 180 181 182 183
				break;
			default:
				icmsghdrp->status = HV_E_FAIL;
				execute_shutdown = false;

184 185
				pr_info("Shutdown request received -"
					    " Invalid request\n");
186
				break;
187
			}
188 189 190 191 192
		}

		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
			| ICMSGHDRFLAG_RESPONSE;

193
		vmbus_sendpacket(channel, shut_txf_buf,
194
				       recvlen, requestid,
195
				       VM_PKT_DATA_INBAND, 0);
196 197 198
	}

	if (execute_shutdown == true)
199
		schedule_work(&shutdown_work);
200 201
}

202 203 204 205 206 207 208
/*
 * Set the host time in a process context.
 */

struct adj_time_work {
	struct work_struct work;
	u64	host_time;
209 210
	u64	ref_time;
	u8	flags;
211 212 213 214
};

static void hv_set_host_time(struct work_struct *work)
{
215
	struct adj_time_work *wrk;
216
	struct timespec64 host_ts;
217
	u64 reftime, newtime;
218 219

	wrk = container_of(work, struct adj_time_work, work);
220

221 222 223
	reftime = hyperv_cs->read(hyperv_cs);
	newtime = wrk->host_time + (reftime - wrk->ref_time);
	host_ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
224

225
	do_settimeofday64(&host_ts);
226 227
}

228 229 230 231 232 233
/*
 * Synchronize time with host after reboot, restore, etc.
 *
 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
 * message after the timesync channel is opened. Since the hv_utils module is
 * loaded after hv_vmbus, the first message is usually missed. This bit is
 * considered a hard request to discipline the clock.
 *
 * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
 * typically used as a hint to the guest. The guest is under no obligation
 * to discipline the clock.
 */
static struct adj_time_work  wrk;
242 243 244 245 246 247 248 249 250 251 252 253 254 255

/*
 * The last time sample, received from the host. PTP device responds to
 * requests by using this data and the current partition-wide time reference
 * count.
 */
static struct {
	u64				host_time;
	u64				ref_time;
	struct system_time_snapshot	snap;
	spinlock_t			lock;
} host_ts;

/*
 * Handle one time sample from the host.
 *
 * With ICTIMESYNCFLAG_SYNC set, the sample is queued for hv_set_host_time(),
 * which sets the guest clock from a work item. Otherwise the sample is only
 * stored in host_ts for the PTP device.
 *
 * @hosttime:  host time carried by the message.
 * @reftime:   guest's Hyper-V clocksource value at the time the sample was
 *             generated. Only TimeSync v4 messages carry it; the caller
 *             passes 0 for older protocol versions.
 * @adj_flags: ICTIMESYNCFLAG_* bits of the message.
 */
static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
{
	unsigned long flags;
	u64 cur_reftime;

	if (adj_flags & ICTIMESYNCFLAG_SYNC) {
		/*
		 * Queue a job to do do_settimeofday64().
		 *
		 * This check is safe since we are executing in the
		 * interrupt context and time synch messages are always
		 * delivered on the same CPU.
		 */
		if (work_pending(&wrk.work))
			return;

		wrk.host_time = hosttime;

		/*
		 * hv_set_host_time() advances host_time by the clocksource
		 * ticks elapsed since ref_time. Messages older than TimeSync
		 * v4 carry no reference time (reftime is 0 here), so anchor
		 * the sample to the moment of reception instead; queueing 0
		 * would add the entire reference counter to the host time.
		 */
		if (ts_srv_version > TS_VERSION_3)
			wrk.ref_time = reftime;
		else
			wrk.ref_time = hyperv_cs->read(hyperv_cs);

		wrk.flags = adj_flags;
		schedule_work(&wrk.work);
		return;
	}

	/*
	 * Save the adjusted time sample from the host and the snapshot
	 * of the current system time for PTP device.
	 */
	spin_lock_irqsave(&host_ts.lock, flags);

	cur_reftime = hyperv_cs->read(hyperv_cs);
	host_ts.host_time = hosttime;
	host_ts.ref_time = cur_reftime;
	ktime_get_snapshot(&host_ts.snap);

	/*
	 * TimeSync v4 messages contain reference time (guest's Hyper-V
	 * clocksource read when the time sample was generated), we can
	 * improve the precision by adding the delta between now and the
	 * time of generation.
	 */
	if (ts_srv_version > TS_VERSION_3)
		host_ts.host_time += (cur_reftime - reftime);

	spin_unlock_irqrestore(&host_ts.lock, flags);
}

/*
 * Time Sync Channel message handler.
 */
static void timesync_onchannelcallback(void *context)
{
	struct vmbus_channel *channel = context;
305
	u32 recvlen;
306 307 308
	u64 requestid;
	struct icmsg_hdr *icmsghdrp;
	struct ictimesync_data *timedatap;
309
	struct ictimesync_ref_data *refdata;
310
	u8 *time_txf_buf = util_timesynch.recv_buffer;
311

312 313
	vmbus_recvpacket(channel, time_txf_buf,
			 PAGE_SIZE, &recvlen, &requestid);
314 315

	if (recvlen > 0) {
316
		icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[
317 318 319
				sizeof(struct vmbuspipe_hdr)];

		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
320 321 322 323
			if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf,
						fw_versions, FW_VER_COUNT,
						ts_versions, TS_VER_COUNT,
						NULL, &ts_srv_version)) {
324
				pr_info("TimeSync IC version %d.%d\n",
325 326 327
					ts_srv_version >> 16,
					ts_srv_version & 0xFFFF);
			}
328
		} else {
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
			if (ts_srv_version > TS_VERSION_3) {
				refdata = (struct ictimesync_ref_data *)
					&time_txf_buf[
					sizeof(struct vmbuspipe_hdr) +
					sizeof(struct icmsg_hdr)];

				adj_guesttime(refdata->parenttime,
						refdata->vmreferencetime,
						refdata->flags);
			} else {
				timedatap = (struct ictimesync_data *)
					&time_txf_buf[
					sizeof(struct vmbuspipe_hdr) +
					sizeof(struct icmsg_hdr)];
				adj_guesttime(timedatap->parenttime,
						0,
						timedatap->flags);
			}
347 348 349 350 351
		}

		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
			| ICMSGHDRFLAG_RESPONSE;

352
		vmbus_sendpacket(channel, time_txf_buf,
353
				recvlen, requestid,
354
				VM_PKT_DATA_INBAND, 0);
355 356 357
	}
}

358 359 360 361 362 363 364 365
/*
 * Heartbeat functionality.
 * Every two seconds, Hyper-V send us a heartbeat request message.
 * we respond to this message, and Hyper-V knows we are alive.
 */
static void heartbeat_onchannelcallback(void *context)
{
	struct vmbus_channel *channel = context;
366
	u32 recvlen;
367 368 369
	u64 requestid;
	struct icmsg_hdr *icmsghdrp;
	struct heartbeat_msg_data *heartbeat_msg;
370
	u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
371

372 373 374 375 376 377 378
	while (1) {

		vmbus_recvpacket(channel, hbeat_txf_buf,
				 PAGE_SIZE, &recvlen, &requestid);

		if (!recvlen)
			break;
379

380
		icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[
381 382 383
				sizeof(struct vmbuspipe_hdr)];

		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
384 385 386 387 388 389
			if (vmbus_prep_negotiate_resp(icmsghdrp,
					hbeat_txf_buf,
					fw_versions, FW_VER_COUNT,
					hb_versions, HB_VER_COUNT,
					NULL, &hb_srv_version)) {

390
				pr_info("Heartbeat IC version %d.%d\n",
391 392 393
					hb_srv_version >> 16,
					hb_srv_version & 0xFFFF);
			}
394
		} else {
395 396 397 398
			heartbeat_msg =
				(struct heartbeat_msg_data *)&hbeat_txf_buf[
					sizeof(struct vmbuspipe_hdr) +
					sizeof(struct icmsg_hdr)];
399 400 401 402 403 404 405

			heartbeat_msg->seq_num += 1;
		}

		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
			| ICMSGHDRFLAG_RESPONSE;

406
		vmbus_sendpacket(channel, hbeat_txf_buf,
407
				       recvlen, requestid,
408
				       VM_PKT_DATA_INBAND, 0);
409 410
	}
}
411

412 413
static int util_probe(struct hv_device *dev,
			const struct hv_vmbus_device_id *dev_id)
414
{
415 416 417 418
	struct hv_util_service *srv =
		(struct hv_util_service *)dev_id->driver_data;
	int ret;

419
	srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
420 421
	if (!srv->recv_buffer)
		return -ENOMEM;
422
	srv->channel = dev->channel;
423 424 425
	if (srv->util_init) {
		ret = srv->util_init(srv);
		if (ret) {
426 427
			ret = -ENODEV;
			goto error1;
428 429 430
		}
	}

431 432 433 434 435 436 437
	/*
	 * The set of services managed by the util driver are not performance
	 * critical and do not need batched reading. Furthermore, some services
	 * such as KVP can only handle one message from the host at a time.
	 * Turn off batched reading for all util drivers before we open the
	 * channel.
	 */
438
	set_channel_read_mode(dev->channel, HV_CALL_DIRECT);
439

440
	hv_set_drvdata(dev, srv);
441 442 443 444 445 446

	ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
			srv->util_cb, dev->channel);
	if (ret)
		goto error;

447
	return 0;
448 449 450 451 452 453 454

error:
	if (srv->util_deinit)
		srv->util_deinit();
error1:
	kfree(srv->recv_buffer);
	return ret;
455 456 457 458
}

static int util_remove(struct hv_device *dev)
{
459 460 461 462
	struct hv_util_service *srv = hv_get_drvdata(dev);

	if (srv->util_deinit)
		srv->util_deinit();
463
	vmbus_close(dev->channel);
464 465
	kfree(srv->recv_buffer);

466 467 468 469
	return 0;
}

static const struct hv_vmbus_device_id id_table[] = {
470
	/* Shutdown guid */
471 472 473
	{ HV_SHUTDOWN_GUID,
	  .driver_data = (unsigned long)&util_shutdown
	},
474
	/* Time synch guid */
475 476 477
	{ HV_TS_GUID,
	  .driver_data = (unsigned long)&util_timesynch
	},
478
	/* Heartbeat guid */
479 480 481
	{ HV_HEART_BEAT_GUID,
	  .driver_data = (unsigned long)&util_heartbeat
	},
482
	/* KVP guid */
483 484 485
	{ HV_KVP_GUID,
	  .driver_data = (unsigned long)&util_kvp
	},
486 487 488 489
	/* VSS GUID */
	{ HV_VSS_GUID,
	  .driver_data = (unsigned long)&util_vss
	},
490 491 492 493
	/* File copy GUID */
	{ HV_FCOPY_GUID,
	  .driver_data = (unsigned long)&util_fcopy
	},
494
	{ },
495 496 497 498 499 500
};

MODULE_DEVICE_TABLE(vmbus, id_table);

/* The one and only one */
static  struct hv_driver util_drv = {
501
	.name = "hv_util",
502 503 504 505 506
	.id_table = id_table,
	.probe =  util_probe,
	.remove =  util_remove,
};

507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
/*
 * PTP .enable callback (see ptp_hyperv_info). All arguments are ignored;
 * always returns -EOPNOTSUPP.
 */
static int hv_ptp_enable(struct ptp_clock_info *info,
			 struct ptp_clock_request *request, int on)
{
	return -EOPNOTSUPP;
}

/*
 * PTP .settime64 callback (see ptp_hyperv_info). The requested time is
 * ignored; always returns -EOPNOTSUPP.
 */
static int hv_ptp_settime(struct ptp_clock_info *p, const struct timespec64 *ts)
{
	return -EOPNOTSUPP;
}

/*
 * PTP .adjfreq callback (see ptp_hyperv_info). The requested delta is
 * ignored; always returns -EOPNOTSUPP.
 */
static int hv_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
{
	return -EOPNOTSUPP;
}
/*
 * PTP .adjtime callback (see ptp_hyperv_info). The requested delta is
 * ignored; always returns -EOPNOTSUPP.
 */
static int hv_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
	return -EOPNOTSUPP;
}

/*
 * PTP .gettime64 callback.
 *
 * Takes the last host sample stored in host_ts, advances it by the Hyper-V
 * clocksource ticks elapsed since the sample was stored and writes the
 * result to *ts.
 *
 * @info: unused.
 * @ts:   receives the computed time.
 *
 * Always returns 0.
 */
static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
{
	unsigned long irqflags;
	u64 sample, sample_ref, now;

	/* The clocksource and both sample fields are read under the lock. */
	spin_lock_irqsave(&host_ts.lock, irqflags);
	now = hyperv_cs->read(hyperv_cs);
	sample = host_ts.host_time;
	sample_ref = host_ts.ref_time;
	spin_unlock_irqrestore(&host_ts.lock, irqflags);

	/* Same conversion as hv_set_host_time(): rebase, then 100ns -> ns. */
	*ts = ns_to_timespec64((sample + (now - sample_ref) - WLTIMEDELTA) * 100);

	return 0;
}

/*
 * Callback handed to get_device_system_crosststamp() by
 * hv_ptp_getcrosststamp(): reports the last host sample as the device time
 * and the clocksource value stored with it as the system counter value.
 *
 * host_ts is read without taking host_ts.lock here; hv_ptp_getcrosststamp()
 * holds the lock around the get_device_system_crosststamp() call
 * (presumably this callback runs from within that call - confirm).
 *
 * @device: receives the device (host) time.
 * @system: receives the clocksource and its cycle count.
 * @ctx:    unused.
 *
 * Always returns 0.
 */
static int hv_ptp_get_syncdevicetime(ktime_t *device,
				     struct system_counterval_t *system,
				     void *ctx)
{
	u64 host_100ns = host_ts.host_time - WLTIMEDELTA;

	*device = ns_to_ktime(host_100ns * 100);
	system->cycles = host_ts.ref_time;
	system->cs = hyperv_cs;

	return 0;
}

/*
 * PTP .getcrosststamp callback.
 *
 * @ptp:     unused.
 * @xtstamp: receives the cross timestamp.
 *
 * Returns whatever get_device_system_crosststamp() returns.
 */
static int hv_ptp_getcrosststamp(struct ptp_clock_info *ptp,
				 struct system_device_crosststamp *xtstamp)
{
	unsigned long irqflags;
	int err;

	spin_lock_irqsave(&host_ts.lock, irqflags);

	/*
	 * host_ts contains the last time sample from the host and the snapshot
	 * of system time. We don't need to calculate the time delta between
	 * the reception and now as get_device_system_crosststamp() does the
	 * required interpolation.
	 */
	err = get_device_system_crosststamp(hv_ptp_get_syncdevicetime, NULL,
					    &host_ts.snap, xtstamp);

	spin_unlock_irqrestore(&host_ts.lock, irqflags);

	return err;
}

/*
 * PTP clock description registered by hv_timesync_init(). Only reading the
 * time and cross timestamping are implemented; the enable, adjust and set
 * callbacks all return -EOPNOTSUPP.
 */
static struct ptp_clock_info ptp_hyperv_info = {
	.owner		= THIS_MODULE,
	.name		= "hyperv",
	.gettime64	= hv_ptp_gettime,
	.getcrosststamp	= hv_ptp_getcrosststamp,
	.settime64	= hv_ptp_settime,
	.adjtime	= hv_ptp_adjtime,
	.adjfreq	= hv_ptp_adjfreq,
	.enable		= hv_ptp_enable,
};

/* Registered PTP clock, or NULL when none is registered. */
static struct ptp_clock *hv_ptp_clock;

587 588
static int hv_timesync_init(struct hv_util_service *srv)
{
589 590 591 592
	/* TimeSync requires Hyper-V clocksource. */
	if (!hyperv_cs)
		return -ENODEV;

593
	INIT_WORK(&wrk.work, hv_set_host_time);
594 595 596 597 598 599 600 601 602 603 604 605 606

	/*
	 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
	 * disabled but the driver is still useful without the PTP device
	 * as it still handles the ICTIMESYNCFLAG_SYNC case.
	 */
	hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
	if (IS_ERR_OR_NULL(hv_ptp_clock)) {
		pr_err("cannot register PTP clock: %ld\n",
		       PTR_ERR(hv_ptp_clock));
		hv_ptp_clock = NULL;
	}

607 608 609 610 611
	return 0;
}

static void hv_timesync_deinit(void)
{
612 613
	if (hv_ptp_clock)
		ptp_clock_unregister(hv_ptp_clock);
614 615 616
	cancel_work_sync(&wrk.work);
}

617 618
/*
 * Module entry point: register the utility driver with vmbus.
 *
 * Returns the result of vmbus_driver_register().
 */
static int __init init_hyperv_utils(void)
{
	int ret;

	pr_info("Registering HyperV Utility Driver\n");

	ret = vmbus_driver_register(&util_drv);

	return ret;
}

static void exit_hyperv_utils(void)
{
626
	pr_info("De-Registered HyperV Utility Driver\n");
627

628
	vmbus_driver_unregister(&util_drv);
629 630 631 632 633 634 635
}

module_init(init_hyperv_utils);
module_exit(exit_hyperv_utils);

MODULE_DESCRIPTION("Hyper-V Utilities");
MODULE_LICENSE("GPL");