diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 26d269ba947c85c5a87fe4d9ba1743ca6ccaa348..85a2142c9384007d2bc43a1f072ceb7b96b0c53b 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * execute: * * (a) In the "normal (i.e., not resuming from hibernation)" path, - * the full barrier in smp_store_mb() guarantees that the store + * the full barrier in virt_store_mb() guarantees that the store * is propagated to all CPUs before the add_channel_work work * is queued. In turn, add_channel_work is queued before the * channel's ring buffer is allocated/initialized and the @@ -392,14 +392,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * recv_int_page before retrieving the channel pointer from the * array of channels. * - * (b) In the "resuming from hibernation" path, the smp_store_mb() + * (b) In the "resuming from hibernation" path, the virt_store_mb() * guarantees that the store is propagated to all CPUs before * the VMBus connection is marked as ready for the resume event * (cf. check_ready_for_resume_event()). The interrupt handler * of the VMBus driver and vmbus_chan_sched() can not run before * vmbus_bus_resume() has completed execution (cf. resume_noirq). */ - smp_store_mb( + virt_store_mb( vmbus_connection.channels[channel->offermsg.child_relid], channel); } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 439f99b8b5de269bdc2c0574d4eaae2521e9d50f..3248b48f37f6128aa1dd58ff6db3582edaa5f362 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem_device *dm) struct dm_status status; unsigned long now = jiffies; unsigned long last_post = last_post_time; + unsigned long num_pages_avail, num_pages_committed; if (pressure_report_delay > 0) { --pressure_report_delay; @@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem_device *dm) * num_pages_onlined) as committed to the host, otherwise it can try * asking us to balloon them out. */ - status.num_avail = si_mem_available(); - status.num_committed = vm_memory_committed() + + num_pages_avail = si_mem_available(); + num_pages_committed = vm_memory_committed() + dm->num_pages_ballooned + (dm->num_pages_added > dm->num_pages_onlined ? dm->num_pages_added - dm->num_pages_onlined : 0) + compute_balloon_floor(); - trace_balloon_status(status.num_avail, status.num_committed, + trace_balloon_status(num_pages_avail, num_pages_committed, vm_memory_committed(), dm->num_pages_ballooned, dm->num_pages_added, dm->num_pages_onlined); + + /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */ + status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE; + status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE; + /* * If our transaction ID is no longer current, just don't * send the status. This can happen if we were interrupted @@ -1653,6 +1660,38 @@ static void disable_page_reporting(void) } } +static int ballooning_enabled(void) +{ + /* + * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE), + * since currently it's unclear to us whether an unballoon request can + * make sure all page ranges are guest page size aligned. + */ + if (PAGE_SIZE != HV_HYP_PAGE_SIZE) { + pr_info("Ballooning disabled because page size is not 4096 bytes\n"); + return 0; + } + + return 1; +} + +static int hot_add_enabled(void) +{ + /* + * Disable hot add on ARM64, because we currently rely on + * memory_add_physaddr_to_nid() to get a node id of a hot add range, + * however ARM64's memory_add_physaddr_to_nid() always return 0 and + * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for + * add_memory(). + */ + if (IS_ENABLED(CONFIG_ARM64)) { + pr_info("Memory hot add disabled on ARM64\n"); + return 0; + } + + return 1; +} + static int balloon_connect_vsp(struct hv_device *dev) { struct dm_version_request version_req; @@ -1724,8 +1763,8 @@ static int balloon_connect_vsp(struct hv_device *dev) * currently still requires the bits to be set, so we have to add code * to fail the host's hot-add and balloon up/down requests, if any. */ - cap_msg.caps.cap_bits.balloon = 1; - cap_msg.caps.cap_bits.hot_add = 1; + cap_msg.caps.cap_bits.balloon = ballooning_enabled(); + cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); /* * Specify our alignment requirements as it relates diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index c1dd21d0d7ef8d1ba96523b602ba71a48f0963b1..ae68298c0dcac59add8948c0510499b6958ea434 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -218,6 +219,16 @@ bool hv_query_ext_cap(u64 cap_query) } EXPORT_SYMBOL_GPL(hv_query_ext_cap); +void hv_setup_dma_ops(struct device *dev, bool coherent) +{ + /* + * Hyper-V does not offer a vIOMMU in the guest + * VM, so pass 0/NULL for the IOMMU settings + */ + arch_setup_dma_ops(dev, 0, 0, NULL, coherent); +} +EXPORT_SYMBOL_GPL(hv_setup_dma_ops); + bool hv_is_hibernation_supported(void) { return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 71efacb90965946f12e6513efc0a9f05bd8ba89f..3d215d9dec433b33417f69de5513c1b231a27e36 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -439,7 +439,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) { u32 priv_read_loc = rbi->priv_read_index; - u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); + u32 write_loc; + + /* + * The Hyper-V host writes the packet data, then uses + * store_release() to update the write_index. Use load_acquire() + * here to prevent loads of the packet data from being re-ordered + * before the read of the write_index and potentially getting + * stale data. + */ + write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); if (write_loc >= priv_read_loc) return write_loc - priv_read_loc; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 60ee8b329f9e374787d20b6e4f33831fdbad0426..14de17087864ac3a0d1efa475d8a44fb4d684cae 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -77,8 +77,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE && hyperv_report_reg()) { @@ -100,8 +100,8 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. */ if (hyperv_report_reg()) hyperv_report_panic(regs, val, true); @@ -920,6 +920,21 @@ static int vmbus_probe(struct device *child_device) return ret; } +/* + * vmbus_dma_configure -- Configure DMA coherence for VMbus device + */ +static int vmbus_dma_configure(struct device *child_device) +{ + /* + * On ARM64, propagate the DMA coherence setting from the top level + * VMbus ACPI device to the child VMbus device being added here. + * On x86/x64 coherence is assumed and these calls have no effect. + */ + hv_setup_dma_ops(child_device, + device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT); + return 0; +} + /* * vmbus_remove - Remove a vmbus device */ @@ -1040,6 +1055,7 @@ static struct bus_type hv_bus = { .remove = vmbus_remove, .probe = vmbus_probe, .uevent = vmbus_uevent, + .dma_configure = vmbus_dma_configure, .dev_groups = vmbus_dev_groups, .drv_groups = vmbus_drv_groups, .bus_groups = vmbus_bus_groups, @@ -1546,14 +1562,20 @@ static int vmbus_bus_init(void) if (ret) goto err_connect; + if (hv_is_isolation_supported()) + sysctl_record_panic_msg = 0; + /* * Only register if the crash MSRs are available */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { u64 hyperv_crash_ctl; /* - * Sysctl registration is not fatal, since by default - * reporting is enabled. + * Panic message recording (sysctl_record_panic_msg) + * is enabled by default in non-isolated guests and + * disabled by default in isolated guests; the panic + * message recording won't be available in isolated + * guests should the following registration fail. */ hv_ctl_table_hdr = register_sysctl_table(hv_root_table); if (!hv_ctl_table_hdr) @@ -2097,6 +2119,10 @@ int vmbus_device_register(struct hv_device *child_device_obj) child_device_obj->device.parent = &hv_acpi_dev->dev; child_device_obj->device.release = vmbus_device_release; + child_device_obj->device.dma_parms = &child_device_obj->dma_parms; + child_device_obj->device.dma_mask = &child_device_obj->dma_mask; + dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); + /* * Register with the LDM. This will kick off the driver/device * binding...which will eventually call vmbus_match() and vmbus_probe() @@ -2122,9 +2148,6 @@ int vmbus_device_register(struct hv_device *child_device_obj) } hv_debug_add_dev_dir(child_device_obj); - child_device_obj->device.dma_parms = &child_device_obj->dma_parms; - child_device_obj->device.dma_mask = &child_device_obj->dma_mask; - dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); return 0; err_kset_unregister: @@ -2428,6 +2451,21 @@ static int vmbus_acpi_add(struct acpi_device *device) hv_acpi_dev = device; + /* + * Older versions of Hyper-V for ARM64 fail to include the _CCA + * method on the top level VMbus device in the DSDT. But devices + * are hardware coherent in all current Hyper-V use cases, so fix + * up the ACPI device to behave as if _CCA is present and indicates + * hardware coherence. + */ + ACPI_COMPANION_SET(&device->dev, device); + if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) && + device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) { + pr_info("No ACPI _CCA found; assuming coherent device I/O\n"); + device->flags.cca_seen = true; + device->flags.coherent_dma = true; + } + result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, vmbus_walk_resources, NULL); @@ -2780,10 +2818,15 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); } + /* + * The panic notifier is always registered, hence we should + * also unconditionally unregister it here as well. + */ + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + free_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 558b35aba610460e10303a3447e58ee4d28575a5..d270a204324e9cc556de9715b6d9470b28694583 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3407,6 +3407,15 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->bridge->domain_nr = dom; #ifdef CONFIG_X86 hbus->sysdata.domain = dom; +#elif defined(CONFIG_ARM64) + /* + * Set the PCI bus parent to be the corresponding VMbus + * device. Then the VMbus device will be assigned as the + * ACPI companion in pcibios_root_bridge_prepare() and + * pci_dma_configure() will propagate device coherence + * information to devices created on the bus. + */ + hbus->sysdata.parent = hdev->device.parent; #endif hbus->hdev = hdev; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c08758b6b364206d656a15979cc166e0e4fb3e58..c05d2ce9b6cd85bb9c2c439cf35160b4cdbcc838 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -269,6 +269,7 @@ bool hv_isolation_type_snp(void); u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); +void hv_setup_dma_ops(struct device *dev, bool coherent); void *hv_map_memory(void *addr, unsigned long size); void hv_unmap_memory(void *addr); #else /* CONFIG_HYPERV */