提交 6639032a 编写于 作者: D David Hildenbrand 提交者: Michael S. Tsirkin

virtio-mem: prepare page onlining code for granularity smaller than MAX_ORDER - 1

Let's prepare our page onlining code for subblock sizes smaller than
MAX_ORDER - 1: we'll get called for a MAX_ORDER - 1 page but might have
some subblocks in the range plugged and some unplugged. In that case,
fall back to subblock granularity so that only the plugged parts are
exposed to the buddy allocator.
Signed-off-by: David Hildenbrand <david@redhat.com>
Link: https://lore.kernel.org/r/20211126134209.17332-2-david@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Eric Ren <renzhengeek@gmail.com>
上级 539fec78
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/lockdep.h> #include <linux/lockdep.h>
#include <linux/log2.h>
#include <acpi/acpi_numa.h> #include <acpi/acpi_numa.h>
...@@ -1228,28 +1229,46 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, ...@@ -1228,28 +1229,46 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
page_ref_inc(pfn_to_page(pfn + i)); page_ref_inc(pfn_to_page(pfn + i));
} }
static void virtio_mem_online_page_cb(struct page *page, unsigned int order) static void virtio_mem_online_page(struct virtio_mem *vm,
struct page *page, unsigned int order)
{ {
const unsigned long addr = page_to_phys(page); const unsigned long start = page_to_phys(page);
unsigned long id, sb_id; const unsigned long end = start + PFN_PHYS(1 << order);
struct virtio_mem *vm; unsigned long addr, next, id, sb_id, count;
bool do_online; bool do_online;
rcu_read_lock(); /*
list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { * We can get called with any order up to MAX_ORDER - 1. If our
if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) * subblock size is smaller than that and we have a mixture of plugged
continue; * and unplugged subblocks within such a page, we have to process in
* smaller granularity. In that case we'll adjust the order exactly once
* within the loop.
*/
for (addr = start; addr < end; ) {
next = addr + PFN_PHYS(1 << order);
if (vm->in_sbm) { if (vm->in_sbm) {
/*
* We exploit here that subblocks have at least
* MAX_ORDER_NR_PAGES size/alignment - so we cannot
* cross subblocks within one call.
*/
id = virtio_mem_phys_to_mb_id(addr); id = virtio_mem_phys_to_mb_id(addr);
sb_id = virtio_mem_phys_to_sb_id(vm, addr); sb_id = virtio_mem_phys_to_sb_id(vm, addr);
do_online = virtio_mem_sbm_test_sb_plugged(vm, id, count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1;
sb_id, 1);
if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) {
/* Fully plugged. */
do_online = true;
} else if (count == 1 ||
virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) {
/* Fully unplugged. */
do_online = false;
} else {
/*
* Mixture, process sub-blocks instead. This
* will be at least the size of a pageblock.
* We'll run into this case exactly once.
*/
order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT;
do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1);
continue;
}
} else { } else {
/* /*
* If the whole block is marked fake offline, keep * If the whole block is marked fake offline, keep
...@@ -1260,18 +1279,38 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) ...@@ -1260,18 +1279,38 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
} }
if (do_online)
generic_online_page(pfn_to_page(PFN_DOWN(addr)), order);
else
virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
false);
addr = next;
}
}
static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
{
const unsigned long addr = page_to_phys(page);
struct virtio_mem *vm;
rcu_read_lock();
list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
/* /*
* virtio_mem_set_fake_offline() might sleep, we don't need * Pages we're onlining will never cross memory blocks and,
* the device anymore. See virtio_mem_remove() how races * therefore, not virtio-mem devices.
*/
if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
continue;
/*
* virtio_mem_set_fake_offline() might sleep. We can safely
* drop the RCU lock at this point because the device
* cannot go away. See virtio_mem_remove() how races
* between memory onlining and device removal are handled. * between memory onlining and device removal are handled.
*/ */
rcu_read_unlock(); rcu_read_unlock();
if (do_online) virtio_mem_online_page(vm, page, order);
generic_online_page(page, order);
else
virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
false);
return; return;
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -2438,8 +2477,7 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) ...@@ -2438,8 +2477,7 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
/* /*
* We want subblocks to span at least MAX_ORDER_NR_PAGES and * We want subblocks to span at least MAX_ORDER_NR_PAGES and
* pageblock_nr_pages pages. This: * pageblock_nr_pages pages. This:
* - Simplifies our page onlining code (virtio_mem_online_page_cb) * - Simplifies our fake page onlining code (virtio_mem_fake_online).
* and fake page onlining code (virtio_mem_fake_online).
* - Is required for now for alloc_contig_range() to work reliably - * - Is required for now for alloc_contig_range() to work reliably -
* it doesn't properly handle smaller granularity on ZONE_NORMAL. * it doesn't properly handle smaller granularity on ZONE_NORMAL.
*/ */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册