#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
	u64 address_space;
	u64 flags;
	u64 processor_mask;
	u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
	u64 address_space;
	u64 flags;
	struct {
		u64 format;
		u64 valid_bank_mask;
		u64 bank_contents[];
	} hv_vp_set;
	u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
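/* With 4K pages a single gva_list entry thus covers at most 16 MB */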
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

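/*
 * Per-cpu pointers to the hypercall input page used by the flush routines.
 * The percpu variables themselves are allocated in hyper_alloc_mmu(); the
 * backing page for each CPU is allocated on first use.
 */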
static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT)
			gva_list[gva_n] |= ~PAGE_MASK;
		else if (diff)
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
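		/*
		 * E.g. a 3-page range starting at 0x10000 becomes the
		 * single entry 0x10002: the base page plus two more.
		 */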

		cur += HV_TLB_FLUSH_UNIT;
		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
				    const struct cpumask *cpus)
{
	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

	/* valid_bank_mask can represent up to 64 banks */
	if (hv_max_vp_index / 64 >= 64)
		return 0;

	/*
	 * Clear all banks up to the maximum possible bank: hv_flush_pcpu_ex
	 * structs are not cleared between calls, so we would otherwise risk
	 * flushing unneeded vCPUs.
	 */
	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

	/*
	 * Some banks may end up being empty but this is acceptable.
	 */
	for_each_cpu(cpu, cpus) {
		vcpu = hv_cpu_number_to_vp_number(cpu);
		vcpu_bank = vcpu / 64;
		vcpu_offset = vcpu % 64;
		__set_bit(vcpu_offset, (unsigned long *)
			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
		if (vcpu_bank >= nr_bank)
			nr_bank = vcpu_bank + 1;
	}
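	/* Banks 0 .. nr_bank-1 are the ones present in bank_contents[] */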
	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

	return nr_bank;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_flush_pcpu **flush_pcpu;
	struct hv_flush_pcpu *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

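	/*
	 * Fall back to the IPI-based native flush if the hypercall page or
	 * the per-cpu input buffer has not been set up.
	 */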
	if (!pcpu_flush || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

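	/*
	 * The per-cpu hypercall input page is allocated lazily with
	 * GFP_ATOMIC (interrupts are already disabled here); if no page is
	 * available we fall back to the native flush below.
	 */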
	flush_pcpu = this_cpu_ptr(pcpu_flush);

	if (unlikely(!*flush_pcpu))
		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			/* processor_mask can only address vCPUs 0..63 */
			if (vcpu >= 64) {
				local_irq_restore(flags);
				goto do_native;
			}

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We can flush at most max_gvas gva_list entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				       const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_flush_pcpu_ex **flush_pcpu;
	struct hv_flush_pcpu_ex *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!pcpu_flush_ex || !hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

	flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

	if (unlikely(!*flush_pcpu))
		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	if (!cpumask_equal(cpus, cpu_present_mask)) {
		flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
		nr_bank = cpumask_to_vp_set(flush, cpus);
	}

	if (!nr_bank) {
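		/*
		 * Either the mask covered all present CPUs or it could not
		 * be represented in the vp_set (cpumask_to_vp_set() returned
		 * 0), so ask the hypervisor to flush all processors.
		 */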
		flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	}

	/*
	 * We can flush at most max_gvas gva_list entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
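		/*
		 * The variable header of the *_EX hypercalls is the vp_set:
		 * format and valid_bank_mask (two qwords) plus nr_bank
		 * bank_contents[] entries, hence the nr_bank + 2 below.
		 */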
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank + 2, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank + 2, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
			gva_n, nr_bank + 2, flush, NULL);
	}

	local_irq_restore(flags);

	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	setup_clear_cpu_cap(X86_FEATURE_PCID);

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
		pr_info("Using hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
	} else {
		pr_info("Using ext hypercall for remote TLB flush\n");
		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
	}
}

void hyper_alloc_mmu(void)
{
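	/*
	 * Only the per-cpu pointer variables are allocated here; the actual
	 * hypercall input page for each CPU is allocated on first use in the
	 * flush functions above.
	 */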
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
	else
		pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}