tlb.c 6.8 KB
Newer Older
G
Glauber Costa 已提交
1 2 3 4 5 6
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
T
Tejun Heo 已提交
7
#include <linux/module.h>
8
#include <linux/cpu.h>
G
Glauber Costa 已提交
9 10 11

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
12
#include <asm/cache.h>
T
Tejun Heo 已提交
13
#include <asm/apic.h>
T
Tejun Heo 已提交
14
#include <asm/uv/uv.h>
15
#include <linux/debugfs.h>
16

17 18 19
/*
 * Per-CPU TLB bookkeeping.  Each CPU's copy initially has active_mm
 * pointing at init_mm and state set to 0.
 */
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
	.active_mm	= &init_mm,
	.state		= 0,
};

G
Glauber Costa 已提交
20 21 22 23 24 25 26 27 28 29 30
/*
 *	Smarter SMP flushing macros.
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway).
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *	More scalable flush, from Andi Kleen
 *
31
 *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
G
Glauber Costa 已提交
32 33
 */

34 35 36 37 38
/*
 * Argument block handed to the remote flush callbacks in this file.
 *
 * flush_tlb_func() interprets the range as follows:
 *   - flush_end == TLB_FLUSH_ALL: flush the whole TLB;
 *   - flush_end == 0: flush the single page at flush_start;
 *   - otherwise: flush [flush_start, flush_end) page by page.
 */
struct flush_tlb_info {
	/* mm being flushed; compared against the receiving CPU's active_mm */
	struct mm_struct *flush_mm;
	/* first address of the range to flush */
	unsigned long flush_start;
	/* end of the range (exclusive), or one of the special values above */
	unsigned long flush_end;
};
39

G
Glauber Costa 已提交
40 41 42 43 44 45
/*
 * We cannot call mmdrop() because we are in interrupt context,
 * instead update mm->cpu_vm_mask.
 */
void leave_mm(int cpu)
{
46
	struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
47
	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
G
Glauber Costa 已提交
48
		BUG();
49 50 51 52
	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
		load_cr3(swapper_pg_dir);
	}
G
Glauber Costa 已提交
53 54 55 56 57 58 59 60
}
EXPORT_SYMBOL_GPL(leave_mm);

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
61 62 63 64
 * 1a1) set cpu_tlbstate to TLBSTATE_OK
 *	Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
 *	if cpu0 was in lazy tlb mode.
 * 1a2) update cpu active_mm
G
Glauber Costa 已提交
65
 *	Now cpu0 accepts tlb flushes for the new mm.
66
 * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
G
Glauber Costa 已提交
67 68
 *	Now the other cpus will send tlb flush ipis.
 * 1a4) change cr3.
69 70 71 72
 * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *	Stop ipi delivery for the old mm. This is not synchronized with
 *	the other cpus, but flush_tlb_func ignores flush ipis for the wrong
 *	mm, and in the worst case we perform a superfluous tlb flush.
G
Glauber Costa 已提交
73
 * 1b) thread switch without mm change
74 75
 *	cpu active_mm is correct, cpu0 already handles flush ipis.
 * 1b1) set cpu_tlbstate to TLBSTATE_OK
G
Glauber Costa 已提交
76 77 78 79 80 81 82 83 84 85 86 87
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *	Atomically set the bit [other cpus will start sending flush ipis],
 *	and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, ie current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
88
 * The good news is that cpu_tlbstate is local to each cpu, no
G
Glauber Costa 已提交
89 90 91 92
 * write/read ordering problems.
 */

/*
93
 * TLB flush funcation:
G
Glauber Costa 已提交
94 95
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
T
Tejun Heo 已提交
96
 */
97
static void flush_tlb_func(void *info)
G
Glauber Costa 已提交
98
{
99
	struct flush_tlb_info *f = info;
G
Glauber Costa 已提交
100

101 102
	inc_irq_stat(irq_tlb_count);

103 104
	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
		return;
D
Dave Hansen 已提交
105 106
	if (!f->flush_end)
		f->flush_end = f->flush_start + PAGE_SIZE;
G
Glauber Costa 已提交
107

108
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
109
	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
110
		if (f->flush_end == TLB_FLUSH_ALL)
111 112 113 114 115 116 117
			local_flush_tlb();
		else {
			unsigned long addr;
			addr = f->flush_start;
			while (addr < f->flush_end) {
				__flush_tlb_single(addr);
				addr += PAGE_SIZE;
118
			}
119 120 121
		}
	} else
		leave_mm(smp_processor_id());
G
Glauber Costa 已提交
122 123 124

}

125
void native_flush_tlb_others(const struct cpumask *cpumask,
126 127
				 struct mm_struct *mm, unsigned long start,
				 unsigned long end)
128
{
129 130 131 132 133
	struct flush_tlb_info info;
	info.flush_mm = mm;
	info.flush_start = start;
	info.flush_end = end;

134
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
135
	if (is_uv_system()) {
T
Tejun Heo 已提交
136
		unsigned int cpu;
137

138
		cpu = smp_processor_id();
139
		cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
T
Tejun Heo 已提交
140
		if (cpumask)
141 142
			smp_call_function_many(cpumask, flush_tlb_func,
								&info, 1);
143
		return;
144
	}
145
	smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
G
Glauber Costa 已提交
146 147 148 149 150 151 152 153
}

void flush_tlb_current_task(void)
{
	struct mm_struct *mm = current->mm;

	preempt_disable();

154
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
G
Glauber Costa 已提交
155
	local_flush_tlb();
156
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
157
		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
G
Glauber Costa 已提交
158 159 160
	preempt_enable();
}

161 162 163
/*
 * Largest range, in units of pages, that is still flushed page by page.
 * flush_tlb_mm_range() and flush_tlb_kernel_range() compare the size of
 * the requested range against this value and fall back to a full flush
 * for anything larger.
 */
unsigned long tlb_single_page_flush_ceiling = 1;

164 165 166 167
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned long vmflag)
{
	unsigned long addr;
168 169
	/* do a global flush by default */
	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
170 171

	preempt_disable();
172
	if (current->active_mm != mm)
173
		goto out;
174

175 176
	if (!current->mm) {
		leave_mm(smp_processor_id());
177
		goto out;
178
	}
G
Glauber Costa 已提交
179

180 181
	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
182

183 184
	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
		base_pages_to_flush = TLB_FLUSH_ALL;
185
		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
186
		local_flush_tlb();
D
Dave Hansen 已提交
187
	} else {
188
		/* flush range by one by one 'invlpg' */
D
Dave Hansen 已提交
189
		for (addr = start; addr < end;	addr += PAGE_SIZE) {
190
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
191
			__flush_tlb_single(addr);
D
Dave Hansen 已提交
192
		}
193
	}
194
out:
195
	if (base_pages_to_flush == TLB_FLUSH_ALL) {
196 197 198
		start = 0UL;
		end = TLB_FLUSH_ALL;
	}
199
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
200
		flush_tlb_others(mm_cpumask(mm), mm, start, end);
G
Glauber Costa 已提交
201 202 203
	preempt_enable();
}

204
void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
G
Glauber Costa 已提交
205 206 207 208 209 210 211
{
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();

	if (current->active_mm == mm) {
		if (current->mm)
212
			__flush_tlb_one(start);
G
Glauber Costa 已提交
213 214 215 216
		else
			leave_mm(smp_processor_id());
	}

217
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
218
		flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
G
Glauber Costa 已提交
219 220 221 222 223 224

	preempt_enable();
}

static void do_flush_tlb_all(void *info)
{
225
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
G
Glauber Costa 已提交
226
	__flush_tlb_all();
227
	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
228
		leave_mm(smp_processor_id());
G
Glauber Costa 已提交
229 230 231 232
}

void flush_tlb_all(void)
{
233
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
234
	on_each_cpu(do_flush_tlb_all, NULL, 1);
G
Glauber Costa 已提交
235
}
236

237 238 239 240 241 242
static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* flush range by one by one 'invlpg' */
243
	for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
244 245 246 247 248 249 250
		__flush_tlb_single(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{

	/* Balance as user space task's flush, a bit conservative */
251 252
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
253
		on_each_cpu(do_flush_tlb_all, NULL, 1);
254 255
	} else {
		struct flush_tlb_info info;
256 257 258 259 260
		info.flush_start = start;
		info.flush_end = end;
		on_each_cpu(do_kernel_range_flush, &info, 1);
	}
}