tlb.c 7.2 KB
Newer Older
G
Glauber Costa 已提交
1 2 3 4 5 6
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
T
Tejun Heo 已提交
7
#include <linux/module.h>
8
#include <linux/cpu.h>
G
Glauber Costa 已提交
9 10 11

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
12
#include <asm/cache.h>
T
Tejun Heo 已提交
13
#include <asm/apic.h>
T
Tejun Heo 已提交
14
#include <asm/uv/uv.h>
15
#include <linux/debugfs.h>
16

17 18 19
/*
 * Per-CPU TLB state, read in this file through
 * this_cpu_read(cpu_tlbstate.active_mm) and this_cpu_read(cpu_tlbstate.state).
 *
 * NOTE(review): the initializer is positional; presumably &init_mm lands in
 * .active_mm and 0 in .state -- confirm against the struct tlb_state
 * definition.
 */
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
			= { &init_mm, 0, };

G
Glauber Costa 已提交
20 21 22 23 24 25 26 27 28 29 30
/*
 *	Smarter SMP flushing macros.
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway).
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *	More scalable flush, from Andi Kleen
 *
31
 *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
G
Glauber Costa 已提交
32 33
 */

34 35 36 37 38
/*
 * Argument block handed to the cross-CPU flush callbacks in this file
 * (flush_tlb_func() and do_kernel_range_flush()).
 */
struct flush_tlb_info {
	/* mm being flushed; flush_tlb_func() ignores requests for any other mm */
	struct mm_struct *flush_mm;
	/* first virtual address to invalidate */
	unsigned long flush_start;
	/*
	 * End of the range (exclusive: the flush loops run while addr < end).
	 * TLB_FLUSH_ALL requests a full flush; flush_tlb_func() treats 0 as
	 * flush_start + PAGE_SIZE, i.e. a single page.
	 */
	unsigned long flush_end;
};
39

G
Glauber Costa 已提交
40 41 42 43 44 45
/*
 * We cannot call mmdrop() because we are in interrupt context,
 * instead update mm->cpu_vm_mask.
 */
void leave_mm(int cpu)
{
46
	struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
47
	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
G
Glauber Costa 已提交
48
		BUG();
49 50 51
	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
		load_cr3(swapper_pg_dir);
52
		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
53
	}
G
Glauber Costa 已提交
54 55 56 57 58 59 60 61
}
EXPORT_SYMBOL_GPL(leave_mm);

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
62 63 64 65
 * 1a1) set cpu_tlbstate to TLBSTATE_OK
 *	Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
 *	if cpu0 was in lazy tlb mode.
 * 1a2) update cpu active_mm
G
Glauber Costa 已提交
66
 *	Now cpu0 accepts tlb flushes for the new mm.
67
 * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
G
Glauber Costa 已提交
68 69
 *	Now the other cpus will send tlb flush ipis.
 * 1a4) change cr3.
70 71 72 73
 * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *	Stop ipi delivery for the old mm. This is not synchronized with
 *	the other cpus, but flush_tlb_func ignores flush ipis for the wrong
 *	mm, and in the worst case we perform a superfluous tlb flush.
G
Glauber Costa 已提交
74
 * 1b) thread switch without mm change
75 76
 *	cpu active_mm is correct, cpu0 already handles flush ipis.
 * 1b1) set cpu_tlbstate to TLBSTATE_OK
G
Glauber Costa 已提交
77 78 79 80 81 82 83 84 85 86 87 88
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *	Atomically set the bit [other cpus will start sending flush ipis],
 *	and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, ie current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
89
 * The good news is that cpu_tlbstate is local to each cpu, no
G
Glauber Costa 已提交
90 91 92 93
 * write/read ordering problems.
 */

/*
94
 * TLB flush funcation:
G
Glauber Costa 已提交
95 96
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
T
Tejun Heo 已提交
97
 */
98
static void flush_tlb_func(void *info)
G
Glauber Costa 已提交
99
{
100
	struct flush_tlb_info *f = info;
G
Glauber Costa 已提交
101

102 103
	inc_irq_stat(irq_tlb_count);

104 105
	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
		return;
D
Dave Hansen 已提交
106 107
	if (!f->flush_end)
		f->flush_end = f->flush_start + PAGE_SIZE;
G
Glauber Costa 已提交
108

109
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
110
	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
111
		if (f->flush_end == TLB_FLUSH_ALL) {
112
			local_flush_tlb();
113 114
			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
		} else {
115
			unsigned long addr;
116 117
			unsigned long nr_pages =
				f->flush_end - f->flush_start / PAGE_SIZE;
118 119 120 121
			addr = f->flush_start;
			while (addr < f->flush_end) {
				__flush_tlb_single(addr);
				addr += PAGE_SIZE;
122
			}
123
			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
124 125 126
		}
	} else
		leave_mm(smp_processor_id());
G
Glauber Costa 已提交
127 128 129

}

130
void native_flush_tlb_others(const struct cpumask *cpumask,
131 132
				 struct mm_struct *mm, unsigned long start,
				 unsigned long end)
133
{
134 135 136 137 138
	struct flush_tlb_info info;
	info.flush_mm = mm;
	info.flush_start = start;
	info.flush_end = end;

139
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
140
	if (is_uv_system()) {
T
Tejun Heo 已提交
141
		unsigned int cpu;
142

143
		cpu = smp_processor_id();
144
		cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
T
Tejun Heo 已提交
145
		if (cpumask)
146 147
			smp_call_function_many(cpumask, flush_tlb_func,
								&info, 1);
148
		return;
149
	}
150
	smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
G
Glauber Costa 已提交
151 152 153 154 155 156 157 158
}

void flush_tlb_current_task(void)
{
	struct mm_struct *mm = current->mm;

	preempt_disable();

159
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
G
Glauber Costa 已提交
160
	local_flush_tlb();
161
	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
162
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
163
		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
G
Glauber Costa 已提交
164 165 166
	preempt_enable();
}

167 168 169
/*
 * in units of pages
 *
 * Largest range that is still flushed page by page; anything bigger is
 * turned into a full TLB flush by flush_tlb_mm_range() and
 * flush_tlb_kernel_range().
 */
unsigned long tlb_single_page_flush_ceiling = 1;

170 171 172 173
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned long vmflag)
{
	unsigned long addr;
174 175
	/* do a global flush by default */
	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
176 177

	preempt_disable();
178
	if (current->active_mm != mm)
179
		goto out;
180

181 182
	if (!current->mm) {
		leave_mm(smp_processor_id());
183
		goto out;
184
	}
G
Glauber Costa 已提交
185

186 187
	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
188

189 190
	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
		base_pages_to_flush = TLB_FLUSH_ALL;
191
		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
192
		local_flush_tlb();
D
Dave Hansen 已提交
193
	} else {
194
		/* flush range by one by one 'invlpg' */
D
Dave Hansen 已提交
195
		for (addr = start; addr < end;	addr += PAGE_SIZE) {
196
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
197
			__flush_tlb_single(addr);
D
Dave Hansen 已提交
198
		}
199
	}
200
	trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
201
out:
202
	if (base_pages_to_flush == TLB_FLUSH_ALL) {
203 204 205
		start = 0UL;
		end = TLB_FLUSH_ALL;
	}
206
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
207
		flush_tlb_others(mm_cpumask(mm), mm, start, end);
G
Glauber Costa 已提交
208 209 210
	preempt_enable();
}

211
void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
G
Glauber Costa 已提交
212 213 214 215 216 217 218
{
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();

	if (current->active_mm == mm) {
		if (current->mm)
219
			__flush_tlb_one(start);
G
Glauber Costa 已提交
220 221 222 223
		else
			leave_mm(smp_processor_id());
	}

224
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
225
		flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
G
Glauber Costa 已提交
226 227 228 229 230 231

	preempt_enable();
}

static void do_flush_tlb_all(void *info)
{
232
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
G
Glauber Costa 已提交
233
	__flush_tlb_all();
234
	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
235
		leave_mm(smp_processor_id());
G
Glauber Costa 已提交
236 237 238 239
}

void flush_tlb_all(void)
{
240
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
241
	on_each_cpu(do_flush_tlb_all, NULL, 1);
G
Glauber Costa 已提交
242
}
243

244 245 246 247 248 249
static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* flush range by one by one 'invlpg' */
250
	for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
251 252 253 254 255 256 257
		__flush_tlb_single(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{

	/* Balance as user space task's flush, a bit conservative */
258 259
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
260
		on_each_cpu(do_flush_tlb_all, NULL, 1);
261 262
	} else {
		struct flush_tlb_info info;
263 264 265 266 267
		info.flush_start = start;
		info.flush_end = end;
		on_each_cpu(do_kernel_range_flush, &info, 1);
	}
}