/*
 * Machine check exception handling CPU-side for power7 and power8
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce_power: " fmt

#include <linux/types.h>
#include <linux/ptrace.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>

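/*
 * Flush the first num_sets congruence classes of the TLB using tlbiel,
 * bracketed with ptesync to order the invalidations. The scope encoded
 * in rb selects either all translations or only those belonging to the
 * current LPID.
 */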
static void flush_tlb_206(unsigned int num_sets, unsigned int action)
{
	unsigned long rb;
	unsigned int i;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		rb = TLBIEL_INVAL_SET;
		break;
	case TLB_INVAL_SCOPE_LPID:
		rb = TLBIEL_INVAL_SET_LPID;
		break;
	default:
		BUG();
		break;
	}

	asm volatile("ptesync" : : : "memory");
	for (i = 0; i < num_sets; i++) {
		asm volatile("tlbiel %0" : : "r" (rb));
		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
	}
	asm volatile("ptesync" : : : "memory");
}

/*
 * Generic routines to flush the TLB on POWER processors. These are
 * used as the flush_tlb hook in cpu_spec.
 *
 * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
 *	     TLB_INVAL_SCOPE_LPID:   Invalidate TLB for current LPID.
 */
void __flush_tlb_power7(unsigned int action)
{
	flush_tlb_206(POWER7_TLB_SETS, action);
}

void __flush_tlb_power8(unsigned int action)
{
	flush_tlb_206(POWER8_TLB_SETS, action);
}

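/*
 * POWER9 has different TLB geometries for radix and hash translation,
 * so pick the set count matching the MMU mode currently in use.
 */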
void __flush_tlb_power9(unsigned int action)
{
	if (radix_enabled())
		flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
	else
		flush_tlb_206(POWER9_TLB_SETS_HASH, action);
}

/* Flush SLBs and reload them from the shadow SLB */
#ifdef CONFIG_PPC_STD_MMU_64
static void flush_and_reload_slb(void)
{
	struct slb_shadow *slb;
	unsigned long i, n;

	/*
	 * Invalidate all SLBs. slbia does not invalidate SLB entry 0,
	 * so clear that entry first by writing zeros to it with slbmte.
	 */
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));

#ifdef CONFIG_KVM_BOOK3S_HANDLER
	/*
	 * If the machine check hit while we were in a guest or in
	 * transition between guest and host, only flush the SLBs and
	 * continue.
	 */
	if (get_paca()->kvm_hstate.in_guest)
		return;
#endif

	/* For host kernel, reload the SLBs from shadow SLB buffer. */
	slb = get_slb_shadow();
	if (!slb)
		return;

	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);

	/* Load up the SLB entries from shadow SLB */
	for (i = 0; i < n; i++) {
		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);

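		/*
		 * The shadow save area holds the ESID in the upper bits
		 * of rb; slbmte expects the SLB entry index in the low
		 * 12 bits.
		 */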
		rb = (rb & ~0xFFFul) | i;
		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
	}
}
#endif

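/*
 * Attempt to recover from a load/store machine check. Returns 1 if
 * every error bit set in DSISR was recognized and handled, 0 otherwise.
 */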
static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
{
	long handled = 1;

	/*
	 * Flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
	 * Reset the error bits whenever we handle them so that at the end
	 * we can check whether we handled all of them or not.
	 */
#ifdef CONFIG_PPC_STD_MMU_64
	if (dsisr & slb_error_bits) {
		flush_and_reload_slb();
		/* reset error bits */
		dsisr &= ~(slb_error_bits);
	}
	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
		/* reset error bits */
		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
	}
#endif
	/* Any other errors we don't understand? */
	if (dsisr & 0xffffffffUL)
		handled = 0;

	return handled;
}

static long mce_handle_derror_p7(uint64_t dsisr)
{
	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
}

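/*
 * Attempt recovery for the instruction-fetch error codes that POWER7
 * and POWER8 report identically in SRR1.
 */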
static long mce_handle_common_ierror(uint64_t srr1)
{
	long handled = 0;

	switch (P7_SRR1_MC_IFETCH(srr1)) {
	case 0:
		break;
#ifdef CONFIG_PPC_STD_MMU_64
	case P7_SRR1_MC_IFETCH_SLB_PARITY:
	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
		/* flush and reload SLBs for SLB errors. */
		flush_and_reload_slb();
		handled = 1;
		break;
	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
			handled = 1;
		}
		break;
#endif
	default:
		break;
	}

	return handled;
}

static long mce_handle_ierror_p7(uint64_t srr1)
{
	long handled = 0;

	handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
		flush_and_reload_slb();
		handled = 1;
	}
#endif
	return handled;
}

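/*
 * Decode the SRR1 instruction-fetch error code into an error type and
 * subtype for the cases common to POWER7 and POWER8.
 */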
static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
{
	switch (P7_SRR1_MC_IFETCH(srr1)) {
	case P7_SRR1_MC_IFETCH_SLB_PARITY:
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
		break;
	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
		break;
	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
		mce_err->error_type = MCE_ERROR_TYPE_TLB;
		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
		break;
	case P7_SRR1_MC_IFETCH_UE:
	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
		break;
	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type =
				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
		break;
	}
}

static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
{
	mce_get_common_ierror(mce_err, srr1);
	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
	}
}

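/*
 * Classify a POWER7 load/store machine check from the DSISR bits. The
 * first matching bit wins, so UE errors take precedence over ERAT, SLB
 * and TLB multihit or parity errors.
 */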
static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
{
	if (dsisr & P7_DSISR_MC_UE) {
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type =
				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
	} else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
		mce_err->error_type = MCE_ERROR_TYPE_TLB;
		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
	}
}

static long mce_handle_ue_error(struct pt_regs *regs)
{
	long handled = 0;

	/*
	 * On a SCOM read via MMIO we may get a machine check exception
	 * with SRR0 pointing inside OPAL. If that is the case, OPAL may
	 * have a recovery address from which the SCOM data can be re-read
	 * a different way, and hence we can recover from this MC.
	 */

	if (ppc_md.mce_check_early_recovery) {
		if (ppc_md.mce_check_early_recovery(regs))
			handled = 1;
	}
	return handled;
}

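/*
 * Early machine check handler for POWER7, called in real mode before
 * the kernel's normal machine check handling runs.
 */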
long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
	uint64_t srr1, nip, addr;
	long handled = 1;
	struct mce_error_info mce_error_info = { 0 };

	srr1 = regs->msr;
	nip = regs->nip;

	/*
	 * Handle memory errors depending on whether this was a load/store
	 * or an ifetch exception, and populate the MCE error_type and
	 * type-specific error_type from either SRR1 or DSISR accordingly.
	 */
	if (P7_SRR1_MC_LOADSTORE(srr1)) {
		handled = mce_handle_derror_p7(regs->dsisr);
		mce_get_derror_p7(&mce_error_info, regs->dsisr);
		addr = regs->dar;
	} else {
		handled = mce_handle_ierror_p7(srr1);
		mce_get_ierror_p7(&mce_error_info, srr1);
		addr = regs->nip;
	}

	/* Handle UE error. */
	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_error_info, nip, addr);
	return handled;
}

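/*
 * POWER8 reuses the POWER7 ifetch decode, adding the ERAT multihit
 * code that P8 reports separately in SRR1.
 */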
static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
{
	mce_get_common_ierror(mce_err, srr1);
	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	}
}

static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
{
	mce_get_derror_p7(mce_err, dsisr);
	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	}
}

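/*
 * Instruction-side recovery for POWER8: handle the cases common with
 * POWER7, plus ERAT multihit, which is repaired by flushing and
 * reloading the SLB.
 */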
static long mce_handle_ierror_p8(uint64_t srr1)
{
	long handled = 0;

	handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
		flush_and_reload_slb();
		handled = 1;
	}
#endif
	return handled;
}

static long mce_handle_derror_p8(uint64_t dsisr)
{
	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
}

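/*
 * Early machine check handler for POWER8; the flow mirrors the POWER7
 * handler but uses the P8 SLB and ERAT error decodes.
 */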
long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
	uint64_t srr1, nip, addr;
	long handled = 1;
	struct mce_error_info mce_error_info = { 0 };

	srr1 = regs->msr;
	nip = regs->nip;

	if (P7_SRR1_MC_LOADSTORE(srr1)) {
		handled = mce_handle_derror_p8(regs->dsisr);
		mce_get_derror_p8(&mce_error_info, regs->dsisr);
		addr = regs->dar;
	} else {
		handled = mce_handle_ierror_p8(srr1);
		mce_get_ierror_p8(&mce_error_info, srr1);
		addr = regs->nip;
	}

	/* Handle UE error. */
	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_error_info, nip, addr);
	return handled;
}