mce_amd_inj.c 8.7 KB
Newer Older
B
Borislav Petkov 已提交
1
/*
 * A simple MCE injection facility for testing different aspects of the RAS
 * code. This driver should be built as a module so that it can be loaded
 * on production kernels for testing purposes.
 *
 * This file may be distributed under the terms of the GNU General Public
 * License version 2.
 *
 * Copyright (c) 2010-15:  Borislav Petkov <bp@alien8.de>
 *			Advanced Micro Devices Inc.
 */

#include <linux/kobject.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <asm/mce.h>

#include "../kernel/cpu/mcheck/mce-internal.h"
B
Borislav Petkov 已提交
23 24 25 26 27

/*
 * Collect all the MCi_XXX settings
 */
static struct mce i_mce;
28
static struct dentry *dfs_inj;
B
Borislav Petkov 已提交
29

30 31
static u8 n_banks;

32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
/* Upper bound on the number of bytes accepted by a write to the "flags" file. */
#define MAX_FLAG_OPT_SIZE	3

/* Supported injection methods; the values are used as indices into flags_options[]. */
enum injection_type {
	SW_INJ = 0,	/* SW injection, simply decode the error */
	HW_INJ,		/* Trigger a #MC */
	N_INJ_TYPES,	/* number of injection types, not a type itself */
};

/*
 * Names of the injection types as they are read from and written to the
 * "flags" file. Indexed by enum injection_type, with a trailing NULL entry.
 */
static const char * const flags_options[] = {
	[SW_INJ] = "sw",
	[HW_INJ] = "hw",
	NULL
};

/* Set default injection to SW_INJ */
47
static enum injection_type inj_type = SW_INJ;
48

49 50
/*
 * Generate inj_<reg>_set(): a setter usable with DEFINE_SIMPLE_ATTRIBUTE
 * which stores @val into field <reg> of the struct mce passed in @data.
 * The generated function always returns 0.
 */
#define MCE_INJECT_SET(reg)						\
static int inj_##reg##_set(void *data, u64 val)				\
{									\
	struct mce *m = data;						\
									\
	m->reg = val;							\
	return 0;							\
}

MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
B
Borislav Petkov 已提交
61

62 63
/*
 * Generate inj_<reg>_get(): a getter usable with DEFINE_SIMPLE_ATTRIBUTE
 * which copies field <reg> of the struct mce passed in @data into *@val.
 * The generated function always returns 0.
 */
#define MCE_INJECT_GET(reg)						\
static int inj_##reg##_get(void *data, u64 *val)			\
{									\
	struct mce *m = data;						\
									\
	*val = m->reg;							\
	return 0;							\
}

MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
B
Borislav Petkov 已提交
74

75 76 77
/*
 * File operations for the "status", "misc" and "addr" debugfs files: each
 * pairs the generated inj_<reg>_get()/inj_<reg>_set() accessors and
 * formats the value as hexadecimal.
 */
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
B
Borislav Petkov 已提交
78

79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
/*
 * Set or clear bit 18 of MSR_K7_HWCR on @cpu, depending on @enable.
 *
 * The caller needs to make sure that @cpu doesn't disappear from under
 * us, i.e.: get_cpu/put_cpu.
 *
 * Returns 0 on success, or the error from the MSR read/write otherwise.
 */
static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
{
	u32 lo, hi;
	int ret;

	ret = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
	if (ret) {
		pr_err("%s: error reading HWCR\n", __func__);
		return ret;
	}

	/* Only the low half is modified; the high half is written back as read. */
	if (enable)
		lo |= BIT(18);
	else
		lo &= ~BIT(18);

	ret = wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
	if (ret)
		pr_err("%s: error writing HWCR\n", __func__);

	return ret;
}

103
static int __set_inj(const char *buf)
104
{
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
	int i;

	for (i = 0; i < N_INJ_TYPES; i++) {
		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
			inj_type = i;
			return 0;
		}
	}
	return -EINVAL;
}

/*
 * Read handler of the "flags" file: report the name of the currently
 * selected injection type followed by a newline.
 *
 * Returns whatever simple_read_from_buffer() returns for the formatted
 * string.
 */
static ssize_t flags_read(struct file *filp, char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	/*
	 * The option name, the newline and the terminating NUL together need
	 * one byte more than MAX_FLAG_OPT_SIZE; formatting into a buffer of
	 * only MAX_FLAG_OPT_SIZE bytes overran it by one.
	 */
	char buf[MAX_FLAG_OPT_SIZE + 1];
	int n;

	/* Bounded formatting: n never exceeds sizeof(buf) - 1. */
	n = scnprintf(buf, sizeof(buf), "%s\n", flags_options[inj_type]);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
}

127 128
/*
 * Write handler of the "flags" file: select the injection type.
 *
 * Accepts between 1 and MAX_FLAG_OPT_SIZE bytes, strips surrounding
 * whitespace and hands the result to __set_inj().
 *
 * Returns @cnt on success, -EINVAL for an empty, over-long or unknown
 * value and -EFAULT if the user buffer cannot be read.
 */
static ssize_t flags_write(struct file *filp, const char __user *ubuf,
			   size_t cnt, loff_t *ppos)
{
	/* One spare byte so the input can be terminated without losing data. */
	char buf[MAX_FLAG_OPT_SIZE + 1], *opt;
	int err;

	/*
	 * A zero-length write must be rejected up front: there is nothing to
	 * parse, and indexing the buffer relative to cnt would be out of
	 * bounds.
	 */
	if (!cnt || cnt > MAX_FLAG_OPT_SIZE)
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	/*
	 * Terminate after the copied bytes instead of overwriting the last
	 * one, so that input without a trailing newline is not truncated.
	 */
	buf[cnt] = 0;

	/* strip whitespace */
	opt = strstrip(buf);

	err = __set_inj(opt);
	if (err) {
		pr_err("%s: Invalid flags value: %s\n", __func__, opt);
		return err;
	}

	*ppos += cnt;

	return cnt;
}

155 156 157 158 159
/* File operations of the "flags" debugfs file (injection type selection). */
static const struct file_operations flags_fops = {
	.read           = flags_read,
	.write          = flags_write,
	.llseek         = generic_file_llseek,
};
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179

/*
 * Accessors of the "cpu" file: which CPU should the error be injected on?
 */
MCE_INJECT_GET(extcpu);

/*
 * Record @val as the target CPU in the struct mce passed in @data.
 *
 * The value is accepted only if it is below nr_cpu_ids and the CPU is
 * online at the time of the write; otherwise -EINVAL is returned and the
 * previous setting is kept.
 */
static int inj_extcpu_set(void *data, u64 val)
{
	struct mce *m = data;

	if (val < nr_cpu_ids && cpu_online(val)) {
		m->extcpu = val;
		return 0;
	}

	pr_err("%s: Invalid CPU: %llu\n", __func__, val);
	return -EINVAL;
}

DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");

180 181 182 183 184 185 186 187 188 189 190
/*
 * Raise software interrupt vector 18 (the x86 machine-check exception
 * vector) on the executing CPU. do_inject() runs this on the target CPU
 * via smp_call_function_single(); @info is unused.
 */
static void trigger_mce(void *info)
{
	asm volatile("int $18");
}

static void do_inject(void)
{
	u64 mcg_status = 0;
	unsigned int cpu = i_mce.extcpu;
	u8 b = i_mce.bank;

191 192 193
	if (i_mce.misc)
		i_mce.status |= MCI_STATUS_MISCV;

194
	if (inj_type == SW_INJ) {
195
		mce_inject_log(&i_mce);
196 197 198 199 200 201 202 203 204
		return;
	}

	/* prep MCE global settings for the injection */
	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;

	if (!(i_mce.status & MCI_STATUS_PCC))
		mcg_status |= MCG_STATUS_RIPV;

205 206 207 208
	get_online_cpus();
	if (!cpu_online(cpu))
		goto err;

209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
	toggle_hw_mce_inject(cpu, true);

	wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
		     (u32)mcg_status, (u32)(mcg_status >> 32));

	wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
		     (u32)i_mce.status, (u32)(i_mce.status >> 32));

	wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
		     (u32)i_mce.addr, (u32)(i_mce.addr >> 32));

	wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
		     (u32)i_mce.misc, (u32)(i_mce.misc >> 32));

	toggle_hw_mce_inject(cpu, false);

	smp_call_function_single(cpu, trigger_mce, NULL, 0);

err:
	put_online_cpus();

}

B
Borislav Petkov 已提交
232 233 234 235
/*
 * This denotes into which bank we're injecting and triggers
 * the injection, at the same time.
 */
236
static int inj_bank_set(void *data, u64 val)
B
Borislav Petkov 已提交
237
{
238
	struct mce *m = (struct mce *)data;
B
Borislav Petkov 已提交
239

240 241 242
	if (val >= n_banks) {
		pr_err("Non-existent MCE bank: %llu\n", val);
		return -EINVAL;
243
	}
B
Borislav Petkov 已提交
244

245
	m->bank = val;
246
	do_inject();
B
Borislav Petkov 已提交
247

248
	return 0;
B
Borislav Petkov 已提交
249 250
}

251
MCE_INJECT_GET(bank);
B
Borislav Petkov 已提交
252

253 254
DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");

255
/* Text returned when reading the "README" file in the debugfs directory. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t  - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t    format only. Safe to use.\n"
"\t  - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t    before injecting.\n"
"\n";
291 292 293 294 295 296 297 298 299 300 301 302 303

/*
 * Read handler of the "README" file: serve readme_msg, without its
 * terminating NUL, through simple_read_from_buffer().
 */
static ssize_t
inj_readme_read(struct file *filp, char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	const size_t len = strlen(readme_msg);

	return simple_read_from_buffer(ubuf, cnt, ppos, readme_msg, len);
}

/* File operations of the "README" debugfs file; only reading is provided. */
static const struct file_operations readme_fops = {
	.read		= inj_readme_read,
};

304
static struct dfs_node {
305 306 307
	char *name;
	struct dentry *d;
	const struct file_operations *fops;
308
	umode_t perm;
309
} dfs_fls[] = {
310 311 312 313 314 315
	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
316
	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
B
Borislav Petkov 已提交
317 318
};

319
/*
 * Module init: read the bank count from MSR_IA32_MCG_CAP, create the
 * "mce-inject" debugfs directory and one file per dfs_fls[] entry, each
 * operating on i_mce.
 *
 * Returns 0 on success, -EINVAL if the directory cannot be created and
 * -ENOMEM if a file cannot be created; in the latter case everything
 * created so far is removed again.
 */
static int __init init_mce_inject(void)
{
	int i;
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	n_banks = cap & MCG_BANKCNT_MASK;

	dfs_inj = debugfs_create_dir("mce-inject", NULL);
	if (!dfs_inj)
		return -EINVAL;

	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
		dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
						    dfs_fls[i].perm,
						    dfs_inj,
						    &i_mce,
						    dfs_fls[i].fops);

		if (!dfs_fls[i].d)
			goto err_dfs_add;
	}

	return 0;

err_dfs_add:
	/* Entry i failed; unwind the ones created before it. */
	while (--i >= 0)
		debugfs_remove(dfs_fls[i].d);

	debugfs_remove(dfs_inj);
	dfs_inj = NULL;

	return -ENOMEM;
}

354
/*
 * Module exit: remove every file listed in dfs_fls[], clear the table and
 * then remove the "mce-inject" directory itself.
 */
static void __exit exit_mce_inject(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
		debugfs_remove(dfs_fls[i].d);

	memset(&dfs_fls, 0, sizeof(dfs_fls));

	debugfs_remove(dfs_inj);
	dfs_inj = NULL;
}
366 367
module_init(init_mce_inject);
module_exit(exit_mce_inject);
B
Borislav Petkov 已提交
368 369

MODULE_LICENSE("GPL");
370
MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>");
B
Borislav Petkov 已提交
371
MODULE_AUTHOR("AMD Inc.");
372
MODULE_DESCRIPTION("MCE injection facility for RAS testing");