/*
 *    Page Deallocation Table (PDT) support
 *
 *    The Page Deallocation Table (PDT) is maintained by firmware and holds a
 *    list of memory addresses in which memory errors were detected.
 *    The list contains both single-bit (correctable) and double-bit
 *    (uncorrectable) errors.
 *
 *    Copyright 2017 by Helge Deller <deller@gmx.de>
 *
 *    possible future enhancements:
 *    - add userspace interface via procfs or sysfs to clear PDT
 */

#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/kthread.h>

#include <asm/pdc.h>
#include <asm/pdcpat.h>
#include <asm/sections.h>
#include <asm/pgtable.h>

/*
 * Firmware interface used to access the PDT. pdt_type stays PDT_NONE when
 * no usable interface was found or when reading the table failed.
 */
enum pdt_access_type {
	PDT_NONE,
	PDT_PDC,	/* standard PDC call, used on non-PAT machines */
	PDT_PAT_NEW,	/* newer PAT machines: info reported for all cells */
	PDT_PAT_CELL	/* older PAT machines: per-cell info only */
};

/* access method currently in use; selected by pdc_pdt_init() */
static enum pdt_access_type pdt_type;

33 34 35 36 37
/* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */
#define PDT_POLL_INTERVAL_DEFAULT	(5*60*HZ)
#define PDT_POLL_INTERVAL_SHORT		(1*60*HZ)
static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT;

38 39 40 41 42 43 44
/* global PDT status information */
static struct pdc_mem_retinfo pdt_status;

#define MAX_PDT_TABLE_SIZE	PAGE_SIZE
#define MAX_PDT_ENTRIES		(MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;

/*
 * Constants for the pdt_entry format:
 * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
 * reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
 * The perm bit indicates whether the error has been verified as a permanent
 * error (value of 1) or has not been verified, and may be transient (value
 * of 0). The error_type bit indicates whether the error is a single bit error
 * (value of 1) or a multiple bit error.
 * On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
 * 63. Those machines don't provide the perm bit.
 *
 * Bit 0 is the most significant bit in this numbering, so the flag bits
 * occupy the low-order bits of an entry (error_type == 1UL, perm == 2UL).
 * The macros below depend on the current value of pdt_type.
 */

#define PDT_ADDR_PHYS_MASK	(pdt_type != PDT_PDC ? ~0x3fUL : ~0x0fUL)
#define PDT_ADDR_PERM_ERR	(pdt_type != PDT_PDC ? 2UL : 0UL)
#define PDT_ADDR_SINGLE_ERR	1UL

/*
 * Append the PDT capacity and the current number of PDT entries to
 * /proc/meminfo. Nothing is printed while no PDT interface is in use.
 */
void arch_report_meminfo(struct seq_file *m)
{
	if (pdt_type != PDT_NONE) {
		seq_printf(m, "PDT_max_entries: %7lu\n", pdt_status.pdt_size);
		seq_printf(m, "PDT_cur_entries: %7lu\n",
			   pdt_status.pdt_entries);
	}
}

73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
static int get_info_pat_new(void)
{
	struct pdc_pat_mem_retinfo pat_rinfo;
	int ret;

	/* newer PAT machines like C8000 report info for all cells */
	if (is_pdc_pat())
		ret = pdc_pat_mem_pdt_info(&pat_rinfo);
	else
		return PDC_BAD_PROC;

	pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
	pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
	pdt_status.pdt_status = 0;
	pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
	pdt_status.good_mem = pat_rinfo.good_mem;

	return ret;
}

static int get_info_pat_cell(void)
{
	struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo;
	int ret;

	/* older PAT machines like rp5470 report cell info only */
	if (is_pdc_pat())
		ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num);
	else
		return PDC_BAD_PROC;

	pdt_status.pdt_size = cell_rinfo.max_pdt_entries;
	pdt_status.pdt_entries = cell_rinfo.current_pdt_entries;
	pdt_status.pdt_status = 0;
	pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc;
	pdt_status.good_mem = cell_rinfo.good_mem;

	return ret;
}

static void report_mem_err(unsigned long pde)
{
	struct pdc_pat_mem_phys_mem_location loc;
	unsigned long addr;
	char dimm_txt[32];

	addr = pde & PDT_ADDR_PHYS_MASK;

	/* show DIMM slot description on PAT machines */
	if (is_pdc_pat()) {
		pdc_pat_mem_get_dimm_phys_location(&loc, addr);
		sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot);
	} else
		dimm_txt[0] = 0;

	pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n",
		addr, dimm_txt,
		pde & PDT_ADDR_PERM_ERR ? "permanent ":"",
		pde & PDT_ADDR_SINGLE_ERR ? "single":"multi");
}


135 136 137 138 139 140 141 142 143 144 145 146 147 148
/*
 * pdc_pdt_init()
 *
 * Initialize kernel PDT structures, read initial PDT table from firmware,
 * report all current PDT entries and mark bad memory with memblock_reserve()
 * to avoid that the kernel will use broken memory areas.
 *
 */
void __init pdc_pdt_init(void)
{
	int ret, i;
	unsigned long entries;
	struct pdc_mem_read_pdt pdt_read_ret;

149 150
	pdt_type = PDT_PAT_NEW;
	ret = get_info_pat_new();
151

152 153 154 155 156 157
	if (ret != PDC_OK) {
		pdt_type = PDT_PAT_CELL;
		ret = get_info_pat_cell();
	}

	if (ret != PDC_OK) {
158
		pdt_type = PDT_PDC;
159
		/* non-PAT machines provide the standard PDC call */
160 161 162 163 164 165 166 167 168 169 170
		ret = pdc_mem_pdt_info(&pdt_status);
	}

	if (ret != PDC_OK) {
		pdt_type = PDT_NONE;
		pr_info("PDT: Firmware does not provide any page deallocation"
			" information.\n");
		return;
	}

	entries = pdt_status.pdt_entries;
171 172
	if (WARN_ON(entries > MAX_PDT_ENTRIES))
		entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES;
173

174 175 176 177 178
	pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
		" good_mem %lu MB\n",
			pdt_type == PDT_PDC ? __stringify(PDT_PDC) :
			pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL)
						 : __stringify(PDT_PAT_NEW),
179 180
			pdt_status.pdt_size, pdt_status.pdt_entries,
			pdt_status.pdt_status, pdt_status.first_dbe_loc,
181
			pdt_status.good_mem / 1024 / 1024);
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200

	if (entries == 0) {
		pr_info("PDT: Firmware reports all memory OK.\n");
		return;
	}

	if (pdt_status.first_dbe_loc &&
		pdt_status.first_dbe_loc <= __pa((unsigned long)&_end))
		pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n");

	pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n",
		entries);

	if (pdt_type == PDT_PDC)
		ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry);
	else {
#ifdef CONFIG_64BIT
		struct pdc_pat_mem_read_pd_retinfo pat_pret;

201 202 203 204
		if (pdt_type == PDT_PAT_CELL)
			ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
				MAX_PDT_ENTRIES);
		else
205 206 207 208 209 210 211 212 213
			ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
				MAX_PDT_TABLE_SIZE, 0);
#else
		ret = PDC_BAD_PROC;
#endif
	}

	if (ret != PDC_OK) {
		pdt_type = PDT_NONE;
214
		pr_warn("PDT: Get PDT entries failed with %d\n", ret);
215 216 217 218
		return;
	}

	for (i = 0; i < pdt_status.pdt_entries; i++) {
219 220 221 222 223 224
		report_mem_err(pdt_entry[i]);

		/* mark memory page bad */
		memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
	}
}
225

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283

/*
 * This is the PDT kernel thread main loop.
 */

static int pdt_mainloop(void *unused)
{
	struct pdc_mem_read_pdt pdt_read_ret;
	struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused;
	unsigned long old_num_entries;
	unsigned long *bad_mem_ptr;
	int num, ret;

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);

		old_num_entries = pdt_status.pdt_entries;

		schedule_timeout(pdt_poll_interval);
		if (kthread_should_stop())
			break;

		/* Do we have new PDT entries? */
		switch (pdt_type) {
		case PDT_PAT_NEW:
			ret = get_info_pat_new();
			break;
		case PDT_PAT_CELL:
			ret = get_info_pat_cell();
			break;
		default:
			ret = pdc_mem_pdt_info(&pdt_status);
			break;
		}

		if (ret != PDC_OK) {
			pr_warn("PDT: unexpected failure %d\n", ret);
			return -EINVAL;
		}

		/* if no new PDT entries, just wait again */
		num = pdt_status.pdt_entries - old_num_entries;
		if (num <= 0)
			continue;

		/* decrease poll interval in case we found memory errors */
		if (pdt_status.pdt_entries &&
			pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT)
			pdt_poll_interval = PDT_POLL_INTERVAL_SHORT;

		/* limit entries to get */
		if (num > MAX_PDT_ENTRIES) {
			num = MAX_PDT_ENTRIES;
			pdt_status.pdt_entries = old_num_entries + num;
		}

		/* get new entries */
		switch (pdt_type) {
284
#ifdef CONFIG_64BIT
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
		case PDT_PAT_CELL:
			if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) {
				pr_crit("PDT: too many entries.\n");
				return -ENOMEM;
			}
			ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
				MAX_PDT_ENTRIES);
			bad_mem_ptr = &pdt_entry[old_num_entries];
			break;
		case PDT_PAT_NEW:
			ret = pdc_pat_mem_read_pd_pdt(&pat_pret,
				pdt_entry,
				num * sizeof(unsigned long),
				old_num_entries * sizeof(unsigned long));
			bad_mem_ptr = &pdt_entry[0];
			break;
301
#endif
302 303 304 305 306 307
		default:
			ret = pdc_mem_pdt_read_entries(&pdt_read_ret,
				pdt_entry);
			bad_mem_ptr = &pdt_entry[old_num_entries];
			break;
		}
308

309 310 311
		/* report and mark memory broken */
		while (num--) {
			unsigned long pde = *bad_mem_ptr++;
312

313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
			report_mem_err(pde);

#ifdef CONFIG_MEMORY_FAILURE
			if ((pde & PDT_ADDR_PERM_ERR) ||
			    ((pde & PDT_ADDR_SINGLE_ERR) == 0))
				memory_failure(pde >> PAGE_SHIFT, 0, 0);
			else
				soft_offline_page(
					pfn_to_page(pde >> PAGE_SHIFT), 0);
#else
			pr_crit("PDT: memory error at 0x%lx ignored.\n"
				"Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
				"for real handling.\n",
				pde & PDT_ADDR_PHYS_MASK);
#endif

		}
330
	}
331 332

	return 0;
333
}
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352


/*
 * Start the "kpdtd" kernel thread running pdt_mainloop() if a PDT
 * interface is in use.
 *
 * Returns -ENODEV when pdt_type is PDT_NONE, the error encoded in the
 * returned pointer if the thread could not be created, and 0 otherwise.
 */
static int __init pdt_initcall(void)
{
	struct task_struct *task;

	if (pdt_type == PDT_NONE)
		return -ENODEV;

	/* kthread_run() creates the thread and wakes it up on success */
	task = kthread_run(pdt_mainloop, NULL, "kpdtd");

	return PTR_ERR_OR_ZERO(task);
}

late_initcall(pdt_initcall);