diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 86d6d823e47e97bdfd40ec1dc485cb6dd503d47b..624190fe86a7e9bf076ff6a1779e14b416e29d8c 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1,12 +1,12 @@ /* - * Handle the memory map. - * The functions here do the job until bootmem takes over. + * Low level x86 E820 memory map handling functions. * - * Getting sanitize_e820_table() in sync with i386 version by applying change: - * - Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach , December 2002. - * Venkatesh Pallipadi + * The firmware and bootloader passes us the "E820 table", which is the primary + * physical memory layout description available about x86 systems. * + * The kernel takes the E820 memory layout and optionally modifies it with + * quirks and other tweaks, and feeds that into the generic Linux memory + * allocation code routines via a platform independent interface (memblock, etc.). */ #include #include @@ -27,13 +27,6 @@ #include /* - * The firmware and bootloader passes us an E820 table that is the primary - * physical memory layout description available about x86 systems. - * - * The kernel takes the e820 memory layout and optionally modifies it with - * quirks and other tweaks, and feeds that into the generic Linux memory - * allocation code routines via a platform independent interface (memblock, etc.). - * * We organize the E820 table into two main data structures: * * - 'e820_table_firmware': the original firmware version passed to us by the @@ -45,16 +38,16 @@ * - the hibernation code uses it to generate a kernel-independent MD5 * fingerprint of the physical memory layout of a system. * - * - kexec, which is a bootloader in disguise, uses the original e820 + * - kexec, which is a bootloader in disguise, uses the original E820 * layout to pass to the kexec-ed kernel. This way the original kernel - * can have a restricted e820 map while the kexec()-ed kexec-kernel + * can have a restricted E820 map while the kexec()-ed kexec-kernel * can have access to full memory - etc. * - * - 'e820_table': this is the main e820 table that is massaged by the + * - 'e820_table': this is the main E820 table that is massaged by the * low level x86 platform code, or modified by boot parameters, before * passed on to higher level MM layers. * - * Once the e820 map has been converted to the standard Linux memory layout + * Once the E820 map has been converted to the standard Linux memory layout * information its role stops - modifying it has no effect and does not get * re-propagated. So itsmain role is a temporary bootstrap storage of firmware * specific memory layout data during early bootup. @@ -75,8 +68,7 @@ EXPORT_SYMBOL(pci_mem_start); * This function checks if any part of the range is mapped * with type. */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) +int e820_any_mapped(u64 start, u64 end, unsigned type) { int i; @@ -94,10 +86,10 @@ e820_any_mapped(u64 start, u64 end, unsigned type) EXPORT_SYMBOL_GPL(e820_any_mapped); /* - * This function checks if the entire range is mapped with type. + * This function checks if the entire range is mapped with 'type'. * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case + * Note: this function only works correctly once the E820 table is sorted and + * not-overlapping (at least for the range specified), which is the case normally. */ int __init e820_all_mapped(u64 start, u64 end, unsigned type) { @@ -108,18 +100,21 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) if (type && ei->type != type) continue; - /* is the region (part) in overlap with the current region ?*/ + + /* Is the region (part) in overlap with the current region? */ if (ei->addr >= end || ei->addr + ei->size <= start) continue; - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there + /* + * If the region is at the beginning of we move + * 'start' to the end of the region since it's ok until there */ if (ei->addr <= start) start = ei->addr + ei->size; + /* - * if start is now at or beyond end, we're done, full - * coverage + * If 'start' is now at or beyond 'end', we're done, full + * coverage of the desired range exists: */ if (start >= end) return 1; @@ -128,16 +123,14 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) } /* - * Add a memory region to the kernel e820 map. + * Add a memory region to the kernel E820 map. */ static void __init __e820_add_region(struct e820_table *table, u64 start, u64 size, int type) { int x = table->nr_entries; if (x >= ARRAY_SIZE(table->entries)) { - printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", - (unsigned long long) start, - (unsigned long long) (start + size - 1)); + printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1); return; } @@ -155,29 +148,15 @@ void __init e820_add_region(u64 start, u64 size, int type) static void __init e820_print_type(u32 type) { switch (type) { - case E820_RAM: - case E820_RESERVED_KERN: - printk(KERN_CONT "usable"); - break; - case E820_RESERVED: - printk(KERN_CONT "reserved"); - break; - case E820_ACPI: - printk(KERN_CONT "ACPI data"); - break; - case E820_NVS: - printk(KERN_CONT "ACPI NVS"); - break; - case E820_UNUSABLE: - printk(KERN_CONT "unusable"); - break; - case E820_PMEM: - case E820_PRAM: - printk(KERN_CONT "persistent (type %u)", type); - break; - default: - printk(KERN_CONT "type %u", type); - break; + case E820_RAM: /* Fall through: */ + case E820_RESERVED_KERN: printk(KERN_CONT "usable"); break; + case E820_RESERVED: printk(KERN_CONT "reserved"); break; + case E820_ACPI: printk(KERN_CONT "ACPI data"); break; + case E820_NVS: printk(KERN_CONT "ACPI NVS"); break; + case E820_UNUSABLE: printk(KERN_CONT "unusable"); break; + case E820_PMEM: /* Fall through: */ + case E820_PRAM: printk(KERN_CONT "persistent (type %u)", type); break; + default: printk(KERN_CONT "type %u", type); break; } } @@ -187,26 +166,26 @@ void __init e820_print_map(char *who) for (i = 0; i < e820_table->nr_entries; i++) { printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who, - (unsigned long long) e820_table->entries[i].addr, - (unsigned long long) - (e820_table->entries[i].addr + e820_table->entries[i].size - 1)); + e820_table->entries[i].addr, + e820_table->entries[i].addr + e820_table->entries[i].size - 1); + e820_print_type(e820_table->entries[i].type); printk(KERN_CONT "\n"); } } /* - * Sanitize the BIOS e820 map. + * Sanitize the BIOS E820 map. * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps, + * Some E820 responses include overlapping entries. The following + * replaces the original E820 map with a new one, removing overlaps, * and resolving conflicting memory types in favor of highest * numbered type. * * The input parameter biosmap points to an array of 'struct * e820_entry' which on entry has elements in the range [0, *pnr_map) * valid, and which has space for up to max_nr_map entries. - * On return, the resulting sanitized e820 map entries will be in + * On return, the resulting sanitized E820 map entries will be in * overwritten in the same location, starting at biosmap. * * The integer pointed to by pnr_map must be valid on entry (the @@ -257,8 +236,10 @@ void __init e820_print_map(char *who) * ______________________4_ */ struct change_member { - struct e820_entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ + /* Pointer to the original BIOS entry: */ + struct e820_entry *pbios; + /* Address for this change point: */ + unsigned long long addr; }; static int __init cpcompare(const void *a, const void *b) @@ -268,7 +249,7 @@ static int __init cpcompare(const void *a, const void *b) /* * Inputs are pointers to two elements of change_point[]. If their - * addresses are unequal, their difference dominates. If the addresses + * addresses are not equal, their difference dominates. If the addresses * are equal, then consider one that represents the end of its region * to be greater than one that does not. */ @@ -278,8 +259,7 @@ static int __init cpcompare(const void *a, const void *b) return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); } -int __init sanitize_e820_table(struct e820_entry *biosmap, int max_nr_map, - u32 *pnr_map) +int __init sanitize_e820_table(struct e820_entry *biosmap, int max_nr_map, u32 *pnr_map) { static struct change_member change_point_list[2*E820_X_MAX] __initdata; static struct change_member *change_point[2*E820_X_MAX] __initdata; @@ -293,112 +273,96 @@ int __init sanitize_e820_table(struct e820_entry *biosmap, int max_nr_map, int old_nr, new_nr, chg_nr; int i; - /* if there's only one memory region, don't bother */ + /* If there's only one memory region, don't bother: */ if (*pnr_map < 2) return -1; old_nr = *pnr_map; BUG_ON(old_nr > max_nr_map); - /* bail out if we find any unreasonable addresses in bios map */ - for (i = 0; i < old_nr; i++) + /* Bail out if we find any unreasonable addresses in the BIOS map: */ + for (i = 0; i < old_nr; i++) { if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) return -1; + } - /* create pointers for initial change-point information (for sorting) */ + /* Create pointers for initial change-point information (for sorting): */ for (i = 0; i < 2 * old_nr; i++) change_point[i] = &change_point_list[i]; - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ + /* + * Record all known change-points (starting and ending addresses), + * omitting empty memory regions: + */ chgidx = 0; for (i = 0; i < old_nr; i++) { if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + - biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; } } chg_nr = chgidx; - /* sort change-point list by memory addresses (low -> high) */ + /* Sort change-point list by memory addresses (low -> high): */ sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); - /* create a new bios memory map, removing overlaps */ - overlap_entries = 0; /* number of entries in the overlap table */ - new_bios_entry = 0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ + /* Create a new BIOS memory map, removing overlaps: */ + overlap_entries = 0; /* Number of entries in the overlap table */ + new_bios_entry = 0; /* Index for creating new bios map entries */ + last_type = 0; /* Start with undefined memory type */ + last_addr = 0; /* Start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ + /* Loop through change-points, determining effect on the new BIOS map: */ for (chgidx = 0; chgidx < chg_nr; chgidx++) { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == - change_point[chgidx]->pbios->addr) { - /* - * add map entry to overlap list (> 1 entry - * implies an overlap) - */ - overlap_list[overlap_entries++] = - change_point[chgidx]->pbios; + /* Keep track of all overlapping BIOS entries */ + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) { + /* Add map entry to overlap list (> 1 entry implies an overlap) */ + overlap_list[overlap_entries++] = change_point[chgidx]->pbios; } else { - /* - * remove entry from list (order independent, - * so swap with last) - */ + /* Remove entry from list (order independent, so swap with last): */ for (i = 0; i < overlap_entries; i++) { - if (overlap_list[i] == - change_point[chgidx]->pbios) - overlap_list[i] = - overlap_list[overlap_entries-1]; + if (overlap_list[i] == change_point[chgidx]->pbios) + overlap_list[i] = overlap_list[overlap_entries-1]; } overlap_entries--; } /* - * if there are overlapping entries, decide which + * If there are overlapping entries, decide which * "type" to use (larger value takes precedence -- * 1=usable, 2,3,4,4+=unusable) */ current_type = 0; - for (i = 0; i < overlap_entries; i++) + for (i = 0; i < overlap_entries; i++) { if (overlap_list[i]->type > current_type) current_type = overlap_list[i]->type; - /* - * continue building up new bios map based on this - * information - */ + } + + /* Continue building up new BIOS map based on this information: */ if (current_type != last_type || current_type == E820_PRAM) { if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* - * move forward only if the new size - * was non-zero - */ + new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; + /* Move forward only if the new size was non-zero: */ if (new_bios[new_bios_entry].size != 0) - /* - * no more space left for new - * bios entries ? - */ + /* No more space left for new BIOS entries? */ if (++new_bios_entry >= max_nr_map) break; } if (current_type != 0) { - new_bios[new_bios_entry].addr = - change_point[chgidx]->addr; + new_bios[new_bios_entry].addr = change_point[chgidx]->addr; new_bios[new_bios_entry].type = current_type; last_addr = change_point[chgidx]->addr; } last_type = current_type; } } - /* retain count for new bios entries */ + + /* Retain count for new BIOS entries: */ new_nr = new_bios_entry; - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820_entry)); + /* Copy new BIOS mapping into the original location: */ + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820_entry)); *pnr_map = new_nr; return 0; @@ -412,7 +376,7 @@ static int __init __append_e820_table(struct e820_entry *biosmap, int nr_map) u64 end = start + size - 1; u32 type = biosmap->type; - /* Overflow in 64 bits? Ignore the memory map. */ + /* Ignore the entry on 64-bit overflow: */ if (start > end && likely(size)) return -1; @@ -425,7 +389,7 @@ static int __init __append_e820_table(struct e820_entry *biosmap, int nr_map) } /* - * Copy the BIOS e820 map into a safe place. + * Copy the BIOS E820 map into a safe place. * * Sanity-check it while we're at it.. * @@ -442,9 +406,8 @@ static int __init append_e820_table(struct e820_entry *biosmap, int nr_map) return __append_e820_table(biosmap, nr_map); } -static u64 __init __e820_update_range(struct e820_table *table, u64 start, - u64 size, unsigned old_type, - unsigned new_type) +static u64 __init +__e820_update_range(struct e820_table *table, u64 start, u64 size, unsigned old_type, unsigned new_type) { u64 end; unsigned int i; @@ -456,8 +419,7 @@ static u64 __init __e820_update_range(struct e820_table *table, u64 start, size = ULLONG_MAX - start; end = start + size; - printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", - (unsigned long long) start, (unsigned long long) (end - 1)); + printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", start, end - 1); e820_print_type(old_type); printk(KERN_CONT " ==> "); e820_print_type(new_type); @@ -472,14 +434,15 @@ static u64 __init __e820_update_range(struct e820_table *table, u64 start, continue; ei_end = ei->addr + ei->size; - /* totally covered by new range? */ + + /* Completely covered by new range? */ if (ei->addr >= start && ei_end <= end) { ei->type = new_type; real_updated_size += ei->size; continue; } - /* new range is totally covered? */ + /* New range is completely covered? */ if (ei->addr < start && ei_end > end) { __e820_add_region(table, start, size, new_type); __e820_add_region(table, end, ei_end - end, ei->type); @@ -488,45 +451,41 @@ static u64 __init __e820_update_range(struct e820_table *table, u64 start, continue; } - /* partially covered */ + /* Partially covered: */ final_start = max(start, ei->addr); final_end = min(end, ei_end); if (final_start >= final_end) continue; - __e820_add_region(table, final_start, final_end - final_start, - new_type); + __e820_add_region(table, final_start, final_end - final_start, new_type); real_updated_size += final_end - final_start; /* - * left range could be head or tail, so need to update - * size at first. + * Left range could be head or tail, so need to update + * its size first: */ ei->size -= final_end - final_start; if (ei->addr < final_start) continue; + ei->addr = final_end; } return real_updated_size; } -u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) +u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { return __e820_update_range(e820_table, start, size, old_type, new_type); } -static u64 __init e820_update_range_firmware(u64 start, u64 size, - unsigned old_type, unsigned new_type) +static u64 __init e820_update_range_firmware(u64 start, u64 size, unsigned old_type, unsigned new_type) { - return __e820_update_range(e820_table_firmware, start, size, old_type, - new_type); + return __e820_update_range(e820_table_firmware, start, size, old_type, new_type); } -/* make e820 not cover the range */ -u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, - int checktype) +/* Remove a range of memory from the E820 table: */ +u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, int checktype) { int i; u64 end; @@ -536,8 +495,7 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, size = ULLONG_MAX - start; end = start + size; - printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", - (unsigned long long) start, (unsigned long long) (end - 1)); + printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", start, end - 1); if (checktype) e820_print_type(old_type); printk(KERN_CONT "\n"); @@ -551,14 +509,15 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, continue; ei_end = ei->addr + ei->size; - /* totally covered? */ + + /* Completely covered? */ if (ei->addr >= start && ei_end <= end) { real_removed_size += ei->size; memset(ei, 0, sizeof(struct e820_entry)); continue; } - /* new range is totally covered? */ + /* Is the new range completely covered? */ if (ei->addr < start && ei_end > end) { e820_add_region(end, ei_end - end, ei->type); ei->size = start - ei->addr; @@ -566,20 +525,22 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, continue; } - /* partially covered */ + /* Partially covered: */ final_start = max(start, ei->addr); final_end = min(end, ei_end); if (final_start >= final_end) continue; + real_removed_size += final_end - final_start; /* - * left range could be head or tail, so need to update - * size at first. + * Left range could be head or tail, so need to update + * the size first: */ ei->size -= final_end - final_start; if (ei->addr < final_start) continue; + ei->addr = final_end; } return real_removed_size; @@ -589,19 +550,22 @@ void __init update_e820(void) { if (sanitize_e820_table(e820_table->entries, ARRAY_SIZE(e820_table->entries), &e820_table->nr_entries)) return; + printk(KERN_INFO "e820: modified physical RAM map:\n"); e820_print_map("modified"); } + static void __init update_e820_table_firmware(void) { sanitize_e820_table(e820_table_firmware->entries, ARRAY_SIZE(e820_table_firmware->entries), &e820_table_firmware->nr_entries); } + #define MAX_GAP_END 0x100000000ull + /* - * Search for a gap in the e820 memory space from 0 to MAX_GAP_END. + * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB). */ -static int __init e820_search_gap(unsigned long *gapstart, - unsigned long *gapsize) +static int __init e820_search_gap(unsigned long *gapstart, unsigned long *gapsize) { unsigned long long last = MAX_GAP_END; int i = e820_table->nr_entries; @@ -613,7 +577,7 @@ static int __init e820_search_gap(unsigned long *gapstart, /* * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true + * fit in 32 bits if this condition is true: */ if (last > end) { unsigned long gap = last - end; @@ -631,9 +595,11 @@ static int __init e820_search_gap(unsigned long *gapstart, } /* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. We pass this space to PCI to assign MMIO resources - * for hotplug or unconfigured devices in. + * Search for the biggest gap in the low 32 bits of the E820 + * memory space. We pass this space to the PCI subsystem, so + * that it can assign MMIO resources for hotplug or + * unconfigured devices in. + * * Hopefully the BIOS let enough space left. */ __init void e820_setup_gap(void) @@ -648,8 +614,8 @@ __init void e820_setup_gap(void) #ifdef CONFIG_X86_64 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; printk(KERN_ERR - "e820: cannot find a gap in the 32bit address range\n" - "e820: PCI devices with unassigned 32bit BARs may break!\n"); + "e820: Cannot find an available gap in the 32-bit address range\n" + "e820: PCI devices with unassigned 32-bit BARs may not work!\n"); #else gapstart = 0x10000000; #endif @@ -660,43 +626,44 @@ __init void e820_setup_gap(void) */ pci_mem_start = gapstart; - printk(KERN_INFO - "e820: [mem %#010lx-%#010lx] available for PCI devices\n", - gapstart, gapstart + gapsize - 1); + printk(KERN_INFO "e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1); } /* * Called late during init, in free_initmem(). * - * Initial e820 and e820_table_firmware are largish __initdata arrays. - * Copy them to (usually much smaller) dynamically allocated area. - * This is done after all tweaks we ever do to them: - * all functions which modify them are __init functions, - * they won't exist after this point. + * Initial e820_table and e820_table_firmware are largish __initdata arrays. + * + * Copy them to a (usually much smaller) dynamically allocated area that is + * sized precisely after the number of e820 entries. + * + * This is done after we've performed all the fixes and tweaks to the tables. + * All functions which modify them are __init functions, which won't exist + * after free_initmem(). */ __init void e820_reallocate_tables(void) { struct e820_table *n; int size; - size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry) * e820_table->nr_entries; + size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries; n = kmalloc(size, GFP_KERNEL); BUG_ON(!n); memcpy(n, e820_table, size); e820_table = n; - size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry) * e820_table_firmware->nr_entries; + size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries; n = kmalloc(size, GFP_KERNEL); BUG_ON(!n); memcpy(n, e820_table_firmware, size); e820_table_firmware = n; } -/** - * Because of the size limitation of struct boot_params, only first - * 128 E820 memory entries are passed to kernel via - * boot_params.e820_table, others are passed via SETUP_E820_EXT node of - * linked list of struct setup_data, which is parsed here. +/* + * Because of the small fixed size of struct boot_params, only the first + * 128 E820 memory entries are passed to the kernel via boot_params.e820_table, + * the remaining (if any) entries are passed via the SETUP_E820_EXT node of + * struct setup_data, which is parsed here. */ void __init parse_e820_ext(u64 phys_addr, u32 data_len) { @@ -707,8 +674,10 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) sdata = early_memremap(phys_addr, data_len); entries = sdata->len / sizeof(struct e820_entry); extmap = (struct e820_entry *)(sdata->data); + __append_e820_table(extmap, entries); sanitize_e820_table(e820_table->entries, ARRAY_SIZE(e820_table->entries), &e820_table->nr_entries); + early_memunmap(sdata, data_len); printk(KERN_INFO "e820: extended physical RAM map:\n"); e820_print_map("extended"); @@ -716,10 +685,10 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) /** * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for - * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). + * E820 RAM areas and mark the corresponding pages as 'nosave' for + * hibernation (32-bit) or software suspend and suspend to RAM (64-bit). * - * This function requires the e820 map to be sorted and without any + * This function requires the E820 map to be sorted and without any * overlapping entries. */ void __init e820_mark_nosave_regions(unsigned long limit_pfn) @@ -744,9 +713,9 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn) } #ifdef CONFIG_ACPI -/** - * Mark ACPI NVS memory region, so that we can save/restore it during - * hibernation and the subsequent resume. +/* + * Register ACPI NVS memory regions, so that we can save/restore them during + * hibernation and the subsequent resume: */ static int __init e820_mark_nvs_memory(void) { @@ -828,6 +797,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) last_pfn, max_arch_pfn); return last_pfn; } + unsigned long __init e820_end_of_ram_pfn(void) { return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); @@ -846,7 +816,7 @@ static void __init early_panic(char *msg) static int userdef __initdata; -/* "mem=nopentium" disables the 4MB page tables. */ +/* The "mem=nopentium" boot option disables 4MB page tables on 32-bit kernels: */ static int __init parse_memopt(char *p) { u64 mem_size; @@ -866,9 +836,11 @@ static int __init parse_memopt(char *p) userdef = 1; mem_size = memparse(p, &p); - /* don't remove all of memory when handling "mem={invalid}" param */ + + /* Don't remove all memory when getting "mem={invalid}" parameter: */ if (mem_size == 0) return -EINVAL; + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); return 0; @@ -887,7 +859,7 @@ static int __init parse_memmap_one(char *p) #ifdef CONFIG_CRASH_DUMP /* * If we are doing a crash dump, we still need to know - * the real mem size before original memory map is + * the real memory size before the original memory map is * reset. */ saved_max_pfn = e820_end_of_ram_pfn(); @@ -915,11 +887,13 @@ static int __init parse_memmap_one(char *p) } else if (*p == '!') { start_at = memparse(p+1, &p); e820_add_region(start_at, mem_size, E820_PRAM); - } else + } else { e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); + } return *p == '\0' ? 0 : -EINVAL; } + static int __init parse_memmap_opt(char *str) { while (str) { @@ -939,8 +913,7 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { - if (sanitize_e820_table(e820_table->entries, ARRAY_SIZE(e820_table->entries), - &e820_table->nr_entries) < 0) + if (sanitize_e820_table(e820_table->entries, ARRAY_SIZE(e820_table->entries), &e820_table->nr_entries) < 0) early_panic("Invalid user supplied memory map"); printk(KERN_INFO "e820: user-defined physical RAM map:\n"); @@ -951,49 +924,42 @@ void __init finish_e820_parsing(void) static const char *__init e820_type_to_string(int e820_type) { switch (e820_type) { - case E820_RESERVED_KERN: - case E820_RAM: return "System RAM"; - case E820_ACPI: return "ACPI Tables"; - case E820_NVS: return "ACPI Non-volatile Storage"; - case E820_UNUSABLE: return "Unusable memory"; - case E820_PRAM: return "Persistent Memory (legacy)"; - case E820_PMEM: return "Persistent Memory"; - default: return "reserved"; + case E820_RESERVED_KERN: /* Fall-through: */ + case E820_RAM: return "System RAM"; + case E820_ACPI: return "ACPI Tables"; + case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; + case E820_PRAM: return "Persistent Memory (legacy)"; + case E820_PMEM: return "Persistent Memory"; + default: return "Reserved"; } } static unsigned long __init e820_type_to_iomem_type(int e820_type) { switch (e820_type) { - case E820_RESERVED_KERN: - case E820_RAM: - return IORESOURCE_SYSTEM_RAM; - case E820_ACPI: - case E820_NVS: - case E820_UNUSABLE: - case E820_PRAM: - case E820_PMEM: - default: - return IORESOURCE_MEM; + case E820_RESERVED_KERN: /* Fall-through: */ + case E820_RAM: return IORESOURCE_SYSTEM_RAM; + case E820_ACPI: /* Fall-through: */ + case E820_NVS: /* Fall-through: */ + case E820_UNUSABLE: /* Fall-through: */ + case E820_PRAM: /* Fall-through: */ + case E820_PMEM: /* Fall-through: */ + default: return IORESOURCE_MEM; } } static unsigned long __init e820_type_to_iores_desc(int e820_type) { switch (e820_type) { - case E820_ACPI: - return IORES_DESC_ACPI_TABLES; - case E820_NVS: - return IORES_DESC_ACPI_NV_STORAGE; - case E820_PMEM: - return IORES_DESC_PERSISTENT_MEMORY; - case E820_PRAM: - return IORES_DESC_PERSISTENT_MEMORY_LEGACY; - case E820_RESERVED_KERN: - case E820_RAM: - case E820_UNUSABLE: - default: - return IORES_DESC_NONE; + case E820_ACPI: return IORES_DESC_ACPI_TABLES; + case E820_NVS: return IORES_DESC_ACPI_NV_STORAGE; + case E820_PMEM: return IORES_DESC_PERSISTENT_MEMORY; + case E820_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_RESERVED_KERN: /* Fall-through: */ + case E820_RAM: /* Fall-through: */ + case E820_UNUSABLE: /* Fall-through: */ + default: return IORES_DESC_NONE; } } @@ -1018,9 +984,11 @@ static bool __init do_mark_busy(u32 type, struct resource *res) } /* - * Mark e820 reserved areas as busy for the resource manager. + * Mark E820 reserved areas as busy for the resource manager: */ + static struct resource __initdata *e820_res; + void __init e820_reserve_resources(void) { int i; @@ -1056,9 +1024,8 @@ void __init e820_reserve_resources(void) for (i = 0; i < e820_table_firmware->nr_entries; i++) { struct e820_entry *entry = &e820_table_firmware->entries[i]; - firmware_map_add_early(entry->addr, - entry->addr + entry->size, - e820_type_to_string(entry->type)); + + firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry->type)); } } @@ -1094,7 +1061,7 @@ void __init e820_reserve_resources_late(void) } /* - * Try to bump up RAM regions to reasonable boundaries to + * Try to bump up RAM regions to reasonable boundaries, to * avoid stolen RAM: */ for (i = 0; i < e820_table->nr_entries; i++) { @@ -1103,24 +1070,27 @@ void __init e820_reserve_resources_late(void) if (entry->type != E820_RAM) continue; + start = entry->addr + entry->size; end = round_up(start, ram_alignment(start)) - 1; if (end > MAX_RESOURCE_SIZE) end = MAX_RESOURCE_SIZE; if (start >= end) continue; - printk(KERN_DEBUG - "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", - start, end); - reserve_region_with_split(&iomem_resource, start, end, - "RAM buffer"); + + printk(KERN_DEBUG "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", start, end); + reserve_region_with_split(&iomem_resource, start, end, "RAM buffer"); } } +/* + * Pass the firmware (bootloader) E820 map to the kernel and process it: + */ char *__init e820__memory_setup_default(void) { char *who = "BIOS-e820"; u32 new_nr; + /* * Try to copy the BIOS-supplied E820-map. * @@ -1128,17 +1098,14 @@ char *__init e820__memory_setup_default(void) * the next section from 1mb->appropriate_mem_k */ new_nr = boot_params.e820_entries; - sanitize_e820_table(boot_params.e820_table, - ARRAY_SIZE(boot_params.e820_table), - &new_nr); + sanitize_e820_table(boot_params.e820_table, ARRAY_SIZE(boot_params.e820_table), &new_nr); boot_params.e820_entries = new_nr; - if (append_e820_table(boot_params.e820_table, boot_params.e820_entries) - < 0) { + + if (append_e820_table(boot_params.e820_table, boot_params.e820_entries) < 0) { u64 mem_size; - /* compare results from other methods and take the greater */ - if (boot_params.alt_mem_k - < boot_params.screen_info.ext_mem_k) { + /* Compare results from other methods and take the one that gives more RAM: */ + if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) { mem_size = boot_params.screen_info.ext_mem_k; who = "BIOS-88"; } else { @@ -1151,7 +1118,6 @@ char *__init e820__memory_setup_default(void) e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); } - /* In case someone cares... */ return who; } @@ -1165,7 +1131,9 @@ void __init e820__memory_setup(void) char *who; who = x86_init.resources.memory_setup(); + memcpy(e820_table_firmware, e820_table, sizeof(struct e820_table)); + printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n"); e820_print_map(who); } @@ -1221,8 +1189,7 @@ void __init memblock_find_dma_reserve(void) nr_pages += end_pfn - start_pfn; } - for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, - NULL) { + for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) { start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); if (start_pfn < end_pfn)