Commit 0cad3ff4 authored by Linus Torvalds

Merge branch 'akpm' (Fixes from Andrew)

Merge misc fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (12 patches)
  revert "mm: fix-up zone present pages"
  tmpfs: change final i_blocks BUG to WARNING
  tmpfs: fix shmem_getpage_gfp() VM_BUG_ON
  mm: highmem: don't treat PKMAP_ADDR(LAST_PKMAP) as a highmem address
  mm: revert "mm: vmscan: scale number of pages reclaimed by reclaim/compaction based on failures"
  rapidio: fix kernel-doc warnings
  swapfile: fix name leak in swapoff
  memcg: fix hotplugged memory zone oops
  mips, arc: fix build failure
  memcg: oom: fix totalpages calculation for memory.swappiness==0
  mm: fix build warning for uninitialized value
  mm: add anon_vma_lock to validate_mm()
......@@ -466,6 +466,10 @@ Note:
5.3 swappiness
Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
Please note that, unlike the global swappiness, a memcg knob set to 0
really does prevent any swapping even if swap storage is available.
This can invoke the memcg OOM killer when there are no file pages left
to reclaim (a usage sketch follows this hunk).
The following cgroups' swappiness cannot be changed:
- root cgroup (uses /proc/sys/vm/swappiness).
......
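As a quick illustration of the documented behavior, here is a minimal user-space sketch that pins a group's swappiness to 0. The cgroup-v1 mount point and group name are assumptions for illustration, not part of the patch.

```c
/*
 * Minimal sketch, NOT part of the patch: the cgroup mount point and
 * group name below are assumed.  With this commit, writing 0 disables
 * swap for the hierarchy entirely, so reclaim is limited to file pages
 * and the memcg OOM killer may fire when none remain.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/cgroup/memory/example/memory.swappiness", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("0", f);	/* 0 == never swap anon pages in this hierarchy */
	fclose(f);
	return 0;
}
```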
......@@ -637,7 +637,6 @@ mem_init (void)
high_memory = __va(max_low_pfn * PAGE_SIZE);
reset_zone_present_pages();
for_each_online_pgdat(pgdat)
if (pgdat->bdata->node_bootmem_map)
totalram_pages += free_all_bootmem_node(pgdat);
......
......@@ -11,6 +11,7 @@
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/irqflags.h>
#include <asm/bcache.h>
......
......@@ -401,7 +401,7 @@ EXPORT_SYMBOL_GPL(rio_release_inb_pwrite);
/**
* rio_map_inb_region -- Map inbound memory region.
* @mport: Master port.
* @lstart: physical address of memory region to be mapped
* @local: physical address of memory region to be mapped
* @rbase: RIO base address assigned to this window
* @size: Size of the memory region
* @rflags: Flags for mapping.
......
......@@ -1684,9 +1684,5 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool page_is_guard(struct page *page) { return false; }
#endif /* CONFIG_DEBUG_PAGEALLOC */
extern void reset_zone_present_pages(void);
extern void fixup_zone_present_pages(int nid, unsigned long start_pfn,
unsigned long end_pfn);
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
......@@ -752,7 +752,7 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
unsigned long size,
enum memmap_context context);
extern void lruvec_init(struct lruvec *lruvec, struct zone *zone);
extern void lruvec_init(struct lruvec *lruvec);
static inline struct zone *lruvec_zone(struct lruvec *lruvec)
{
......
......@@ -275,9 +275,11 @@ struct rio_id_table {
* struct rio_net - RIO network info
* @node: Node in global list of RIO networks
* @devices: List of devices in this network
* @switches: List of switches in this network
* @mports: List of master ports accessing this network
* @hport: Default port for accessing this network
* @id: RIO network ID
* @destid_table: destID allocation table
*/
struct rio_net {
struct list_head node; /* node in list of networks */
......
......@@ -198,8 +198,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
int order = ilog2(BITS_PER_LONG);
__free_pages_bootmem(pfn_to_page(start), order);
fixup_zone_present_pages(page_to_nid(pfn_to_page(start)),
start, start + BITS_PER_LONG);
count += BITS_PER_LONG;
start += BITS_PER_LONG;
} else {
......@@ -210,9 +208,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
if (vec & 1) {
page = pfn_to_page(start + off);
__free_pages_bootmem(page, 0);
fixup_zone_present_pages(
page_to_nid(page),
start + off, start + off + 1);
count++;
}
vec >>= 1;
......@@ -226,11 +221,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
pages = bdata->node_low_pfn - bdata->node_min_pfn;
pages = bootmem_bootmap_pages(pages);
count += pages;
while (pages--) {
fixup_zone_present_pages(page_to_nid(page),
page_to_pfn(page), page_to_pfn(page) + 1);
while (pages--)
__free_pages_bootmem(page++, 0);
}
bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
......
......@@ -98,7 +98,7 @@ struct page *kmap_to_page(void *vaddr)
{
unsigned long addr = (unsigned long)vaddr;
if (addr >= PKMAP_ADDR(0) && addr <= PKMAP_ADDR(LAST_PKMAP)) {
if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
int i = (addr - PKMAP_ADDR(0)) >> PAGE_SHIFT;
return pte_page(pkmap_page_table[i]);
}
......
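The one-character fix above turns a closed interval into a half-open one: PKMAP_ADDR(LAST_PKMAP) is the first address past the pkmap window, so the old `<=` admitted an address whose computed index lands one slot past the end of pkmap_page_table[]. A stand-alone sketch with invented constants (not the real kernel values):

```c
/*
 * Sketch only: PAGE_SHIFT/LAST_PKMAP/PKMAP_BASE are made-up stand-ins.
 * Valid pkmap indices are 0..LAST_PKMAP-1, so the boundary address must
 * be excluded -- hence '<', not '<='.
 */
#include <assert.h>

#define PAGE_SHIFT	12
#define LAST_PKMAP	512
#define PKMAP_BASE	0xfe000000UL
#define PKMAP_ADDR(nr)	(PKMAP_BASE + ((unsigned long)(nr) << PAGE_SHIFT))

int main(void)
{
	unsigned long first_past = PKMAP_ADDR(LAST_PKMAP);

	/* the old '<=' check admitted this address; its index is out of range */
	assert(((first_past - PKMAP_ADDR(0)) >> PAGE_SHIFT) == LAST_PKMAP);
	return 0;
}
```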
......@@ -1055,12 +1055,24 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
struct mem_cgroup *memcg)
{
struct mem_cgroup_per_zone *mz;
struct lruvec *lruvec;
if (mem_cgroup_disabled())
return &zone->lruvec;
if (mem_cgroup_disabled()) {
lruvec = &zone->lruvec;
goto out;
}
mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
return &mz->lruvec;
lruvec = &mz->lruvec;
out:
/*
* Since a node can be onlined after the mem_cgroup was created,
* we have to be prepared to initialize lruvec->zone here;
* and if offlined then reonlined, we need to reinitialize it.
*/
if (unlikely(lruvec->zone != zone))
lruvec->zone = zone;
return lruvec;
}
/*
......@@ -1087,9 +1099,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
struct mem_cgroup_per_zone *mz;
struct mem_cgroup *memcg;
struct page_cgroup *pc;
struct lruvec *lruvec;
if (mem_cgroup_disabled())
return &zone->lruvec;
if (mem_cgroup_disabled()) {
lruvec = &zone->lruvec;
goto out;
}
pc = lookup_page_cgroup(page);
memcg = pc->mem_cgroup;
......@@ -1107,7 +1122,16 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
pc->mem_cgroup = memcg = root_mem_cgroup;
mz = page_cgroup_zoneinfo(memcg, page);
return &mz->lruvec;
lruvec = &mz->lruvec;
out:
/*
* Since a node can be onlined after the mem_cgroup was created,
* we have to be prepared to initialize lruvec->zone here;
* and if offlined then reonlined, we need to reinitialize it.
*/
if (unlikely(lruvec->zone != zone))
lruvec->zone = zone;
return lruvec;
}
/**
......@@ -1452,17 +1476,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
u64 limit;
u64 memsw;
limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
limit += total_swap_pages << PAGE_SHIFT;
memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
/*
* If memsw is finite and limits the amount of swap space available
* to this memcg, return that limit.
* Do not consider swap space if we cannot swap due to swappiness
*/
return min(limit, memsw);
if (mem_cgroup_swappiness(memcg)) {
u64 memsw;
limit += total_swap_pages << PAGE_SHIFT;
memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
/*
* If memsw is finite and limits the amount of swap space
* available to this memcg, return that limit.
*/
limit = min(limit, memsw);
}
return limit;
}
void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
......@@ -3688,17 +3721,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
int node, int zid, enum lru_list lru)
{
struct mem_cgroup_per_zone *mz;
struct lruvec *lruvec;
unsigned long flags, loop;
struct list_head *list;
struct page *busy;
struct zone *zone;
zone = &NODE_DATA(node)->node_zones[zid];
mz = mem_cgroup_zoneinfo(memcg, node, zid);
list = &mz->lruvec.lists[lru];
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
list = &lruvec->lists[lru];
loop = mz->lru_size[lru];
loop = mem_cgroup_get_lru_size(lruvec, lru);
/* give some margin against EBUSY etc...*/
loop += 256;
busy = NULL;
......@@ -4736,7 +4769,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
mz = &pn->zoneinfo[zone];
lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]);
lruvec_init(&mz->lruvec);
mz->usage_in_excess = 0;
mz->on_tree = false;
mz->memcg = memcg;
......
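The mem_cgroup_get_limit() change above is the heart of the memory.swappiness==0 fix: this value feeds the OOM killer's totalpages base, and swap the group can never use must not inflate it, or every task's badness score is diluted. A stand-alone sketch of the fixed arithmetic, with invented example values:

```c
/*
 * Sketch of the fixed calculation; all values are invented examples.
 * With swappiness == 0 the base is just the memory limit; otherwise it
 * is mem + swap, capped by the memsw limit.
 */
#include <stdio.h>

static unsigned long long effective_limit(unsigned long long mem_limit,
					  unsigned long long memsw_limit,
					  unsigned long long swap_bytes,
					  int swappiness)
{
	unsigned long long limit = mem_limit;

	if (swappiness) {
		limit += swap_bytes;
		if (memsw_limit < limit)	/* memsw caps mem + swap */
			limit = memsw_limit;
	}
	return limit;
}

int main(void)
{
	/* swappiness == 0: base stays at the 1 GiB memory limit */
	printf("%llu\n", effective_limit(1ULL << 30, 2ULL << 30, 4ULL << 30, 0));
	/* swappiness > 0: 1 GiB + 4 GiB swap, capped by the 2 GiB memsw limit */
	printf("%llu\n", effective_limit(1ULL << 30, 2ULL << 30, 4ULL << 30, 60));
	return 0;
}
```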
......@@ -2527,9 +2527,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
int ret = 0;
int page_mkwrite = 0;
struct page *dirty_page = NULL;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
bool mmun_called = false; /* For mmu_notifiers */
unsigned long mmun_start = 0; /* For mmu_notifiers */
unsigned long mmun_end = 0; /* For mmu_notifiers */
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page) {
......@@ -2708,8 +2707,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto oom_free_new;
mmun_start = address & PAGE_MASK;
mmun_end = (address & PAGE_MASK) + PAGE_SIZE;
mmun_called = true;
mmun_end = mmun_start + PAGE_SIZE;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
/*
......@@ -2778,7 +2776,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_cache_release(new_page);
unlock:
pte_unmap_unlock(page_table, ptl);
if (mmun_called)
if (mmun_end > mmun_start)
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
if (old_page) {
/*
......
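The do_wp_page() cleanup above drops the mmun_called flag by zero-initializing the range and testing mmun_end > mmun_start instead: an empty range carries the same information as the old boolean. A hedged stand-alone sketch of that sentinel pattern, with all names invented (range_start/range_end stand in for the mmu notifier calls):

```c
#include <stdio.h>

static void range_start(unsigned long s, unsigned long e)
{
	printf("invalidate_range_start %#lx-%#lx\n", s, e);
}

static void range_end(unsigned long s, unsigned long e)
{
	printf("invalidate_range_end   %#lx-%#lx\n", s, e);
}

static void wp_like(int needs_copy, unsigned long address)
{
	unsigned long start = 0, end = 0;	/* empty range == "not started" */

	if (needs_copy) {
		start = address & ~0xfffUL;	/* assumes 4 KiB pages */
		end = start + 0x1000;
		range_start(start, end);
	}
	/* ... fault handling ... */
	if (end > start)			/* replaces the dropped flag */
		range_end(start, end);
}

int main(void)
{
	wp_like(1, 0x12345678UL);
	wp_like(0, 0x12345678UL);
	return 0;
}
```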
......@@ -106,7 +106,6 @@ static void get_page_bootmem(unsigned long info, struct page *page,
void __ref put_page_bootmem(struct page *page)
{
unsigned long type;
struct zone *zone;
type = (unsigned long) page->lru.next;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
......@@ -117,12 +116,6 @@ void __ref put_page_bootmem(struct page *page)
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
__free_pages_bootmem(page, 0);
zone = page_zone(page);
zone_span_writelock(zone);
zone->present_pages++;
zone_span_writeunlock(zone);
totalram_pages++;
}
}
......
......@@ -334,8 +334,10 @@ void validate_mm(struct mm_struct *mm)
struct vm_area_struct *vma = mm->mmap;
while (vma) {
struct anon_vma_chain *avc;
vma_lock_anon_vma(vma);
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
anon_vma_interval_tree_verify(avc);
vma_unlock_anon_vma(vma);
vma = vma->vm_next;
i++;
}
......
......@@ -87,7 +87,7 @@ int memmap_valid_within(unsigned long pfn,
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
void lruvec_init(struct lruvec *lruvec, struct zone *zone)
void lruvec_init(struct lruvec *lruvec)
{
enum lru_list lru;
......@@ -95,8 +95,4 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone)
for_each_lru(lru)
INIT_LIST_HEAD(&lruvec->lists[lru]);
#ifdef CONFIG_MEMCG
lruvec->zone = zone;
#endif
}
......@@ -116,8 +116,6 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
return 0;
__free_pages_memory(start_pfn, end_pfn);
fixup_zone_present_pages(pfn_to_nid(start >> PAGE_SHIFT),
start_pfn, end_pfn);
return end_pfn - start_pfn;
}
......@@ -128,7 +126,6 @@ unsigned long __init free_low_memory_core_early(int nodeid)
phys_addr_t start, end, size;
u64 i;
reset_zone_present_pages();
for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
count += __free_memory_core(start, end);
......
......@@ -4505,7 +4505,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
zone->zone_pgdat = pgdat;
zone_pcp_init(zone);
lruvec_init(&zone->lruvec, zone);
lruvec_init(&zone->lruvec);
if (!size)
continue;
......@@ -6098,37 +6098,3 @@ void dump_page(struct page *page)
dump_page_flags(page->flags);
mem_cgroup_print_bad_page(page);
}
/* reset zone->present_pages */
void reset_zone_present_pages(void)
{
struct zone *z;
int i, nid;
for_each_node_state(nid, N_HIGH_MEMORY) {
for (i = 0; i < MAX_NR_ZONES; i++) {
z = NODE_DATA(nid)->node_zones + i;
z->present_pages = 0;
}
}
}
/* calculate zone's present pages in buddy system */
void fixup_zone_present_pages(int nid, unsigned long start_pfn,
unsigned long end_pfn)
{
struct zone *z;
unsigned long zone_start_pfn, zone_end_pfn;
int i;
for (i = 0; i < MAX_NR_ZONES; i++) {
z = NODE_DATA(nid)->node_zones + i;
zone_start_pfn = z->zone_start_pfn;
zone_end_pfn = zone_start_pfn + z->spanned_pages;
/* if the two regions intersect */
if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn))
z->present_pages += min(end_pfn, zone_end_pfn) -
max(start_pfn, zone_start_pfn);
}
}
......@@ -643,7 +643,7 @@ static void shmem_evict_inode(struct inode *inode)
kfree(info->symlink);
simple_xattrs_free(&info->xattrs);
BUG_ON(inode->i_blocks);
WARN_ON(inode->i_blocks);
shmem_free_inode(inode->i_sb);
clear_inode(inode);
}
......@@ -1145,8 +1145,20 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
gfp, swp_to_radix_entry(swap));
/* We already confirmed swap, and make no allocation */
VM_BUG_ON(error);
/*
* We already confirmed swap under page lock, and make
* no memory allocation here, so usually no possibility
* of error; but free_swap_and_cache() only trylocks a
* page, so it is just possible that the entry has been
* truncated or holepunched since swap was confirmed.
* shmem_undo_range() will have done some of the
* unaccounting, now delete_from_swap_cache() will do
* the rest (including mem_cgroup_uncharge_swapcache).
* Reset swap.val? No, leave it so "failed" goes back to
* "repeat": reading a hole and writing should succeed.
*/
if (error)
delete_from_swap_cache(page);
}
if (error)
goto failed;
......
......@@ -1494,9 +1494,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
BUG_ON(!current->mm);
pathname = getname(specialfile);
err = PTR_ERR(pathname);
if (IS_ERR(pathname))
goto out;
return PTR_ERR(pathname);
victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
err = PTR_ERR(victim);
......@@ -1608,6 +1607,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
out_dput:
filp_close(victim, NULL);
out:
putname(pathname);
return err;
}
......
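The swapoff fix pairs every successful getname() with a putname() on all exit paths: the error branch that acquired nothing now returns directly, and everything after the acquisition funnels through the single release point at out:. A user-space analogue of that discipline, where get_name()/put_name() are invented stand-ins for the kernel helpers:

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* invented stand-ins for the kernel's getname()/putname() */
static char *get_name(const char *user) { return strdup(user); }
static void put_name(char *name) { free(name); }

static int swapoff_like(const char *specialfile)
{
	char *pathname = get_name(specialfile);
	FILE *victim;
	int err = 0;

	if (!pathname)
		return -ENOMEM;	/* nothing acquired yet: direct return is safe */

	victim = fopen(pathname, "r+");
	if (!victim) {
		err = -errno;
		goto out;	/* pathname is live: must reach put_name() */
	}
	/* ... the real swapoff work would run here ... */
	fclose(victim);
out:
	put_name(pathname);	/* the fix: released on every remaining path */
	return err;
}

int main(void)
{
	printf("%d\n", swapoff_like("/no/such/swapfile"));
	return 0;
}
```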
......@@ -1760,28 +1760,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
return false;
}
#ifdef CONFIG_COMPACTION
/*
* If compaction is deferred for sc->order then scale the number of pages
* reclaimed based on the number of consecutive allocation failures
*/
static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
struct lruvec *lruvec, struct scan_control *sc)
{
struct zone *zone = lruvec_zone(lruvec);
if (zone->compact_order_failed <= sc->order)
pages_for_compaction <<= zone->compact_defer_shift;
return pages_for_compaction;
}
#else
static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
struct lruvec *lruvec, struct scan_control *sc)
{
return pages_for_compaction;
}
#endif
/*
* Reclaim/compaction is used for high-order allocation requests. It reclaims
* order-0 pages before compacting the zone. should_continue_reclaim() returns
......@@ -1829,9 +1807,6 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
* inactive lists are large enough, continue reclaiming
*/
pages_for_compaction = (2UL << sc->order);
pages_for_compaction = scale_for_compaction(pages_for_compaction,
lruvec, sc);
inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
if (nr_swap_pages > 0)
inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
......