diff --git a/include/linux/swap.h b/include/linux/swap.h
index a49112536c02c519a6ccd77b8552dfc15af9cc8f..893096e67bdbb2f8f12e81d52ce635ce5789c543 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -178,6 +178,8 @@ extern int vm_swappiness;
 extern int isolate_lru_page(struct page *p);
 extern int putback_lru_pages(struct list_head *l);
 
+extern int migrate_pages(struct list_head *l, struct list_head *t);
+
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6c30a8c59795ca024556d1ed30a13238e8aaeddb..a537a7f1635782fbfaff538dd669c0c22c15d920 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -373,6 +373,51 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 	return PAGE_CLEAN;
 }
 
+/*
+ * Detach a page from its mapping (page cache or swap cache) if the
+ * cache and the caller hold the only two references and the page is
+ * clean. The callers in this file hold the page lock.
+ *
+ * Returns 1 if the page was removed and the cache's reference dropped,
+ * 0 if there is no mapping or the page is dirty or otherwise in use.
+ */
+static int remove_mapping(struct address_space *mapping, struct page *page)
+{
+	if (!mapping)
+		return 0;		/* truncate got there first */
+
+	write_lock_irq(&mapping->tree_lock);
+
+	/*
+	 * The non-racy check for busy page. It is critical to check
+	 * PageDirty _after_ making sure that the page is freeable and
+	 * not in use by anybody. (pagecache + us == 2)
+	 */
+	if (unlikely(page_count(page) != 2))
+		goto cannot_free;
+	smp_rmb();
+	if (unlikely(PageDirty(page)))
+		goto cannot_free;
+
+	if (PageSwapCache(page)) {
+		swp_entry_t swap = { .val = page_private(page) };
+		__delete_from_swap_cache(page);
+		write_unlock_irq(&mapping->tree_lock);
+		swap_free(swap);
+		__put_page(page);	/* The pagecache ref */
+		return 1;
+	}
+
+	__remove_from_page_cache(page);
+	write_unlock_irq(&mapping->tree_lock);
+	__put_page(page);
+	return 1;
+
+cannot_free:
+	write_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
 /*
  * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
  */
@@ -504,36 +549,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 				goto free_it;
 		}
 
-		if (!mapping)
-			goto keep_locked;	/* truncate got there first */
-
-		write_lock_irq(&mapping->tree_lock);
-
-		/*
-		 * The non-racy check for busy page. It is critical to check
-		 * PageDirty _after_ making sure that the page is freeable and
-		 * not in use by anybody. (pagecache + us == 2)
-		 */
-		if (unlikely(page_count(page) != 2))
-			goto cannot_free;
-		smp_rmb();
-		if (unlikely(PageDirty(page)))
-			goto cannot_free;
-
-#ifdef CONFIG_SWAP
-		if (PageSwapCache(page)) {
-			swp_entry_t swap = { .val = page_private(page) };
-			__delete_from_swap_cache(page);
-			write_unlock_irq(&mapping->tree_lock);
-			swap_free(swap);
-			__put_page(page);	/* The pagecache ref */
-			goto free_it;
-		}
-#endif /* CONFIG_SWAP */
-
-		__remove_from_page_cache(page);
-		write_unlock_irq(&mapping->tree_lock);
-		__put_page(page);
+		if (!remove_mapping(mapping, page))
+			goto keep_locked;
 
 free_it:
 		unlock_page(page);
@@ -542,10 +559,6 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 			__pagevec_release_nonlru(&freed_pvec);
 		continue;
 
-cannot_free:
-		write_unlock_irq(&mapping->tree_lock);
-		goto keep_locked;
-
 activate_locked:
 		SetPageActive(page);
 		pgactivate++;
@@ -563,6 +576,160 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 	return reclaimed;
 }
 
+/*
+ * swapout a single page
+ * page is locked upon entry, unlocked on exit
+ *
+ * return codes:
+ *	0 = complete
+ *	1 = retry
+ */
+static int swap_page(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+
+	if (page_mapped(page) && mapping)
+		if (try_to_unmap(page) != SWAP_SUCCESS)
+			goto unlock_retry;
+
+	if (PageDirty(page)) {
+		/* Page is dirty, try to write it out here */
+		switch(pageout(page, mapping)) {
+		case PAGE_KEEP:
+		case PAGE_ACTIVATE:
+			goto unlock_retry;
+
+		case PAGE_SUCCESS:
+			goto retry;
+
+		case PAGE_CLEAN:
+			; /* try to free the page below */
+		}
+	}
+
+	if (PagePrivate(page)) {
+		if (!try_to_release_page(page, GFP_KERNEL) ||
+		    (!mapping && page_count(page) == 1))
+			goto unlock_retry;
+	}
+
+	if (remove_mapping(mapping, page)) {
+		/* Success */
+		unlock_page(page);
+		return 0;
+	}
+
+unlock_retry:
+	unlock_page(page);
+
+retry:
+	return 1;
+}
+/*
+ * migrate_pages
+ *
+ * Two lists are passed to this function. The first list
+ * contains the pages isolated from the LRU to be migrated.
+ * The second list contains new pages that the pages isolated
+ * can be moved to. If the second list is NULL then all
+ * pages are swapped out.
+ *
+ * The function returns after 11 passes (pass 0 plus 10 retries)
+ * or if no pages are movable anymore because t has become empty
+ * or no retryable pages exist anymore.
+ *
+ * Returns the number of pages that could not be swapped out: those
+ * that failed for good (spliced back onto l on return) plus those
+ * that still wanted a retry after the last pass.
+ *
+ * SIMPLIFIED VERSION: This implementation of migrate_pages
+ * is only swapping out pages and never touches the second
+ * list. The direct migration patchset
+ * extends this function to avoid the use of swap.
+ */
+int migrate_pages(struct list_head *l, struct list_head *t)
+{
+	int retry;
+	LIST_HEAD(failed);
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int swapwrite = current->flags & PF_SWAPWRITE;
+
+	if (!swapwrite)
+		current->flags |= PF_SWAPWRITE;
+
+redo:
+	retry = 0;
+
+	list_for_each_entry_safe(page, page2, l, lru) {
+		cond_resched();
+
+		/*
+		 * Only our own reference is left (remove_mapping() demands
+		 * "pagecache + us == 2" and drops the cache's), so the page
+		 * was already swapped out on an earlier pass or released by
+		 * everybody else. Do not process or count it again.
+		 */
+		if (page_count(page) == 1)
+			continue;
+
+		/*
+		 * Skip locked pages during the first three passes to give the
+		 * functions holding the lock time to release the page. Later we use
+		 * lock_page to have a higher chance of acquiring the lock.
+		 */
+		if (pass > 2)
+			lock_page(page);
+		else
+			if (TestSetPageLocked(page))
+				goto retry_later;
+
+		/*
+		 * Only wait on writeback if we have already done a pass where
+		 * we may have triggered writeouts for lots of pages.
+		 */
+		if (pass > 0)
+			wait_on_page_writeback(page);
+		else
+			if (PageWriteback(page)) {
+				unlock_page(page);
+				goto retry_later;
+			}
+
+#ifdef CONFIG_SWAP
+		if (PageAnon(page) && !PageSwapCache(page)) {
+			if (!add_to_swap(page)) {
+				unlock_page(page);
+				list_move(&page->lru, &failed);
+				nr_failed++;
+				continue;
+			}
+		}
+#endif /* CONFIG_SWAP */
+
+		/*
+		 * Page is properly locked and writeback is complete.
+		 * Try to migrate the page.
+		 */
+		if (swap_page(page)) {
+retry_later:
+			retry++;
+		}
+	}
+	if (retry && pass++ < 10)
+		goto redo;
+
+	if (!swapwrite)
+		current->flags &= ~PF_SWAPWRITE;
+
+	if (!list_empty(&failed))
+		list_splice(&failed, l);
+
+	return nr_failed + retry;
+}
+
 /*
  * zone->lru_lock is heavily contended. Some of the functions that
  * shrink the lists perform better by taking out a batch of pages