diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2cb01f6ec5d019e7e431d9f18cdba6f3bde573e4..b437fe6257b02adb5520e377c5af045c8a397487 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1211,6 +1211,17 @@ int set_page_dirty(struct page *page) if (likely(mapping)) { int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; + /* + * readahead/lru_deactivate_page could remain + * PG_readahead/PG_reclaim due to race with end_page_writeback + * About readahead, if the page is written, the flags would be + * reset. So no problem. + * About lru_deactivate_page, if the page is redirty, the flag + * will be reset. So no problem. but if the page is used by readahead + * it will confuse readahead and make it restart the size rampup + * process. But it's a trivial problem. + */ + ClearPageReclaim(page); #ifdef CONFIG_BLOCK if (!spd) spd = __set_page_dirty_buffers; @@ -1266,7 +1277,6 @@ int clear_page_dirty_for_io(struct page *page) BUG_ON(!PageLocked(page)); - ClearPageReclaim(page); if (mapping && mapping_cap_account_dirty(mapping)) { /* * Yes, Virginia, this is indeed insane. diff --git a/mm/swap.c b/mm/swap.c index 1b9e4ebaffc80f07b94300619682a271bdf5511d..0a33714a7cba41786ede53139a77362653c2fd37 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -354,26 +354,61 @@ void add_page_to_unevictable_list(struct page *page) * head of the list, rather than the tail, to give the flusher * threads some time to write it out, as this is much more * effective than the single-page writeout from reclaim. + * + * If the page isn't page_mapped and dirty/writeback, the page + * could reclaim asap using PG_reclaim. + * + * 1. active, mapped page -> none + * 2. active, dirty/writeback page -> inactive, head, PG_reclaim + * 3. inactive, mapped page -> none + * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim + * 5. inactive, clean -> inactive, tail + * 6. Others -> none + * + * In 4, why it moves inactive's head, the VM expects the page would + * be write it out by flusher threads as this is much more effective + * than the single-page writeout from reclaim. */ static void lru_deactivate(struct page *page, struct zone *zone) { int lru, file; + bool active; - if (!PageLRU(page) || !PageActive(page)) + if (!PageLRU(page)) return; /* Some processes are using the page */ if (page_mapped(page)) return; + active = PageActive(page); + file = page_is_file_cache(page); lru = page_lru_base_type(page); - del_page_from_lru_list(zone, page, lru + LRU_ACTIVE); + del_page_from_lru_list(zone, page, lru + active); ClearPageActive(page); ClearPageReferenced(page); add_page_to_lru_list(zone, page, lru); - __count_vm_event(PGDEACTIVATE); + if (PageWriteback(page) || PageDirty(page)) { + /* + * PG_reclaim could be raced with end_page_writeback + * It can make readahead confusing. But race window + * is _really_ small and it's non-critical problem. + */ + SetPageReclaim(page); + } else { + /* + * The page's writeback ends up during pagevec + * We moves tha page into tail of inactive. + */ + list_move_tail(&page->lru, &zone->lru[lru].list); + mem_cgroup_rotate_reclaimable_page(page); + __count_vm_event(PGROTATED); + } + + if (active) + __count_vm_event(PGDEACTIVATE); update_page_reclaim_stat(zone, page, file, 0); }