diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a9de6d3ae07d290dbf1df09089dfdfbcdd605511..ade4993f5fab4d84facfa9876842cfc692dc1b7f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -56,6 +56,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
 extern int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page, int extra_count);
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+				spinlock_t *ptl);
 #else
 
 static inline void putback_movable_pages(struct list_head *l) {}
diff --git a/mm/filemap.c b/mm/filemap.c
index 1b502f2e5253045c2ab9e61f3bd50992069b3e36..f8e1ee68ecac5aef40539f7454f81095cee907f2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -21,6 +21,7 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
@@ -42,6 +43,7 @@
 #include <linux/psi.h>
 #include <linux/ramfs.h>
 #include <linux/page_idle.h>
+#include <linux/migrate.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -1323,6 +1325,95 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
 }
 
+#ifdef CONFIG_MIGRATION
+/**
+ * migration_entry_wait_on_locked - Wait for a migration entry to be removed
+ * @entry: migration swap entry.
+ * @ptep: mapped pte pointer. Will return with the ptep unmapped. Only required
+ *		for pte entries, pass NULL for pmd entries.
+ * @ptl: already locked ptl. This function will drop the lock.
+ *
+ * Wait for a migration entry referencing the given page to be removed. This is
+ * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except
+ * this can be called without taking a reference on the page. Instead this
+ * should be called while holding the ptl for the migration entry referencing
+ * the page.
+ *
+ * Returns after unmapping and unlocking the pte/ptl with pte_unmap_unlock().
+ *
+ * This follows the same logic as wait_on_page_bit_common() so see the comments
+ * there.
+ */
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+				spinlock_t *ptl)
+{
+	struct wait_page_queue wait_page;
+	wait_queue_entry_t *wait = &wait_page.wait;
+	bool thrashing = false;
+	bool delayacct = false;
+	unsigned long pflags;
+	wait_queue_head_t *q;
+	struct page *page = compound_head(migration_entry_to_page(entry));
+
+	q = page_waitqueue(page);
+	if (!PageUptodate(page) && PageWorkingset(page)) {
+		if (!PageSwapBacked(page)) {
+			delayacct_thrashing_start();
+			delayacct = true;
+		}
+		psi_memstall_enter(&pflags);
+		thrashing = true;
+	}
+
+	init_wait(wait);
+	wait->func = wake_page_function;
+	wait_page.page = page;
+	wait_page.bit_nr = PG_locked;
+	wait->flags = 0;
+
+	spin_lock_irq(&q->lock);
+	SetPageWaiters(page);
+	if (!trylock_page_bit_common(page, PG_locked, wait))
+		__add_wait_queue_entry_tail(q, wait);
+	spin_unlock_irq(&q->lock);
+
+	/*
+	 * If a migration entry exists for the page the migration path must hold
+	 * a valid reference to the page, and it must take the ptl to remove the
+	 * migration entry. So the page is valid until the ptl is dropped.
+	 */
+	if (ptep)
+		pte_unmap_unlock(ptep, ptl);
+	else
+		spin_unlock(ptl);
+
+	for (;;) {
+		unsigned int flags;
+
+		set_current_state(TASK_UNINTERRUPTIBLE);
+
+		/* Loop until we've been woken or interrupted */
+		flags = smp_load_acquire(&wait->flags);
+		if (!(flags & WQ_FLAG_WOKEN)) {
+			if (signal_pending_state(TASK_UNINTERRUPTIBLE, current))
+				break;
+
+			io_schedule();
+			continue;
+		}
+		break;
+	}
+
+	finish_wait(q, wait);
+
+	if (thrashing) {
+		if (delayacct)
+			delayacct_thrashing_end();
+		psi_memstall_leave(&pflags);
+	}
+}
+#endif
+
 void wait_on_page_bit(struct page *page, int bit_nr)
 {
 	wait_queue_head_t *q = page_waitqueue(page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 3f6c76b97989d78ddecbeb11df231bc9ca679546..b3c47cc9b6225b9d5411ef3b9075f3edd264b4ff 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -315,7 +315,6 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
 {
 	pte_t pte;
 	swp_entry_t entry;
-	struct page *page;
 
 	spin_lock(ptl);
 	pte = *ptep;
@@ -326,18 +325,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
 	if (!is_migration_entry(entry))
 		goto out;
 
-	page = migration_entry_to_page(entry);
-	page = compound_head(page);
-
-	/*
-	 * Once page cache replacement of page migration started, page_count
-	 * is zero; but we must not call put_and_wait_on_page_locked() without
-	 * a ref. Use get_page_unless_zero(), and just fault again if it fails.
-	 */
-	if (!get_page_unless_zero(page))
-		goto out;
-	pte_unmap_unlock(ptep, ptl);
-	put_and_wait_on_page_locked(page);
+	migration_entry_wait_on_locked(entry, ptep, ptl);
 	return;
 out:
 	pte_unmap_unlock(ptep, ptl);
@@ -362,16 +350,11 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
 void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *ptl;
-	struct page *page;
 
 	ptl = pmd_lock(mm, pmd);
 	if (!is_pmd_migration_entry(*pmd))
 		goto unlock;
-	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
-	if (!get_page_unless_zero(page))
-		goto unlock;
-	spin_unlock(ptl);
-	put_and_wait_on_page_locked(page);
+	migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), NULL, ptl);
 	return;
 unlock:
 	spin_unlock(ptl);
@@ -2558,22 +2541,8 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page)
 		return false;
 
 	/* Page from ZONE_DEVICE have one extra reference */
-	if (is_zone_device_page(page)) {
-		/*
-		 * Private page can never be pin as they have no valid pte and
-		 * GUP will fail for those. Yet if there is a pending migration
-		 * a thread might try to wait on the pte migration entry and
-		 * will bump the page reference count. Sadly there is no way to
-		 * differentiate a regular pin from migration wait. Hence to
-		 * avoid 2 racing thread trying to migrate back to CPU to enter
-		 * infinite loop (one stopping migration because the other is
-		 * waiting on pte migration entry). We always return true here.
-		 *
-		 * FIXME proper solution is to rework migration_entry_wait() so
-		 * it does not need to take a reference on page.
-		 */
-		return is_device_private_page(page);
-	}
+	if (is_zone_device_page(page))
+		extra++;
 
 	/* For file back page */
 	if (page_mapping(page))
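
For reference, the calling convention the new helper establishes, as seen in the two mm/migrate.c callers above: the caller discovers a migration entry while holding the page table lock and passes the entry, the mapped pte pointer (or NULL for a pmd entry) and the lock straight through, taking no page reference of its own. Below is a condensed, illustrative sketch of the two call patterns, distilled from the hunks above (declarations and fault paths elided, so it is not buildable on its own):

	/* PTE case: the helper calls pte_unmap_unlock(ptep, ptl) itself,
	 * so the caller must not touch ptep or ptl afterwards. */
	spin_lock(ptl);
	pte = *ptep;
	if (is_swap_pte(pte) && is_migration_entry(pte_to_swp_entry(pte))) {
		migration_entry_wait_on_locked(pte_to_swp_entry(pte), ptep, ptl);
		return;	/* woken once migration unlocks the page */
	}
	pte_unmap_unlock(ptep, ptl);

	/* PMD case: no pte mapping exists, so ptep is NULL and the helper
	 * only does spin_unlock(ptl) before sleeping. */
	ptl = pmd_lock(mm, pmd);
	if (is_pmd_migration_entry(*pmd)) {
		migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), NULL, ptl);
		return;
	}
	spin_unlock(ptl);

Dropping the get_page_unless_zero() dance is safe because, per the comment added in filemap.c, the migrating task holds its own page reference and must retake the ptl to remove the migration entry, so the page stays valid until the waiter has been queued and the lock dropped. That is also what allows the ZONE_DEVICE special case in migrate_vma_check_page() to collapse into a plain extra reference count.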