diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 70d0256edd3149e61d53cd8e07de8195eba2940c..c960923d9ec22b94a37ea7cc49a45b8f70a10725 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -93,7 +93,8 @@ extern int sysctl_compact_unevictable_allowed; extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, - const struct alloc_context *ac, enum compact_priority prio); + const struct alloc_context *ac, enum compact_priority prio, + struct page **page); extern void reset_isolation_suitable(pg_data_t *pgdat); extern enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int classzone_idx); diff --git a/include/linux/sched.h b/include/linux/sched.h index f9b43c989577fdf50da7147ee28a7b0a8c4e24ad..ebfb34fb9b30dea640a6c9cbe9c51d8c1429030e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -47,6 +47,7 @@ struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; +struct capture_control; struct robust_list_head; struct sched_attr; struct sched_param; @@ -958,6 +959,9 @@ struct task_struct { struct io_context *io_context; +#ifdef CONFIG_COMPACTION + struct capture_control *capture_control; +#endif /* Ptrace state: */ unsigned long ptrace_message; kernel_siginfo_t *last_siginfo; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7cbb5658be80c89d41d3a8f1d9614e7405152cb8..916e956e92bebf4cf2caed0298fba5e3da02aa30 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2190,6 +2190,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) INIT_HLIST_HEAD(&p->preempt_notifiers); #endif +#ifdef CONFIG_COMPACTION + p->capture_control = NULL; +#endif init_numa_balancing(clone_flags, p); } diff --git a/mm/compaction.c b/mm/compaction.c index 3084cee77fdab2ed856d7b523345e48e7e6cbec7..1cc871da3fdae931350eb17c12d8b044bf73d567 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2056,7 +2056,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, return false; } -static enum compact_result compact_zone(struct compact_control *cc) +static enum compact_result +compact_zone(struct compact_control *cc, struct capture_control *capc) { enum compact_result ret; unsigned long start_pfn = cc->zone->zone_start_pfn; @@ -2225,6 +2226,11 @@ static enum compact_result compact_zone(struct compact_control *cc) } } + /* Stop if a page has been captured */ + if (capc && capc->page) { + ret = COMPACT_SUCCESS; + break; + } } out: @@ -2258,7 +2264,8 @@ static enum compact_result compact_zone(struct compact_control *cc) static enum compact_result compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, enum compact_priority prio, - unsigned int alloc_flags, int classzone_idx) + unsigned int alloc_flags, int classzone_idx, + struct page **capture) { enum compact_result ret; struct compact_control cc = { @@ -2279,14 +2286,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order, .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY), .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY) }; + struct capture_control capc = { + .cc = &cc, + .page = NULL, + }; + + if (capture) + current->capture_control = &capc; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); - ret = compact_zone(&cc); + ret = compact_zone(&cc, &capc); VM_BUG_ON(!list_empty(&cc.freepages)); VM_BUG_ON(!list_empty(&cc.migratepages)); + *capture = capc.page; + current->capture_control = NULL; + return ret; } @@ -2304,7 +2321,7 @@ int sysctl_extfrag_threshold = 500; */ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, - enum compact_priority prio) + enum compact_priority prio, struct page **capture) { int may_perform_io = gfp_mask & __GFP_IO; struct zoneref *z; @@ -2332,7 +2349,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, } status = compact_zone_order(zone, order, gfp_mask, prio, - alloc_flags, ac_classzone_idx(ac)); + alloc_flags, ac_classzone_idx(ac), capture); rc = max(status, rc); /* The allocation should succeed, stop compacting */ @@ -2400,7 +2417,7 @@ static void compact_node(int nid) INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); - compact_zone(&cc); + compact_zone(&cc, NULL); VM_BUG_ON(!list_empty(&cc.freepages)); VM_BUG_ON(!list_empty(&cc.migratepages)); @@ -2535,7 +2552,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) if (kthread_should_stop()) return; - status = compact_zone(&cc); + status = compact_zone(&cc, NULL); if (status == COMPACT_SUCCESS) { compaction_defer_reset(zone, cc.order, false); diff --git a/mm/internal.h b/mm/internal.h index 31bb0be6fd525b76c732d7774333da5fae1fefa0..9eeaf2b95166fc65b5c0e2f4ca47099b6cbcb442 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -209,6 +209,15 @@ struct compact_control { bool rescan; /* Rescanning the same pageblock */ }; +/* + * Used in direct compaction when a page should be taken from the freelists + * immediately when one is created during the free path. + */ +struct capture_control { + struct compact_control *cc; + struct page *page; +}; + unsigned long isolate_freepages_range(struct compact_control *cc, unsigned long start_pfn, unsigned long end_pfn); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2e132b9e7a9328600e03cf987a93b5bd5957851c..09bf2c5f8b4b938fefe726d1910580407fdd644d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, return 0; } +#ifdef CONFIG_COMPACTION +static inline struct capture_control *task_capc(struct zone *zone) +{ + struct capture_control *capc = current->capture_control; + + return capc && + !(current->flags & PF_KTHREAD) && + !capc->page && + capc->cc->zone == zone && + capc->cc->direct_compaction ? capc : NULL; +} + +static inline bool +compaction_capture(struct capture_control *capc, struct page *page, + int order, int migratetype) +{ + if (!capc || order != capc->cc->order) + return false; + + /* Do not accidentally pollute CMA or isolated regions*/ + if (is_migrate_cma(migratetype) || + is_migrate_isolate(migratetype)) + return false; + + /* + * Do not let lower order allocations polluate a movable pageblock. + * This might let an unmovable request use a reclaimable pageblock + * and vice-versa but no more than normal fallback logic which can + * have trouble finding a high-order free page. + */ + if (order < pageblock_order && migratetype == MIGRATE_MOVABLE) + return false; + + capc->page = page; + return true; +} + +#else +static inline struct capture_control *task_capc(struct zone *zone) +{ + return NULL; +} + +static inline bool +compaction_capture(struct capture_control *capc, struct page *page, + int order, int migratetype) +{ + return false; +} +#endif /* CONFIG_COMPACTION */ + /* * Freeing function for a buddy system allocator. * @@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page, unsigned long uninitialized_var(buddy_pfn); struct page *buddy; unsigned int max_order; + struct capture_control *capc = task_capc(zone); max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); @@ -837,6 +889,11 @@ static inline void __free_one_page(struct page *page, continue_merging: while (order < max_order - 1) { + if (compaction_capture(capc, page, order, migratetype)) { + __mod_zone_freepage_state(zone, -(1 << order), + migratetype); + return; + } buddy_pfn = __find_buddy_pfn(pfn, order); buddy = page + (buddy_pfn - pfn); @@ -3710,7 +3767,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, enum compact_result *compact_result) { - struct page *page; + struct page *page = NULL; unsigned long pflags; unsigned int noreclaim_flag; @@ -3721,13 +3778,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, noreclaim_flag = memalloc_noreclaim_save(); *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, - prio); + prio, &page); memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); - if (*compact_result <= COMPACT_INACTIVE) + if (*compact_result <= COMPACT_INACTIVE) { + WARN_ON_ONCE(page); return NULL; + } /* * At least in one zone compaction wasn't deferred or skipped, so let's @@ -3735,7 +3794,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, */ count_vm_event(COMPACTSTALL); - page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac); + /* Prep a captured page if available */ + if (page) + prep_new_page(page, order, gfp_mask, alloc_flags); + + /* Try get a page from the freelist if available */ + if (!page) + page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac); if (page) { struct zone *zone = page_zone(page);