diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 68250a57aace9f017a6cfe3eaf0468a652100535..b0d530cf46d101a34733d0383445893e0125e629 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -95,7 +95,8 @@ extern int sysctl_compact_unevictable_allowed;
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
 		unsigned int order, unsigned int alloc_flags,
-		const struct alloc_context *ac, enum compact_priority prio);
+		const struct alloc_context *ac, enum compact_priority prio,
+		struct page **page);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
 		unsigned int alloc_flags, int classzone_idx);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8ace05026bfb515f0ed4d90f819ab841d8ad031d..e5f31cba4abcdd9ab68e72bec762eba47269c1b9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -48,6 +48,7 @@ struct pid_namespace;
 struct pipe_inode_info;
 struct rcu_node;
 struct reclaim_state;
+struct capture_control;
 struct robust_list_head;
 struct sched_attr;
 struct sched_param;
@@ -970,6 +971,9 @@ struct task_struct {
 
 	struct io_context		*io_context;
 
+#ifdef CONFIG_COMPACTION
+	struct capture_control		*capture_control;
+#endif
 	/* Ptrace state: */
 	unsigned long			ptrace_message;
 	siginfo_t			*last_siginfo;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index df4181ca8032b55cadcfc4f653e496d251e81ea2..d2760d7077c25c762008d933abebe164b3c55a83 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2124,6 +2124,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
 
+#ifdef CONFIG_COMPACTION
+	p->capture_control = NULL;
+#endif
 	init_numa_balancing(clone_flags, p);
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 28e1600f0b47c65cf22a9444d441ddfffe95ef35..504ef6ffdf5f3704e010b18da5a8028610cc185d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2056,7 +2056,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 	return false;
 }
 
-static enum compact_result compact_zone(struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
 	enum compact_result ret;
 	unsigned long start_pfn = cc->zone->zone_start_pfn;
@@ -2225,6 +2226,11 @@ static enum compact_result compact_zone(struct compact_control *cc)
 			}
 		}
 
+		/* Stop if a page has been captured */
+		if (capc && capc->page) {
+			ret = COMPACT_SUCCESS;
+			break;
+		}
 	}
 
 out:
@@ -2258,7 +2264,8 @@ static enum compact_result compact_zone(struct compact_control *cc)
 
 static enum compact_result compact_zone_order(struct zone *zone, int order,
 		gfp_t gfp_mask, enum compact_priority prio,
-		unsigned int alloc_flags, int classzone_idx)
+		unsigned int alloc_flags, int classzone_idx,
+		struct page **capture)
 {
 	enum compact_result ret;
 	struct compact_control cc = {
@@ -2279,14 +2286,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
 		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
 	};
+	struct capture_control capc = {
+		.cc = &cc,
+		.page = NULL,
+	};
+
+	if (capture)
+		current->capture_control = &capc;
 
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
 
-	ret = compact_zone(&cc);
+	ret = compact_zone(&cc, &capc);
 
 	VM_BUG_ON(!list_empty(&cc.freepages));
 	VM_BUG_ON(!list_empty(&cc.migratepages));
 
+	*capture = capc.page;
+	current->capture_control = NULL;
+
 	return ret;
 }
 
@@ -2304,7 +2321,7 @@ int sysctl_extfrag_threshold = 500;
  */
 enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
-		enum compact_priority prio)
+		enum compact_priority prio, struct page **capture)
 {
 	int may_perform_io = gfp_mask & __GFP_IO;
 	struct zoneref *z;
@@ -2332,7 +2349,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		}
 
 		status = compact_zone_order(zone, order, gfp_mask, prio,
-				alloc_flags, ac_classzone_idx(ac));
+				alloc_flags, ac_classzone_idx(ac), capture);
 		rc = max(status, rc);
 
 		/* The allocation should succeed, stop compacting */
@@ -2400,7 +2417,7 @@ static void compact_node(int nid)
 		INIT_LIST_HEAD(&cc.freepages);
 		INIT_LIST_HEAD(&cc.migratepages);
 
-		compact_zone(&cc);
+		compact_zone(&cc, NULL);
 
 		VM_BUG_ON(!list_empty(&cc.freepages));
 		VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -2543,7 +2560,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		if (kthread_should_stop())
 			return;
 
-		status = compact_zone(&cc);
+		status = compact_zone(&cc, NULL);
 
 		if (status == COMPACT_SUCCESS) {
 			compaction_defer_reset(zone, cc.order, false);
diff --git a/mm/internal.h b/mm/internal.h
index e3f3556b2a32af7f067eb700da55584857a586e7..511571cec9f24b96c4cdc2bee518acfdc14bb3c2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -210,6 +210,15 @@ struct compact_control {
 	bool rescan;			/* Rescanning the same pageblock */
 };
 
+/*
+ * Used in direct compaction when a page should be taken from the freelists
+ * immediately when one is created during the free path.
+ */
+struct capture_control {
+	struct compact_control *cc;
+	struct page *page;
+};
+
 unsigned long
 isolate_freepages_range(struct compact_control *cc,
 			unsigned long start_pfn, unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dd6090ac55a1452a7eb5b42807d778d6a7e7fd75..4e10ce1d8a4066501592e7e2eeb04a2cfde10e30 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -811,6 +811,57 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
 	zone->free_area[order].nr_free--;
 }
 
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	struct capture_control *capc = current->capture_control;
+
+	return capc &&
+		!(current->flags & PF_KTHREAD) &&
+		!capc->page &&
+		capc->cc->zone == zone &&
+		capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	if (!capc || order != capc->cc->order)
+		return false;
+
+	/* Do not accidentally pollute CMA or isolated regions */
+	if (is_migrate_cma(migratetype) ||
+	    is_migrate_isolate(migratetype))
+		return false;
+
+	/*
+	 * Do not let lower order allocations pollute a movable pageblock.
+	 * This might let an unmovable request use a reclaimable pageblock
+	 * and vice-versa but no more than normal fallback logic which can
+	 * have trouble finding a high-order free page.
+	 */
+	if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+		return false;
+
+	capc->page = page;
+	return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	return false;
+}
+#endif /* CONFIG_COMPACTION */
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -844,6 +895,7 @@ static inline void __free_one_page(struct page *page,
 	unsigned long uninitialized_var(buddy_pfn);
 	struct page *buddy;
 	unsigned int max_order;
+	struct capture_control *capc = task_capc(zone);
 
 	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
@@ -859,6 +911,11 @@ static inline void __free_one_page(struct page *page,
 
 continue_merging:
 	while (order < max_order - 1) {
+		if (compaction_capture(capc, page, order, migratetype)) {
+			__mod_zone_freepage_state(zone, -(1 << order),
+								migratetype);
+			return;
+		}
 		buddy_pfn = __find_buddy_pfn(pfn, order);
 		buddy = page + (buddy_pfn - pfn);
@@ -3717,7 +3774,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long pflags;
 	unsigned int noreclaim_flag;
 	u64 start;
@@ -3730,14 +3787,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	noreclaim_flag = memalloc_noreclaim_save();
 
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-									prio);
+								prio, &page);
 
 	memalloc_noreclaim_restore(noreclaim_flag);
 	memcg_lat_stat_end(MEM_LAT_DIRECT_COMPACT, start);
 	psi_memstall_leave(&pflags);
 
-	if (*compact_result <= COMPACT_INACTIVE)
+	if (*compact_result <= COMPACT_INACTIVE) {
+		WARN_ON_ONCE(page);
 		return NULL;
+	}
 
 	/*
 	 * At least in one zone compaction wasn't deferred or skipped, so let's
@@ -3745,7 +3804,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 */
 	count_vm_event(COMPACTSTALL);
 
-	page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+	/* Prep a captured page if available */
+	if (page)
+		prep_new_page(page, order, gfp_mask, alloc_flags);
+
+	/* Try to get a page from the freelist if available */
+	if (!page)
+		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
 
 	if (page) {
 		struct zone *zone = page_zone(page);
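
To make the handshake concrete, below is a minimal userspace model of the capture mechanism this patch introduces. Every name in it is a trimmed-down stand-in for illustration, not the kernel API: in the real patch the pointer lives on task_struct, compact_zone_order() publishes and clears it, __free_one_page() checks it at the top of its merge loop, and task_capc()/compaction_capture() additionally filter on zone, direct_compaction, PF_KTHREAD and migratetype.

#include <stdio.h>

/* Trimmed-down stand-ins for the kernel structures (illustration only). */
struct page { int id; };

struct compact_control {
	int order;			/* order the compactor is working toward */
};

struct capture_control {
	struct compact_control *cc;
	struct page *page;		/* filled in by the free path */
};

/* Models current->capture_control for a single task. */
static struct capture_control *current_capture_control;

/*
 * Models compaction_capture(): if a compactor on this task is waiting for
 * a page of exactly this order, hand the page over instead of freeing it.
 */
static int compaction_capture(struct capture_control *capc,
			      struct page *page, int order)
{
	if (!capc || capc->page || order != capc->cc->order)
		return 0;
	capc->page = page;
	return 1;
}

/* Models the hook this patch adds at the top of __free_one_page(). */
static void free_one_page(struct page *page, int order)
{
	if (compaction_capture(current_capture_control, page, order)) {
		printf("page %d (order %d) captured by compactor\n",
		       page->id, order);
		return;
	}
	printf("page %d (order %d) merged back into the freelist\n",
	       page->id, order);
}

int main(void)
{
	struct compact_control cc = { .order = 2 };
	struct capture_control capc = { .cc = &cc, .page = NULL };
	struct page p = { .id = 42 };

	/* compact_zone_order() publishes the capture request... */
	current_capture_control = &capc;

	/* ...migration inside compact_zone() frees a matching page... */
	free_one_page(&p, 2);

	/* ...and the compactor collects it without touching the freelist. */
	current_capture_control = NULL;
	if (capc.page)
		printf("compactor received page %d directly\n", capc.page->id);
	return 0;
}

The point of the design is visible in main(): the freed page never reaches the freelist, so no other allocator can race in and take the newly assembled high-order page between compaction finishing and get_page_from_freelist() running.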