diff --git a/fs/fcntl.c b/fs/fcntl.c
index 4137d96534a6cb6a73d7a70dc75db543b12a3441..0c70a8ed2a985ed3f2d807b1f73fababca00f8e5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include

 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

@@ -319,6 +320,22 @@ static long fcntl_rw_hint(struct file *file, unsigned int cmd,
 	}
 }

+static long fcntl_mapping_percpu(struct file *filp, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct address_space *mapping = filp->f_mapping;
+	unsigned long flag = arg;
+
+	if (!mapping)
+		return -EINVAL;
+
+	if (flag)
+		mapping_set_percpu_ref(mapping);
+	else
+		mapping_clear_percpu_ref(mapping);
+	return 0;
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -426,6 +443,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_SET_FILE_RW_HINT:
 		err = fcntl_rw_hint(filp, cmd, arg);
 		break;
+	case F_MAPPING_PERCPU:
+		err = fcntl_mapping_percpu(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 65d91b1fc6b28cbc5bde6194dfaebf32a71f6012..0e173a4d9cec48c6d0757b054fb43319b44fa688 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -528,6 +528,10 @@ static inline int pgd_devmap(pgd_t pgd)
 static inline int put_page_testzero(struct page *page)
 {
 	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
+	if (PagePercpuRef(page)) {
+		percpu_ref_put(page_percpu_ref(page));
+		return 0;
+	}
 	return page_ref_dec_and_test(page);
 }

@@ -539,6 +543,10 @@ static inline int put_page_testzero(struct page *page)
  */
 static inline int get_page_unless_zero(struct page *page)
 {
+	if (PagePercpuRef(page)) {
+		percpu_ref_get(page_percpu_ref(page));
+		return true;
+	}
 	return page_ref_add_unless(page, 1, 0);
 }

@@ -928,6 +936,11 @@ static inline bool is_device_public_page(const struct page *page)
 static inline void get_page(struct page *page)
 {
 	page = compound_head(page);
+
+	if (PagePercpuRef(page)) {
+		percpu_ref_get(page_percpu_ref(page));
+		return;
+	}
 	/*
 	 * Getting a normal page or the head of a compound page
 	 * requires to already have an elevated page->_refcount.
@@ -939,6 +952,11 @@ static inline void get_page(struct page *page)
 static inline __must_check bool try_get_page(struct page *page)
 {
 	page = compound_head(page);
+
+	if (PagePercpuRef(page)) {
+		percpu_ref_get(page_percpu_ref(page));
+		return true;
+	}
 	if (WARN_ON_ONCE(page_ref_count(page) <= 0))
 		return false;
 	page_ref_inc(page);
@@ -949,6 +967,11 @@ static inline void put_page(struct page *page)
 {
 	page = compound_head(page);

+	if (PagePercpuRef(page)) {
+		percpu_ref_put(page_percpu_ref(page));
+		return;
+	}
+
 	/*
 	 * For devmap managed pages we need to catch refcount transition from
 	 * 2 to 1, when refcount reach one it means the page is free and we
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3f066ce63a63a9c0ec9e528ced8088bcabc10606..7eb776a677d7a76b551bd215afde38a6af3de106 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -101,6 +101,7 @@ enum pageflags {
 	PG_young,
 	PG_idle,
 #endif
+	PG_percpu_ref,
 	__NR_PAGEFLAGS,

 	/* Filesystems */
@@ -385,6 +386,7 @@ SETPAGEFLAG(Young, young, PF_ANY)
 TESTCLEARFLAG(Young, young, PF_ANY)
 PAGEFLAG(Idle, idle, PF_ANY)
 #endif
+PAGEFLAG(PercpuRef, percpu_ref, PF_ANY)

 /*
  * On an anonymous page mapped into a user virtual memory area,
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 14d14beb1f7f1d493574cb064c668de4b2f5d107..3deab403c6a3355c50847b8de9b336536e23a90b 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -180,4 +180,15 @@ static inline void page_ref_unfreeze(struct page *page, int count)
 		__page_ref_unfreeze(page, count);
 }

+static inline struct percpu_ref *page_percpu_ref(struct page *page)
+{
+	return *(struct percpu_ref **)&page->lru;
+}
+
+static inline void page_set_percpu_ref(struct page *page,
+				       struct percpu_ref *ref)
+{
+	*(struct percpu_ref **)&page->lru = ref;
+}
+
 #endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 520627fc19ee182708672fd0a400cc9862aa20ad..e889d992961592f2b70a1b7595aaf170fd40ffdc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -29,6 +29,7 @@ enum mapping_flags {
 	AS_EXITING	= 4, 	/* final truncate in progress */
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
+	AS_PERCPU_REF	= 6,	/* percpu ref counter for special inode */
 };

 /**
@@ -97,6 +98,21 @@ static inline int mapping_use_writeback_tags(struct address_space *mapping)
 	return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
 }

+static inline void mapping_set_percpu_ref(struct address_space *mapping)
+{
+	set_bit(AS_PERCPU_REF, &mapping->flags);
+}
+
+static inline void mapping_clear_percpu_ref(struct address_space *mapping)
+{
+	clear_bit(AS_PERCPU_REF, &mapping->flags);
+}
+
+static inline int mapping_percpu_ref(struct address_space *mapping)
+{
+	return test_bit(AS_PERCPU_REF, &mapping->flags);
+}
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
 	return mapping->gfp_mask;
@@ -170,6 +186,10 @@ static inline int page_cache_get_speculative(struct page *page)
 # ifdef CONFIG_PREEMPT_COUNT
 	VM_BUG_ON(!in_atomic() && !irqs_disabled());
 # endif
+	if (PagePercpuRef(page)) {
+		percpu_ref_get(page_percpu_ref(page));
+		return 1;
+	}
 	/*
 	 * Preempt must be disabled here - we rely on rcu_read_lock doing
 	 * this for us.
@@ -183,6 +203,10 @@ static inline int page_cache_get_speculative(struct page *page)
 	page_ref_inc(page);

 #else
+	if (PagePercpuRef(page)) {
+		percpu_ref_get(page_percpu_ref(page));
+		return 1;
+	}
 	if (unlikely(!get_page_unless_zero(page))) {
 		/*
 		 * Either the page has been freed, or will be freed.
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a81cffb76d890185f09ddcf08496a7dd432233d6..2994f1c86a4664df5c6feff6c4e42d79b0d7dd14 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -104,7 +104,8 @@ IF_HAVE_PG_MLOCK(PG_mlocked,	"mlocked"	)		\
 IF_HAVE_PG_UNCACHED(PG_uncached,	"uncached"	)		\
 IF_HAVE_PG_HWPOISON(PG_hwpoison,	"hwpoison"	)		\
 IF_HAVE_PG_IDLE(PG_young,	"young"		)		\
-IF_HAVE_PG_IDLE(PG_idle,	"idle"		)
+IF_HAVE_PG_IDLE(PG_idle,	"idle"		),		\
+	{1UL << PG_percpu_ref,	"percpu_ref"	}

 #define show_page_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 6448cdd9a350d3a0c6513c7a76aad29a68689bd4..6dcddf7a73356bada9c56830459125fa4c20fc56 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -53,6 +53,8 @@
 #define F_GET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 13)
 #define F_SET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 14)

+#define F_MAPPING_PERCPU	(F_LINUX_SPECIFIC_BASE + 15)
+
 /*
  * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
  * used to clear any hints previously set.
diff --git a/mm/filemap.c b/mm/filemap.c
index c56c4196215230bd3932245393691c7913d90650..8a8bf78d3ac0a9ce4e307692d54f606e10942c2b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -48,6 +48,66 @@

 #include

+struct percpu_page {
+	struct percpu_ref ref;
+	struct page *page;
+};
+
+static void free_page_ref(struct percpu_ref *ref)
+{
+	struct percpu_page *p = (struct percpu_page *)ref;
+	struct page *page = p->page;
+
+	percpu_ref_exit(ref);
+	kfree(page_percpu_ref(page));
+	page_set_percpu_ref(page, NULL);
+
+	ClearPagePercpuRef(page);
+	/* really free the page */
+	put_page(page);
+}
+
+static void page_cache_init(struct address_space *mapping, struct page *page)
+{
+	struct percpu_page *p;
+
+	if (!mapping_percpu_ref(mapping))
+		return;
+
+	p = kzalloc(sizeof(struct percpu_page), GFP_KERNEL);
+	if (!p)
+		return;
+	if (percpu_ref_init(&p->ref, free_page_ref, 0, GFP_KERNEL))
+		goto err;
+
+	p->page = page;
+	page_set_percpu_ref(page, &p->ref);
+	SetPagePercpuRef(page);
+	get_page(page);
+	return;
+err:
+	kfree(p);
+}
+
+static void page_cache_exit(struct page *page)
+{
+	if (!PagePercpuRef(page))
+		return;
+
+	put_page(page);
+	ClearPagePercpuRef(page);
+	percpu_ref_exit(page_percpu_ref(page));
+	kfree(page_percpu_ref(page));
+	page_set_percpu_ref(page, NULL);
+}
+
+static void page_cache_kill(struct page *page)
+{
+	if (!PagePercpuRef(page))
+		return;
+	percpu_ref_kill(page_percpu_ref(page));
+}
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -264,6 +324,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)

 	unaccount_page_cache_page(mapping, page);
 	page_cache_tree_delete(mapping, page, shadow);
+	page_cache_kill(page);
 }

 static void page_cache_free_page(struct address_space *mapping,
@@ -384,8 +445,10 @@ void delete_from_page_cache_batch(struct address_space *mapping,
 	page_cache_tree_delete_batch(mapping, pvec);
 	xa_unlock_irqrestore(&mapping->i_pages, flags);

-	for (i = 0; i < pagevec_count(pvec); i++)
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		page_cache_kill(pvec->pages[i]);
 		page_cache_free_page(mapping, pvec->pages[i]);
+	}
 }

 int filemap_check_errors(struct address_space *mapping)
@@ -966,7 +1029,8 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 			workingset_activation(page);
 		} else
 			ClearPageActive(page);
-		lru_cache_add(page);
+		if (!PagePercpuRef(page))
+			lru_cache_add(page);
 	}
 	return ret;
 }
@@ -1630,8 +1694,10 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		if (fgp_flags & FGP_ACCESSED)
 			__SetPageReferenced(page);

+		page_cache_init(mapping, page);
 		err = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
 		if (unlikely(err)) {
+			page_cache_exit(page);
 			put_page(page);
 			page = NULL;
 			if (err == -EEXIST)
@@ -2320,9 +2386,11 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 			error = -ENOMEM;
 			goto out;
 		}
+		page_cache_init(mapping, page);
 		error = add_to_page_cache_lru(page, mapping, index,
 				mapping_gfp_constraint(mapping, GFP_KERNEL));
 		if (error) {
+			page_cache_exit(page);
 			put_page(page);
 			if (error == -EEXIST) {
 				error = 0;
@@ -2837,8 +2905,10 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 		page = __page_cache_alloc(gfp);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
+		page_cache_init(mapping, page);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
 		if (unlikely(err)) {
+			page_cache_exit(page);
 			put_page(page);
 			if (err == -EEXIST)
 				goto repeat;
diff --git a/mm/swap.c b/mm/swap.c
index 45fdbfb6b2a608857165d6efc625c01b4ab92438..320ac35bde26f906e06a84c98e7a12657ace82c9 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -372,6 +372,8 @@ static void __lru_cache_activate_page(struct page *page)
 void mark_page_accessed(struct page *page)
 {
 	page = compound_head(page);
+	if (PagePercpuRef(page))
+		return;
 	if (!PageActive(page) && !PageUnevictable(page) &&
 			PageReferenced(page)) {
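Not part of the patch: a minimal userspace sketch of how the new command could be exercised, assuming either the patched uapi header or the literal F_LINUX_SPECIFIC_BASE + 15 value from the hunk above. A non-zero argument sets AS_PERCPU_REF on the file's mapping, a zero argument clears it; only pages inserted into the page cache after the flag is set take the percpu_ref path.

/*
 * Hypothetical example program, not part of the patch.  F_MAPPING_PERCPU
 * is defined from its literal value here (F_LINUX_SPECIFIC_BASE is 1024)
 * in case the installed uapi headers predate this change.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef F_MAPPING_PERCPU
#define F_MAPPING_PERCPU	(1024 + 15)	/* F_LINUX_SPECIFIC_BASE + 15 */
#endif

int main(int argc, char **argv)
{
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* non-zero arg: set AS_PERCPU_REF on the file's mapping */
	if (fcntl(fd, F_MAPPING_PERCPU, 1) < 0)
		perror("fcntl(F_MAPPING_PERCPU, 1)");

	/* ... reads now populate the page cache with percpu_ref pages ... */

	/* zero arg: clear the flag again */
	if (fcntl(fd, F_MAPPING_PERCPU, 0) < 0)
		perror("fcntl(F_MAPPING_PERCPU, 0)");

	close(fd);
	return 0;
}

As the diff stands, such pages stash their struct percpu_ref pointer in the otherwise-unused page->lru field, which is why add_to_page_cache_lru() skips lru_cache_add() and mark_page_accessed() returns early for them.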