提交 0f7ddea6 编写于 作者: L Linus Torvalds

Merge tag 'netfs-folio-20211111' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull netfs, 9p, afs and ceph (partial) foliation from David Howells:
 "This converts netfslib, 9p and afs to use folios. It also partially
  converts ceph so that it uses folios on the boundaries with netfslib.

  To help with this, a couple of folio helper functions are added in the
  first two patches.

  These patches don't touch fscache and cachefiles as I intend to remove
  all the code that deals with pages directly from there. Only nfs and
  cifs are using the old fscache I/O API now. The new API uses iov_iter
  instead.

  Thanks to Jeff Layton, Dominique Martinet and AuriStor for testing and
  retesting the patches"

* tag 'netfs-folio-20211111' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: Use folios in directory handling
  netfs, 9p, afs, ceph: Use folios
  folio: Add a function to get the host inode for a folio
  folio: Add a function to change the private data attached to a folio
......@@ -108,7 +108,9 @@ static const struct netfs_read_request_ops v9fs_req_ops = {
*/
static int v9fs_vfs_readpage(struct file *file, struct page *page)
{
return netfs_readpage(file, page, &v9fs_req_ops, NULL);
struct folio *folio = page_folio(page);
return netfs_readpage(file, folio, &v9fs_req_ops, NULL);
}
/**
......@@ -130,13 +132,15 @@ static void v9fs_vfs_readahead(struct readahead_control *ractl)
static int v9fs_release_page(struct page *page, gfp_t gfp)
{
if (PagePrivate(page))
struct folio *folio = page_folio(page);
if (folio_test_private(folio))
return 0;
#ifdef CONFIG_9P_FSCACHE
if (PageFsCache(page)) {
if (folio_test_fscache(folio)) {
if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
return 0;
wait_on_page_fscache(page);
folio_wait_fscache(folio);
}
#endif
return 1;
......@@ -152,55 +156,58 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
static void v9fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
wait_on_page_fscache(page);
struct folio *folio = page_folio(page);
folio_wait_fscache(folio);
}
static int v9fs_vfs_writepage_locked(struct page *page)
static int v9fs_vfs_write_folio_locked(struct folio *folio)
{
struct inode *inode = page->mapping->host;
struct inode *inode = folio_inode(folio);
struct v9fs_inode *v9inode = V9FS_I(inode);
loff_t start = page_offset(page);
loff_t size = i_size_read(inode);
loff_t start = folio_pos(folio);
loff_t i_size = i_size_read(inode);
struct iov_iter from;
int err, len;
size_t len = folio_size(folio);
int err;
if (start >= i_size)
return 0; /* Simultaneous truncation occurred */
if (page->index == size >> PAGE_SHIFT)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
len = min_t(loff_t, i_size - start, len);
iov_iter_xarray(&from, WRITE, &page->mapping->i_pages, start, len);
iov_iter_xarray(&from, WRITE, &folio_mapping(folio)->i_pages, start, len);
/* We should have writeback_fid always set */
BUG_ON(!v9inode->writeback_fid);
set_page_writeback(page);
folio_start_writeback(folio);
p9_client_write(v9inode->writeback_fid, start, &from, &err);
end_page_writeback(page);
folio_end_writeback(folio);
return err;
}
static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
{
struct folio *folio = page_folio(page);
int retval;
p9_debug(P9_DEBUG_VFS, "page %p\n", page);
p9_debug(P9_DEBUG_VFS, "folio %p\n", folio);
retval = v9fs_vfs_writepage_locked(page);
retval = v9fs_vfs_write_folio_locked(folio);
if (retval < 0) {
if (retval == -EAGAIN) {
redirty_page_for_writepage(wbc, page);
folio_redirty_for_writepage(wbc, folio);
retval = 0;
} else {
SetPageError(page);
mapping_set_error(page->mapping, retval);
mapping_set_error(folio_mapping(folio), retval);
}
} else
retval = 0;
unlock_page(page);
folio_unlock(folio);
return retval;
}
......@@ -213,14 +220,15 @@ static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
static int v9fs_launder_page(struct page *page)
{
struct folio *folio = page_folio(page);
int retval;
if (clear_page_dirty_for_io(page)) {
retval = v9fs_vfs_writepage_locked(page);
if (folio_clear_dirty_for_io(folio)) {
retval = v9fs_vfs_write_folio_locked(folio);
if (retval)
return retval;
}
wait_on_page_fscache(page);
folio_wait_fscache(folio);
return 0;
}
......@@ -265,10 +273,10 @@ v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
loff_t pos, unsigned int len, unsigned int flags,
struct page **pagep, void **fsdata)
struct page **subpagep, void **fsdata)
{
int retval;
struct page *page;
struct folio *folio;
struct v9fs_inode *v9inode = V9FS_I(mapping->host);
p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
......@@ -279,31 +287,32 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
*/
retval = netfs_write_begin(filp, mapping, pos, len, flags, &page, fsdata,
retval = netfs_write_begin(filp, mapping, pos, len, flags, &folio, fsdata,
&v9fs_req_ops, NULL);
if (retval < 0)
return retval;
*pagep = find_subpage(page, pos / PAGE_SIZE);
*subpagep = &folio->page;
return retval;
}
static int v9fs_write_end(struct file *filp, struct address_space *mapping,
loff_t pos, unsigned int len, unsigned int copied,
struct page *page, void *fsdata)
struct page *subpage, void *fsdata)
{
loff_t last_pos = pos + copied;
struct inode *inode = page->mapping->host;
struct folio *folio = page_folio(subpage);
struct inode *inode = mapping->host;
p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
if (unlikely(copied < len)) {
copied = 0;
goto out;
}
SetPageUptodate(page);
folio_mark_uptodate(folio);
}
/*
......@@ -314,10 +323,10 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
inode_add_bytes(inode, last_pos - inode->i_size);
i_size_write(inode, last_pos);
}
set_page_dirty(page);
folio_mark_dirty(folio);
out:
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
return copied;
}
......
......@@ -528,13 +528,13 @@ static vm_fault_t
v9fs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct v9fs_inode *v9inode;
struct page *page = vmf->page;
struct folio *folio = page_folio(vmf->page);
struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
page, (unsigned long)filp->private_data);
p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
folio, (unsigned long)filp->private_data);
v9inode = V9FS_I(inode);
......@@ -542,24 +542,24 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
* be modified. We then assume the entire page will need writing back.
*/
#ifdef CONFIG_9P_FSCACHE
if (PageFsCache(page) &&
wait_on_page_fscache_killable(page) < 0)
return VM_FAULT_RETRY;
if (folio_test_fscache(folio) &&
folio_wait_fscache_killable(folio) < 0)
return VM_FAULT_NOPAGE;
#endif
/* Update file times before taking page lock */
file_update_time(filp);
BUG_ON(!v9inode->writeback_fid);
if (lock_page_killable(page) < 0)
if (folio_lock_killable(folio) < 0)
return VM_FAULT_RETRY;
if (page->mapping != inode->i_mapping)
if (folio_mapping(folio) != inode->i_mapping)
goto out_unlock;
wait_for_stable_page(page);
folio_wait_stable(folio);
return VM_FAULT_LOCKED;
out_unlock:
unlock_page(page);
folio_unlock(folio);
return VM_FAULT_NOPAGE;
}
......
......@@ -103,13 +103,13 @@ struct afs_lookup_cookie {
};
/*
* Drop the refs that we're holding on the pages we were reading into. We've
* Drop the refs that we're holding on the folios we were reading into. We've
* got refs on the first nr_pages pages.
*/
static void afs_dir_read_cleanup(struct afs_read *req)
{
struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
struct page *page;
struct folio *folio;
pgoff_t last = req->nr_pages - 1;
XA_STATE(xas, &mapping->i_pages, 0);
......@@ -118,65 +118,56 @@ static void afs_dir_read_cleanup(struct afs_read *req)
return;
rcu_read_lock();
xas_for_each(&xas, page, last) {
if (xas_retry(&xas, page))
xas_for_each(&xas, folio, last) {
if (xas_retry(&xas, folio))
continue;
BUG_ON(xa_is_value(page));
BUG_ON(PageCompound(page));
ASSERTCMP(page->mapping, ==, mapping);
BUG_ON(xa_is_value(folio));
ASSERTCMP(folio_file_mapping(folio), ==, mapping);
put_page(page);
folio_put(folio);
}
rcu_read_unlock();
}
/*
* check that a directory page is valid
* check that a directory folio is valid
*/
static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
loff_t i_size)
static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
loff_t i_size)
{
struct afs_xdr_dir_page *dbuf;
loff_t latter, off;
int tmp, qty;
union afs_xdr_dir_block *block;
size_t offset, size;
loff_t pos;
/* Determine how many magic numbers there should be in this page, but
/* Determine how many magic numbers there should be in this folio, but
* we must take care because the directory may change size under us.
*/
off = page_offset(page);
if (i_size <= off)
pos = folio_pos(folio);
if (i_size <= pos)
goto checked;
latter = i_size - off;
if (latter >= PAGE_SIZE)
qty = PAGE_SIZE;
else
qty = latter;
qty /= sizeof(union afs_xdr_dir_block);
/* check them */
dbuf = kmap_atomic(page);
for (tmp = 0; tmp < qty; tmp++) {
if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
__func__, dvnode->vfs_inode.i_ino, tmp, qty,
ntohs(dbuf->blocks[tmp].hdr.magic));
trace_afs_dir_check_failed(dvnode, off, i_size);
kunmap(page);
size = min_t(loff_t, folio_size(folio), i_size - pos);
for (offset = 0; offset < size; offset += sizeof(*block)) {
block = kmap_local_folio(folio, offset);
if (block->hdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
__func__, dvnode->vfs_inode.i_ino,
pos, offset, size, ntohs(block->hdr.magic));
trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
kunmap_local(block);
trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
goto error;
}
/* Make sure each block is NUL terminated so we can reasonably
* use string functions on it. The filenames in the page
* use string functions on it. The filenames in the folio
* *should* be NUL-terminated anyway.
*/
((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
}
kunmap_atomic(dbuf);
((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
kunmap_local(block);
}
checked:
afs_stat_v(dvnode, n_read_dir);
return true;
......@@ -190,11 +181,11 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
*/
static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
{
struct afs_xdr_dir_page *dbuf;
union afs_xdr_dir_block *block;
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
struct page *page;
unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
struct folio *folio;
pgoff_t last = req->nr_pages - 1;
size_t offset, size;
XA_STATE(xas, &mapping->i_pages, 0);
......@@ -205,30 +196,28 @@ static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
req->pos, req->nr_pages,
req->iter->iov_offset, iov_iter_count(req->iter));
xas_for_each(&xas, page, last) {
if (xas_retry(&xas, page))
xas_for_each(&xas, folio, last) {
if (xas_retry(&xas, folio))
continue;
BUG_ON(PageCompound(page));
BUG_ON(page->mapping != mapping);
dbuf = kmap_atomic(page);
for (i = 0; i < qty; i++) {
union afs_xdr_dir_block *block = &dbuf->blocks[i];
BUG_ON(folio_file_mapping(folio) != mapping);
pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
for (offset = 0; offset < size; offset += sizeof(*block)) {
block = kmap_local_folio(folio, offset);
pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block);
kunmap_local(block);
}
kunmap_atomic(dbuf);
}
}
/*
* Check all the pages in a directory. All the pages are held pinned.
* Check all the blocks in a directory. All the folios are held pinned.
*/
static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
{
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
struct page *page;
struct folio *folio;
pgoff_t last = req->nr_pages - 1;
int ret = 0;
......@@ -238,14 +227,13 @@ static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
return 0;
rcu_read_lock();
xas_for_each(&xas, page, last) {
if (xas_retry(&xas, page))
xas_for_each(&xas, folio, last) {
if (xas_retry(&xas, folio))
continue;
BUG_ON(PageCompound(page));
BUG_ON(page->mapping != mapping);
BUG_ON(folio_file_mapping(folio) != mapping);
if (!afs_dir_check_page(dvnode, page, req->file_size)) {
if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
afs_dir_dump(dvnode, req);
ret = -EIO;
break;
......@@ -274,15 +262,16 @@ static int afs_dir_open(struct inode *inode, struct file *file)
/*
* Read the directory into the pagecache in one go, scrubbing the previous
* contents. The list of pages is returned, pinning them so that they don't
* contents. The list of folios is returned, pinning them so that they don't
* get reclaimed during the iteration.
*/
static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
__acquires(&dvnode->validate_lock)
{
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
struct afs_read *req;
loff_t i_size;
int nr_pages, i, n;
int nr_pages, i;
int ret;
_enter("");
......@@ -320,43 +309,30 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
req->iter = &req->def_iter;
/* Fill in any gaps that we might find where the memory reclaimer has
* been at work and pin all the pages. If there are any gaps, we will
* been at work and pin all the folios. If there are any gaps, we will
* need to reread the entire directory contents.
*/
i = req->nr_pages;
while (i < nr_pages) {
struct page *pages[8], *page;
n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
min_t(unsigned int, nr_pages - i,
ARRAY_SIZE(pages)),
pages);
_debug("find %u at %u/%u", n, i, nr_pages);
if (n == 0) {
gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
struct folio *folio;
folio = filemap_get_folio(mapping, i);
if (!folio) {
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_stat_v(dvnode, n_inval);
ret = -ENOMEM;
page = __page_cache_alloc(gfp);
if (!page)
folio = __filemap_get_folio(mapping,
i, FGP_LOCK | FGP_CREAT,
mapping->gfp_mask);
if (!folio)
goto error;
ret = add_to_page_cache_lru(page,
dvnode->vfs_inode.i_mapping,
i, gfp);
if (ret < 0)
goto error;
attach_page_private(page, (void *)1);
unlock_page(page);
req->nr_pages++;
i++;
} else {
req->nr_pages += n;
i += n;
folio_attach_private(folio, (void *)1);
folio_unlock(folio);
}
req->nr_pages += folio_nr_pages(folio);
i += folio_nr_pages(folio);
}
/* If we're going to reload, we need to lock all the pages to prevent
......@@ -424,7 +400,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
size_t nlen;
int tmp;
_enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block);
_enter("%llx,%x", ctx->pos, blkoff);
curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
......@@ -513,12 +489,10 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
struct key *key, afs_dataversion_t *_dir_version)
{
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_xdr_dir_page *dbuf;
union afs_xdr_dir_block *dblock;
struct afs_read *req;
struct page *page;
unsigned blkoff, limit;
void __rcu **slot;
struct folio *folio;
unsigned offset, size;
int ret;
_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
......@@ -540,43 +514,30 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
/* walk through the blocks in sequence */
ret = 0;
while (ctx->pos < req->actual_len) {
blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
/* Fetch the appropriate page from the directory and re-add it
/* Fetch the appropriate folio from the directory and re-add it
* to the LRU. We have all the pages pinned with an extra ref.
*/
rcu_read_lock();
page = NULL;
slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
blkoff / PAGE_SIZE);
if (slot)
page = radix_tree_deref_slot(slot);
rcu_read_unlock();
if (!page) {
folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
FGP_ACCESSED, 0);
if (!folio) {
ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
break;
}
mark_page_accessed(page);
limit = blkoff & ~(PAGE_SIZE - 1);
offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
size = min_t(loff_t, folio_size(folio),
req->actual_len - folio_file_pos(folio));
dbuf = kmap(page);
/* deal with the individual blocks stashed on this page */
do {
dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
sizeof(union afs_xdr_dir_block)];
ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff);
if (ret != 1) {
kunmap(page);
dblock = kmap_local_folio(folio, offset);
ret = afs_dir_iterate_block(dvnode, ctx, dblock,
folio_file_pos(folio) + offset);
kunmap_local(dblock);
if (ret != 1)
goto out;
}
blkoff += sizeof(union afs_xdr_dir_block);
} while (offset += sizeof(*dblock), offset < size);
} while (ctx->pos < dir->i_size && blkoff < limit);
kunmap(page);
ret = 0;
}
......@@ -2037,42 +1998,42 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
}
/*
* Release a directory page and clean up its private state if it's not busy
* - return true if the page can now be released, false if not
* Release a directory folio and clean up its private state if it's not busy
* - return true if the folio can now be released, false if not
*/
static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
static int afs_dir_releasepage(struct page *subpage, gfp_t gfp_flags)
{
struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
struct folio *folio = page_folio(subpage);
struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio));
detach_page_private(page);
folio_detach_private(folio);
/* The directory will need reloading. */
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_stat_v(dvnode, n_relpg);
return 1;
return true;
}
/*
* invalidate part or all of a page
* - release a page and clean up its private data if offset is 0 (indicating
* the entire page)
* Invalidate part or all of a folio.
*/
static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
static void afs_dir_invalidatepage(struct page *subpage, unsigned int offset,
unsigned int length)
{
struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
struct folio *folio = page_folio(subpage);
struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
_enter("{%lu},%u,%u", page->index, offset, length);
_enter("{%lu},%u,%u", folio_index(folio), offset, length);
BUG_ON(!PageLocked(page));
BUG_ON(!folio_test_locked(folio));
/* The directory will need reloading. */
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_stat_v(dvnode, n_inval);
/* we clean up only if the entire page is being invalidated */
if (offset == 0 && length == thp_size(page))
detach_page_private(page);
/* we clean up only if the entire folio is being invalidated */
if (offset == 0 && length == folio_size(folio))
folio_detach_private(folio);
}
......@@ -104,6 +104,25 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
block->hdr.bitmap[7] &= ~(u8)(mask >> 7 * 8);
}
/*
* Get a new directory folio.
*/
static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct folio *folio;
folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
mapping->gfp_mask);
if (!folio)
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
else if (folio && !folio_test_private(folio))
folio_attach_private(folio, (void *)1);
return folio;
}
/*
* Scan a directory block looking for a dirent of the right name.
*/
......@@ -188,13 +207,11 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
enum afs_edit_dir_reason why)
{
union afs_xdr_dir_block *meta, *block;
struct afs_xdr_dir_page *meta_page, *dir_page;
union afs_xdr_dirent *de;
struct page *page0, *page;
struct folio *folio0, *folio;
unsigned int need_slots, nr_blocks, b;
pgoff_t index;
loff_t i_size;
gfp_t gfp;
int slot;
_enter(",,{%d,%s},", name->len, name->name);
......@@ -206,10 +223,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
return;
}
gfp = vnode->vfs_inode.i_mapping->gfp_mask;
page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp);
if (!page0) {
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
folio0 = afs_dir_get_folio(vnode, 0);
if (!folio0) {
_leave(" [fgp]");
return;
}
......@@ -217,42 +232,35 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
/* Work out how many slots we're going to need. */
need_slots = afs_dir_calc_slots(name->len);
meta_page = kmap(page0);
meta = &meta_page->blocks[0];
meta = kmap_local_folio(folio0, 0);
if (i_size == 0)
goto new_directory;
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
/* Find a block that has sufficient slots available. Each VM page
/* Find a block that has sufficient slots available. Each folio
* contains two or more directory blocks.
*/
for (b = 0; b < nr_blocks + 1; b++) {
/* If the directory extended into a new page, then we need to
* tack a new page on the end.
/* If the directory extended into a new folio, then we need to
* tack a new folio on the end.
*/
index = b / AFS_DIR_BLOCKS_PER_PAGE;
if (index == 0) {
page = page0;
dir_page = meta_page;
} else {
if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
goto error;
gfp = vnode->vfs_inode.i_mapping->gfp_mask;
page = find_or_create_page(vnode->vfs_inode.i_mapping,
index, gfp);
if (!page)
if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
goto error;
if (index >= folio_nr_pages(folio0)) {
folio = afs_dir_get_folio(vnode, index);
if (!folio)
goto error;
if (!PagePrivate(page))
attach_page_private(page, (void *)1);
dir_page = kmap(page);
} else {
folio = folio0;
}
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
goto invalidated;
block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
_debug("block %u: %2u %3u %u",
b,
(b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99,
......@@ -266,7 +274,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
}
/* Only lower dir pages have a counter in the header. */
/* Only lower dir blocks have a counter in the header. */
if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
meta->meta.alloc_ctrs[b] >= need_slots) {
/* We need to try and find one or more consecutive
......@@ -279,10 +287,10 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
}
}
if (page != page0) {
unlock_page(page);
kunmap(page);
put_page(page);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
}
......@@ -298,8 +306,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
i_size = AFS_DIR_BLOCK_SIZE;
afs_set_i_size(vnode, i_size);
slot = AFS_DIR_RESV_BLOCKS0;
page = page0;
block = meta;
folio = folio0;
block = kmap_local_folio(folio, 0);
nr_blocks = 1;
b = 0;
......@@ -318,10 +326,10 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
/* Adjust the bitmap. */
afs_set_contig_bits(block, slot, need_slots);
if (page != page0) {
unlock_page(page);
kunmap(page);
put_page(page);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
/* Adjust the allocation counter. */
......@@ -333,18 +341,19 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
_debug("Insert %s in %u[%u]", name->name, b, slot);
out_unmap:
unlock_page(page0);
kunmap(page0);
put_page(page0);
kunmap_local(meta);
folio_unlock(folio0);
folio_put(folio0);
_leave("");
return;
invalidated:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
if (page != page0) {
kunmap(page);
put_page(page);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
goto out_unmap;
......@@ -364,10 +373,9 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
void afs_edit_dir_remove(struct afs_vnode *vnode,
struct qstr *name, enum afs_edit_dir_reason why)
{
struct afs_xdr_dir_page *meta_page, *dir_page;
union afs_xdr_dir_block *meta, *block;
union afs_xdr_dirent *de;
struct page *page0, *page;
struct folio *folio0, *folio;
unsigned int need_slots, nr_blocks, b;
pgoff_t index;
loff_t i_size;
......@@ -384,9 +392,8 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
}
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0);
if (!page0) {
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
folio0 = afs_dir_get_folio(vnode, 0);
if (!folio0) {
_leave(" [fgp]");
return;
}
......@@ -394,30 +401,27 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
/* Work out how many slots we're going to discard. */
need_slots = afs_dir_calc_slots(name->len);
meta_page = kmap(page0);
meta = &meta_page->blocks[0];
meta = kmap_local_folio(folio0, 0);
/* Find a page that has sufficient slots available. Each VM page
/* Find a block that has sufficient slots available. Each folio
* contains two or more directory blocks.
*/
for (b = 0; b < nr_blocks; b++) {
index = b / AFS_DIR_BLOCKS_PER_PAGE;
if (index != 0) {
page = find_lock_page(vnode->vfs_inode.i_mapping, index);
if (!page)
if (index >= folio_nr_pages(folio0)) {
folio = afs_dir_get_folio(vnode, index);
if (!folio)
goto error;
dir_page = kmap(page);
} else {
page = page0;
dir_page = meta_page;
folio = folio0;
}
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
goto invalidated;
block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
if (b > AFS_DIR_BLOCKS_WITH_CTR ||
meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
slot = afs_dir_scan_block(block, name, b);
......@@ -425,10 +429,10 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
goto found_dirent;
}
if (page != page0) {
unlock_page(page);
kunmap(page);
put_page(page);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
}
......@@ -449,10 +453,10 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
/* Adjust the bitmap. */
afs_clear_contig_bits(block, slot, need_slots);
if (page != page0) {
unlock_page(page);
kunmap(page);
put_page(page);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
/* Adjust the allocation counter. */
......@@ -464,9 +468,9 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
_debug("Remove %s from %u[%u]", name->name, b, slot);
out_unmap:
unlock_page(page0);
kunmap(page0);
put_page(page0);
kunmap_local(meta);
folio_unlock(folio0);
folio_put(folio0);
_leave("");
return;
......@@ -474,10 +478,10 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
0, 0, 0, 0, name->name);
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
if (page != page0) {
unlock_page(page);
kunmap(page);
put_page(page);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
goto out_unmap;
......
......@@ -324,21 +324,24 @@ static int afs_symlink_readpage(struct file *file, struct page *page)
{
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
struct afs_read *fsreq;
struct folio *folio = page_folio(page);
int ret;
fsreq = afs_alloc_read(GFP_NOFS);
if (!fsreq)
return -ENOMEM;
fsreq->pos = page->index * PAGE_SIZE;
fsreq->len = PAGE_SIZE;
fsreq->pos = folio_pos(folio);
fsreq->len = folio_size(folio);
fsreq->vnode = vnode;
fsreq->iter = &fsreq->def_iter;
iov_iter_xarray(&fsreq->def_iter, READ, &page->mapping->i_pages,
fsreq->pos, fsreq->len);
ret = afs_fetch_data(fsreq->vnode, fsreq);
page_endio(page, false, ret);
if (ret == 0)
SetPageUptodate(page);
unlock_page(page);
return ret;
}
......@@ -362,7 +365,7 @@ static int afs_begin_cache_operation(struct netfs_read_request *rreq)
}
static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
struct page *page, void **_fsdata)
struct folio *folio, void **_fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
......@@ -385,7 +388,9 @@ const struct netfs_read_request_ops afs_req_ops = {
static int afs_readpage(struct file *file, struct page *page)
{
return netfs_readpage(file, page, &afs_req_ops, NULL);
struct folio *folio = page_folio(page);
return netfs_readpage(file, folio, &afs_req_ops, NULL);
}
static void afs_readahead(struct readahead_control *ractl)
......@@ -397,29 +402,29 @@ static void afs_readahead(struct readahead_control *ractl)
* Adjust the dirty region of the page on truncation or full invalidation,
* getting rid of the markers altogether if the region is entirely invalidated.
*/
static void afs_invalidate_dirty(struct page *page, unsigned int offset,
static void afs_invalidate_dirty(struct folio *folio, unsigned int offset,
unsigned int length)
{
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
unsigned long priv;
unsigned int f, t, end = offset + length;
priv = page_private(page);
priv = (unsigned long)folio_get_private(folio);
/* we clean up only if the entire page is being invalidated */
if (offset == 0 && length == thp_size(page))
if (offset == 0 && length == folio_size(folio))
goto full_invalidate;
/* If the page was dirtied by page_mkwrite(), the PTE stays writable
* and we don't get another notification to tell us to expand it
* again.
*/
if (afs_is_page_dirty_mmapped(priv))
if (afs_is_folio_dirty_mmapped(priv))
return;
/* We may need to shorten the dirty region */
f = afs_page_dirty_from(page, priv);
t = afs_page_dirty_to(page, priv);
f = afs_folio_dirty_from(folio, priv);
t = afs_folio_dirty_to(folio, priv);
if (t <= offset || f >= end)
return; /* Doesn't overlap */
......@@ -437,17 +442,17 @@ static void afs_invalidate_dirty(struct page *page, unsigned int offset,
if (f == t)
goto undirty;
priv = afs_page_dirty(page, f, t);
set_page_private(page, priv);
trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page);
priv = afs_folio_dirty(folio, f, t);
folio_change_private(folio, (void *)priv);
trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
return;
undirty:
trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page);
clear_page_dirty_for_io(page);
trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
folio_clear_dirty_for_io(folio);
full_invalidate:
trace_afs_page_dirty(vnode, tracepoint_string("inval"), page);
detach_page_private(page);
trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
folio_detach_private(folio);
}
/*
......@@ -458,14 +463,16 @@ static void afs_invalidate_dirty(struct page *page, unsigned int offset,
static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{
_enter("{%lu},%u,%u", page->index, offset, length);
struct folio *folio = page_folio(page);
_enter("{%lu},%u,%u", folio_index(folio), offset, length);
BUG_ON(!PageLocked(page));
if (PagePrivate(page))
afs_invalidate_dirty(page, offset, length);
afs_invalidate_dirty(folio, offset, length);
wait_on_page_fscache(page);
folio_wait_fscache(folio);
_leave("");
}
......@@ -475,30 +482,31 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
*/
static int afs_releasepage(struct page *page, gfp_t gfp_flags)
{
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
struct folio *folio = page_folio(page);
struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
_enter("{{%llx:%llu}[%lu],%lx},%x",
vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
gfp_flags);
/* deny if page is being written to the cache and the caller hasn't
* elected to wait */
#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page)) {
if (folio_test_fscache(folio)) {
if (!(gfp_flags & __GFP_DIRECT_RECLAIM) || !(gfp_flags & __GFP_FS))
return false;
wait_on_page_fscache(page);
folio_wait_fscache(folio);
}
#endif
if (PagePrivate(page)) {
trace_afs_page_dirty(vnode, tracepoint_string("rel"), page);
detach_page_private(page);
if (folio_test_private(folio)) {
trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
folio_detach_private(folio);
}
/* indicate that the page can be released */
/* Indicate that the folio can be released */
_leave(" = T");
return 1;
return true;
}
static void afs_add_open_mmap(struct afs_vnode *vnode)
......
......@@ -876,59 +876,59 @@ struct afs_vnode_cache_aux {
} __packed;
/*
* We use page->private to hold the amount of the page that we've written to,
* We use folio->private to hold the amount of the folio that we've written to,
* splitting the field into two parts. However, we need to represent a range
* 0...PAGE_SIZE, so we reduce the resolution if the size of the page
* 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
* exceeds what we can encode.
*/
#ifdef CONFIG_64BIT
#define __AFS_PAGE_PRIV_MASK 0x7fffffffUL
#define __AFS_PAGE_PRIV_SHIFT 32
#define __AFS_PAGE_PRIV_MMAPPED 0x80000000UL
#define __AFS_FOLIO_PRIV_MASK 0x7fffffffUL
#define __AFS_FOLIO_PRIV_SHIFT 32
#define __AFS_FOLIO_PRIV_MMAPPED 0x80000000UL
#else
#define __AFS_PAGE_PRIV_MASK 0x7fffUL
#define __AFS_PAGE_PRIV_SHIFT 16
#define __AFS_PAGE_PRIV_MMAPPED 0x8000UL
#define __AFS_FOLIO_PRIV_MASK 0x7fffUL
#define __AFS_FOLIO_PRIV_SHIFT 16
#define __AFS_FOLIO_PRIV_MMAPPED 0x8000UL
#endif
static inline unsigned int afs_page_dirty_resolution(struct page *page)
static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
{
int shift = thp_order(page) + PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
return (shift > 0) ? shift : 0;
}
static inline size_t afs_page_dirty_from(struct page *page, unsigned long priv)
static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
{
unsigned long x = priv & __AFS_PAGE_PRIV_MASK;
unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
/* The lower bound is inclusive */
return x << afs_page_dirty_resolution(page);
return x << afs_folio_dirty_resolution(folio);
}
static inline size_t afs_page_dirty_to(struct page *page, unsigned long priv)
static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
{
unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
/* The upper bound is immediately beyond the region */
return (x + 1) << afs_page_dirty_resolution(page);
return (x + 1) << afs_folio_dirty_resolution(folio);
}
static inline unsigned long afs_page_dirty(struct page *page, size_t from, size_t to)
static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
{
unsigned int res = afs_page_dirty_resolution(page);
unsigned int res = afs_folio_dirty_resolution(folio);
from >>= res;
to = (to - 1) >> res;
return (to << __AFS_PAGE_PRIV_SHIFT) | from;
return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
}
static inline unsigned long afs_page_dirty_mmapped(unsigned long priv)
static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
{
return priv | __AFS_PAGE_PRIV_MMAPPED;
return priv | __AFS_FOLIO_PRIV_MMAPPED;
}
static inline bool afs_is_page_dirty_mmapped(unsigned long priv)
static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
{
return priv & __AFS_PAGE_PRIV_MMAPPED;
return priv & __AFS_FOLIO_PRIV_MMAPPED;
}
#include <trace/events/afs.h>
......
此差异已折叠。
......@@ -63,7 +63,7 @@
(CONGESTION_ON_THRESH(congestion_kb) >> 2))
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
struct page *page, void **_fsdata);
struct folio *folio, void **_fsdata);
static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
......@@ -317,13 +317,14 @@ static const struct netfs_read_request_ops ceph_netfs_read_ops = {
};
/* read a single page, without unlocking it. */
static int ceph_readpage(struct file *file, struct page *page)
static int ceph_readpage(struct file *file, struct page *subpage)
{
struct folio *folio = page_folio(subpage);
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_vino vino = ceph_vino(inode);
u64 off = page_offset(page);
u64 len = thp_size(page);
size_t len = folio_size(folio);
u64 off = folio_file_pos(folio);
if (ci->i_inline_version != CEPH_INLINE_NONE) {
/*
......@@ -331,19 +332,19 @@ static int ceph_readpage(struct file *file, struct page *page)
* into page cache while getting Fcr caps.
*/
if (off == 0) {
unlock_page(page);
folio_unlock(folio);
return -EINVAL;
}
zero_user_segment(page, 0, thp_size(page));
SetPageUptodate(page);
unlock_page(page);
zero_user_segment(&folio->page, 0, folio_size(folio));
folio_mark_uptodate(folio);
folio_unlock(folio);
return 0;
}
dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
vino.ino, vino.snap, file, off, len, page, page->index);
dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n",
vino.ino, vino.snap, file, off, len, folio, folio_index(folio));
return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL);
}
static void ceph_readahead(struct readahead_control *ractl)
......@@ -1187,18 +1188,18 @@ ceph_find_incompatible(struct page *page)
}
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
struct page *page, void **_fsdata)
struct folio *folio, void **_fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc;
snapc = ceph_find_incompatible(page);
snapc = ceph_find_incompatible(folio_page(folio, 0));
if (snapc) {
int r;
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
if (IS_ERR(snapc))
return PTR_ERR(snapc);
......@@ -1216,12 +1217,12 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned
* clean, or already dirty within the same snap context.
*/
static int ceph_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
loff_t pos, unsigned len, unsigned aop_flags,
struct page **pagep, void **fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *page = NULL;
struct folio *folio = NULL;
pgoff_t index = pos >> PAGE_SHIFT;
int r;
......@@ -1230,39 +1231,43 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
* for inline_version sent to the MDS.
*/
if (ci->i_inline_version != CEPH_INLINE_NONE) {
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
if (aop_flags & AOP_FLAG_NOFS)
fgp_flags |= FGP_NOFS;
folio = __filemap_get_folio(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
if (!folio)
return -ENOMEM;
/*
* The inline_version on a new inode is set to 1. If that's the
* case, then the page is brand new and isn't yet Uptodate.
* case, then the folio is brand new and isn't yet Uptodate.
*/
r = 0;
if (index == 0 && ci->i_inline_version != 1) {
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
ci->i_inline_version);
r = -EINVAL;
}
goto out;
}
zero_user_segment(page, 0, thp_size(page));
SetPageUptodate(page);
zero_user_segment(&folio->page, 0, folio_size(folio));
folio_mark_uptodate(folio);
goto out;
}
r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
&ceph_netfs_read_ops, NULL);
out:
if (r == 0)
wait_on_page_fscache(page);
folio_wait_fscache(folio);
if (r < 0) {
if (page)
put_page(page);
if (folio)
folio_put(folio);
} else {
WARN_ON_ONCE(!PageLocked(page));
*pagep = page;
WARN_ON_ONCE(!folio_test_locked(folio));
*pagep = &folio->page;
}
return r;
}
......@@ -1273,32 +1278,33 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
*/
static int ceph_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
struct page *subpage, void *fsdata)
{
struct folio *folio = page_folio(subpage);
struct inode *inode = file_inode(file);
bool check_cap = false;
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len);
dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file,
inode, folio, (int)pos, (int)copied, (int)len);
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
/* just return that nothing was copied on a short copy */
if (copied < len) {
copied = 0;
goto out;
}
SetPageUptodate(page);
folio_mark_uptodate(folio);
}
/* did file size increase? */
if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
set_page_dirty(page);
folio_mark_dirty(folio);
out:
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
if (check_cap)
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
......
......@@ -230,7 +230,7 @@ static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async
/*
* Deal with the completion of writing the data to the cache. We have to clear
* the PG_fscache bits on the pages involved and release the caller's ref.
* the PG_fscache bits on the folios involved and release the caller's ref.
*
* May be called in softirq mode and we inherit a ref from the caller.
*/
......@@ -238,7 +238,7 @@ static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
bool was_async)
{
struct netfs_read_subrequest *subreq;
struct page *page;
struct folio *folio;
pgoff_t unlocked = 0;
bool have_unlocked = false;
......@@ -247,14 +247,14 @@ static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
xas_for_each(&xas, page, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
/* We might have multiple writes from the same huge
* page, but we mustn't unlock a page more than once.
* folio, but we mustn't unlock a folio more than once.
*/
if (have_unlocked && page->index <= unlocked)
if (have_unlocked && folio_index(folio) <= unlocked)
continue;
unlocked = page->index;
end_page_fscache(page);
unlocked = folio_index(folio);
folio_end_fscache(folio);
have_unlocked = true;
}
}
......@@ -367,18 +367,17 @@ static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
}
/*
* Unlock the pages in a read operation. We need to set PG_fscache on any
* pages we're going to write back before we unlock them.
* Unlock the folios in a read operation. We need to set PG_fscache on any
* folios we're going to write back before we unlock them.
*/
static void netfs_rreq_unlock(struct netfs_read_request *rreq)
{
struct netfs_read_subrequest *subreq;
struct page *page;
struct folio *folio;
unsigned int iopos, account = 0;
pgoff_t start_page = rreq->start / PAGE_SIZE;
pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
bool subreq_failed = false;
int i;
XA_STATE(xas, &rreq->mapping->i_pages, start_page);
......@@ -403,9 +402,9 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
rcu_read_lock();
xas_for_each(&xas, page, last_page) {
unsigned int pgpos = (page->index - start_page) * PAGE_SIZE;
unsigned int pgend = pgpos + thp_size(page);
xas_for_each(&xas, folio, last_page) {
unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
unsigned int pgend = pgpos + folio_size(folio);
bool pg_failed = false;
for (;;) {
......@@ -414,7 +413,7 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
break;
}
if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
set_page_fscache(page);
folio_start_fscache(folio);
pg_failed |= subreq_failed;
if (pgend < iopos + subreq->len)
break;
......@@ -433,17 +432,16 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
}
if (!pg_failed) {
for (i = 0; i < thp_nr_pages(page); i++)
flush_dcache_page(page);
SetPageUptodate(page);
flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_PAGES, &rreq->flags)) {
if (page->index == rreq->no_unlock_page &&
test_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags))
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio_index(folio) == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
_debug("no unlock");
else
unlock_page(page);
folio_unlock(folio);
}
}
rcu_read_unlock();
......@@ -876,7 +874,6 @@ void netfs_readahead(struct readahead_control *ractl,
void *netfs_priv)
{
struct netfs_read_request *rreq;
struct page *page;
unsigned int debug_index = 0;
int ret;
......@@ -911,11 +908,11 @@ void netfs_readahead(struct readahead_control *ractl,
} while (rreq->submitted < rreq->len);
/* Drop the refs on the pages here rather than in the cache or
/* Drop the refs on the folios here rather than in the cache or
* filesystem. The locks will be dropped in netfs_rreq_unlock().
*/
while ((page = readahead_page(ractl)))
put_page(page);
while (readahead_folio(ractl))
;
/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
if (atomic_dec_and_test(&rreq->nr_rd_ops))
......@@ -935,7 +932,7 @@ EXPORT_SYMBOL(netfs_readahead);
/**
* netfs_readpage - Helper to manage a readpage request
* @file: The file to read from
* @page: The page to read
* @folio: The folio to read
* @ops: The network filesystem's operations for the helper to use
* @netfs_priv: Private netfs data to be retained in the request
*
......@@ -950,7 +947,7 @@ EXPORT_SYMBOL(netfs_readahead);
* This is usable whether or not caching is enabled.
*/
int netfs_readpage(struct file *file,
struct page *page,
struct folio *folio,
const struct netfs_read_request_ops *ops,
void *netfs_priv)
{
......@@ -958,23 +955,23 @@ int netfs_readpage(struct file *file,
unsigned int debug_index = 0;
int ret;
_enter("%lx", page_index(page));
_enter("%lx", folio_index(folio));
rreq = netfs_alloc_read_request(ops, netfs_priv, file);
if (!rreq) {
if (netfs_priv)
ops->cleanup(netfs_priv, page_file_mapping(page));
unlock_page(page);
ops->cleanup(netfs_priv, folio_file_mapping(folio));
folio_unlock(folio);
return -ENOMEM;
}
rreq->mapping = page_file_mapping(page);
rreq->start = page_file_offset(page);
rreq->len = thp_size(page);
rreq->mapping = folio_file_mapping(folio);
rreq->start = folio_file_pos(folio);
rreq->len = folio_size(folio);
if (ops->begin_cache_operation) {
ret = ops->begin_cache_operation(rreq);
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
unlock_page(page);
folio_unlock(folio);
goto out;
}
}
......@@ -1012,40 +1009,40 @@ int netfs_readpage(struct file *file,
EXPORT_SYMBOL(netfs_readpage);
/**
* netfs_skip_page_read - prep a page for writing without reading first
* @page: page being prepared
* netfs_skip_folio_read - prep a folio for writing without reading first
* @folio: The folio being prepared
* @pos: starting position for the write
* @len: length of write
*
* In some cases, write_begin doesn't need to read at all:
* - full page write
* - write that lies in a page that is completely beyond EOF
* - write that covers the the page from start to EOF or beyond it
* - full folio write
* - write that lies in a folio that is completely beyond EOF
* - write that covers the folio from start to EOF or beyond it
*
* If any of these criteria are met, then zero out the unwritten parts
* of the page and return true. Otherwise, return false.
* of the folio and return true. Otherwise, return false.
*/
static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
{
struct inode *inode = page->mapping->host;
struct inode *inode = folio_inode(folio);
loff_t i_size = i_size_read(inode);
size_t offset = offset_in_thp(page, pos);
size_t offset = offset_in_folio(folio, pos);
/* Full page write */
if (offset == 0 && len >= thp_size(page))
/* Full folio write */
if (offset == 0 && len >= folio_size(folio))
return true;
/* pos beyond last page in the file */
/* pos beyond last folio in the file */
if (pos - offset >= i_size)
goto zero_out;
/* Write that covers from the start of the page to EOF or beyond */
/* Write that covers from the start of the folio to EOF or beyond */
if (offset == 0 && (pos + len) >= i_size)
goto zero_out;
return false;
zero_out:
zero_user_segments(page, 0, offset, offset + len, thp_size(page));
zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
return true;
}
......@@ -1054,9 +1051,9 @@ static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
* @file: The file to read from
* @mapping: The mapping to read from
* @pos: File position at which the write will begin
* @len: The length of the write (may extend beyond the end of the page chosen)
* @flags: AOP_* flags
* @_page: Where to put the resultant page
* @len: The length of the write (may extend beyond the end of the folio chosen)
* @aop_flags: AOP_* flags
* @_folio: Where to put the resultant folio
* @_fsdata: Place for the netfs to store a cookie
* @ops: The network filesystem's operations for the helper to use
* @netfs_priv: Private netfs data to be retained in the request
......@@ -1072,37 +1069,41 @@ static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
* issue_op, is mandatory.
*
* The check_write_begin() operation can be provided to check for and flush
* conflicting writes once the page is grabbed and locked. It is passed a
* conflicting writes once the folio is grabbed and locked. It is passed a
* pointer to the fsdata cookie that gets returned to the VM to be passed to
* write_end. It is permitted to sleep. It should return 0 if the request
* should go ahead; unlock the page and return -EAGAIN to cause the page to be
* regot; or return an error.
* should go ahead; unlock the folio and return -EAGAIN to cause the folio to
* be regot; or return an error.
*
* This is usable whether or not caching is enabled.
*/
int netfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned int len, unsigned int flags,
struct page **_page, void **_fsdata,
loff_t pos, unsigned int len, unsigned int aop_flags,
struct folio **_folio, void **_fsdata,
const struct netfs_read_request_ops *ops,
void *netfs_priv)
{
struct netfs_read_request *rreq;
struct page *page, *xpage;
struct folio *folio;
struct inode *inode = file_inode(file);
unsigned int debug_index = 0;
unsigned int debug_index = 0, fgp_flags;
pgoff_t index = pos >> PAGE_SHIFT;
int ret;
DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
retry:
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
if (aop_flags & AOP_FLAG_NOFS)
fgp_flags |= FGP_NOFS;
folio = __filemap_get_folio(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
if (!folio)
return -ENOMEM;
if (ops->check_write_begin) {
/* Allow the netfs (eg. ceph) to flush conflicts. */
ret = ops->check_write_begin(file, pos, len, page, _fsdata);
ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
if (ret < 0) {
trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
if (ret == -EAGAIN)
......@@ -1111,28 +1112,28 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
}
}
if (PageUptodate(page))
goto have_page;
if (folio_test_uptodate(folio))
goto have_folio;
/* If the page is beyond the EOF, we want to clear it - unless it's
* within the cache granule containing the EOF, in which case we need
* to preload the granule.
*/
if (!ops->is_cache_enabled(inode) &&
netfs_skip_page_read(page, pos, len)) {
netfs_skip_folio_read(folio, pos, len)) {
netfs_stat(&netfs_n_rh_write_zskip);
goto have_page_no_wait;
goto have_folio_no_wait;
}
ret = -ENOMEM;
rreq = netfs_alloc_read_request(ops, netfs_priv, file);
if (!rreq)
goto error;
rreq->mapping = page->mapping;
rreq->start = page_offset(page);
rreq->len = thp_size(page);
rreq->no_unlock_page = page->index;
__set_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags);
rreq->mapping = folio_file_mapping(folio);
rreq->start = folio_file_pos(folio);
rreq->len = folio_size(folio);
rreq->no_unlock_folio = folio_index(folio);
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
netfs_priv = NULL;
if (ops->begin_cache_operation) {
......@@ -1147,14 +1148,14 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
/* Expand the request to meet caching requirements and download
* preferences.
*/
ractl._nr_pages = thp_nr_pages(page);
ractl._nr_pages = folio_nr_pages(folio);
netfs_rreq_expand(rreq, &ractl);
netfs_get_read_request(rreq);
/* We hold the page locks, so we can drop the references */
while ((xpage = readahead_page(&ractl)))
if (xpage != page)
put_page(xpage);
/* We hold the folio locks, so we can drop the references */
folio_get(folio);
while (readahead_folio(&ractl))
;
atomic_set(&rreq->nr_rd_ops, 1);
do {
......@@ -1184,22 +1185,22 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
if (ret < 0)
goto error;
have_page:
ret = wait_on_page_fscache_killable(page);
have_folio:
ret = folio_wait_fscache_killable(folio);
if (ret < 0)
goto error;
have_page_no_wait:
have_folio_no_wait:
if (netfs_priv)
ops->cleanup(netfs_priv, mapping);
*_page = page;
*_folio = folio;
_leave(" = 0");
return 0;
error_put:
netfs_put_read_request(rreq, false);
error:
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
if (netfs_priv)
ops->cleanup(netfs_priv, mapping);
_leave(" = %d", ret);
......
......@@ -166,13 +166,13 @@ struct netfs_read_request {
short error; /* 0 or error that occurred */
loff_t i_size; /* Size of the file */
loff_t start; /* Start position */
pgoff_t no_unlock_page; /* Don't unlock this page after read */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
refcount_t usage;
unsigned long flags;
#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */
#define NETFS_RREQ_WRITE_TO_CACHE 1 /* Need to write to the cache */
#define NETFS_RREQ_NO_UNLOCK_PAGE 2 /* Don't unlock no_unlock_page on completion */
#define NETFS_RREQ_DONT_UNLOCK_PAGES 3 /* Don't unlock the pages on completion */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED 4 /* The request failed */
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
const struct netfs_read_request_ops *netfs_ops;
......@@ -190,7 +190,7 @@ struct netfs_read_request_ops {
void (*issue_op)(struct netfs_read_subrequest *subreq);
bool (*is_still_valid)(struct netfs_read_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
struct page *page, void **_fsdata);
struct folio *folio, void **_fsdata);
void (*done)(struct netfs_read_request *rreq);
void (*cleanup)(struct address_space *mapping, void *netfs_priv);
};
......@@ -240,11 +240,11 @@ extern void netfs_readahead(struct readahead_control *,
const struct netfs_read_request_ops *,
void *);
extern int netfs_readpage(struct file *,
struct page *,
struct folio *,
const struct netfs_read_request_ops *,
void *);
extern int netfs_write_begin(struct file *, struct address_space *,
loff_t, unsigned int, unsigned int, struct page **,
loff_t, unsigned int, unsigned int, struct folio **,
void **,
const struct netfs_read_request_ops *,
void *);
......
......@@ -253,6 +253,20 @@ static inline struct address_space *page_mapping_file(struct page *page)
return folio_mapping(folio);
}
/**
* folio_inode - Get the host inode for this folio.
* @folio: The folio.
*
* For folios which are in the page cache, return the inode that this folio
* belongs to.
*
* Do not call this for folios which aren't in the page cache.
*/
static inline struct inode *folio_inode(struct folio *folio)
{
return folio->mapping->host;
}
static inline bool page_cache_add_speculative(struct page *page, int count)
{
VM_BUG_ON_PAGE(PageTail(page), page);
......@@ -279,6 +293,25 @@ static inline void folio_attach_private(struct folio *folio, void *data)
folio_set_private(folio);
}
/**
* folio_change_private - Change private data on a folio.
* @folio: Folio to change the data on.
* @data: Data to set on the folio.
*
* Change the private data attached to a folio and return the old
* data. The page must previously have had data attached and the data
* must be detached before the folio will be freed.
*
* Return: Data that was previously attached to the folio.
*/
static inline void *folio_change_private(struct folio *folio, void *data)
{
void *old = folio_get_private(folio);
folio->private = data;
return old;
}
/**
* folio_detach_private - Detach private data from a folio.
* @folio: Folio to detach data from.
......
......@@ -1016,31 +1016,32 @@ TRACE_EVENT(afs_dir_check_failed,
__entry->vnode, __entry->off, __entry->i_size)
);
TRACE_EVENT(afs_page_dirty,
TP_PROTO(struct afs_vnode *vnode, const char *where, struct page *page),
TRACE_EVENT(afs_folio_dirty,
TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
TP_ARGS(vnode, where, page),
TP_ARGS(vnode, where, folio),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode )
__field(const char *, where )
__field(pgoff_t, page )
__field(pgoff_t, index )
__field(unsigned long, from )
__field(unsigned long, to )
),
TP_fast_assign(
unsigned long priv = (unsigned long)folio_get_private(folio);
__entry->vnode = vnode;
__entry->where = where;
__entry->page = page->index;
__entry->from = afs_page_dirty_from(page, page->private);
__entry->to = afs_page_dirty_to(page, page->private);
__entry->to |= (afs_is_page_dirty_mmapped(page->private) ?
(1UL << (BITS_PER_LONG - 1)) : 0);
__entry->index = folio_index(folio);
__entry->from = afs_folio_dirty_from(folio, priv);
__entry->to = afs_folio_dirty_to(folio, priv);
__entry->to |= (afs_is_folio_dirty_mmapped(priv) ?
(1UL << (BITS_PER_LONG - 1)) : 0);
),
TP_printk("vn=%p %lx %s %lx-%lx%s",
__entry->vnode, __entry->page, __entry->where,
__entry->vnode, __entry->index, __entry->where,
__entry->from,
__entry->to & ~(1UL << (BITS_PER_LONG - 1)),
__entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
......
......@@ -2967,7 +2967,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
*/
void folio_wait_stable(struct folio *folio)
{
if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
folio_wait_writeback(folio);
}
EXPORT_SYMBOL_GPL(folio_wait_stable);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册