From 4343d00872e1de9a470d951bf09bdd18bc73f555 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:52 +0000 Subject: [PATCH] afs: Get rid of the afs_writeback record Get rid of the afs_writeback record that kAFS is using to match keys with writes made by that key. Instead, keep a list of keys that have a file open for writing and/or sync'ing and iterate through those. Signed-off-by: David Howells --- fs/afs/file.c | 83 ++++-- fs/afs/fsclient.c | 24 +- fs/afs/inode.c | 11 +- fs/afs/internal.h | 51 ++-- fs/afs/super.c | 4 +- fs/afs/write.c | 633 +++++++++++++++++++++++----------------------- mm/filemap.c | 1 + 7 files changed, 412 insertions(+), 395 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index e33b34f01795..c3a7bc1281f5 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -23,7 +23,6 @@ static int afs_readpage(struct file *file, struct page *page); static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length); static int afs_releasepage(struct page *page, gfp_t gfp_flags); -static int afs_launder_page(struct page *page); static int afs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); @@ -62,6 +61,50 @@ const struct address_space_operations afs_fs_aops = { .writepages = afs_writepages, }; +/* + * Discard a pin on a writeback key. + */ +void afs_put_wb_key(struct afs_wb_key *wbk) +{ + if (refcount_dec_and_test(&wbk->usage)) { + key_put(wbk->key); + kfree(wbk); + } +} + +/* + * Cache key for writeback. + */ +int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af) +{ + struct afs_wb_key *wbk, *p; + + wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL); + if (!wbk) + return -ENOMEM; + refcount_set(&wbk->usage, 2); + wbk->key = af->key; + + spin_lock(&vnode->wb_lock); + list_for_each_entry(p, &vnode->wb_keys, vnode_link) { + if (p->key == wbk->key) + goto found; + } + + key_get(wbk->key); + list_add_tail(&wbk->vnode_link, &vnode->wb_keys); + spin_unlock(&vnode->wb_lock); + af->wb = wbk; + return 0; + +found: + refcount_inc(&p->usage); + spin_unlock(&vnode->wb_lock); + af->wb = p; + kfree(wbk); + return 0; +} + /* * open an AFS file or directory and attach a key to it */ @@ -85,12 +128,18 @@ int afs_open(struct inode *inode, struct file *file) ret = -ENOMEM; goto error_key; } + af->key = key; ret = afs_validate(vnode, key); if (ret < 0) goto error_af; - af->key = key; + if (file->f_mode & FMODE_WRITE) { + ret = afs_cache_wb_key(vnode, af); + if (ret < 0) + goto error_af; + } + file->private_data = af; _leave(" = 0"); return 0; @@ -115,8 +164,11 @@ int afs_release(struct inode *inode, struct file *file) _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); file->private_data = NULL; + if (af->wb) + afs_put_wb_key(af->wb); key_put(af->key); kfree(af); + afs_prune_wb_keys(vnode); _leave(" = 0"); return 0; } @@ -516,16 +568,6 @@ static int afs_readpages(struct file *file, struct address_space *mapping, return ret; } -/* - * write back a dirty page - */ -static int afs_launder_page(struct page *page) -{ - _enter("{%lu}", page->index); - - return 0; -} - /* * invalidate part or all of a page * - release a page and clean up its private data if offset is 0 (indicating @@ -534,8 +576,6 @@ static int afs_launder_page(struct page *page) static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length) { - struct afs_writeback *wb = (struct afs_writeback *) page_private(page); - _enter("{%lu},%u,%u", page->index, offset, length); BUG_ON(!PageLocked(page)); @@ -551,13 +591,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, #endif if (PagePrivate(page)) { - if (wb && !PageWriteback(page)) { - set_page_private(page, 0); - afs_put_writeback(wb); - } - - if (!page_private(page)) - ClearPagePrivate(page); + set_page_private(page, 0); + ClearPagePrivate(page); } } @@ -570,7 +605,6 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, */ static int afs_releasepage(struct page *page, gfp_t gfp_flags) { - struct afs_writeback *wb = (struct afs_writeback *) page_private(page); struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); _enter("{{%x:%u}[%lu],%lx},%x", @@ -587,10 +621,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) #endif if (PagePrivate(page)) { - if (wb) { - set_page_private(page, 0); - afs_put_writeback(wb); - } + set_page_private(page, 0); ClearPagePrivate(page); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 10e5ead629c2..b90ef39ae914 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1119,18 +1119,18 @@ static const struct afs_call_type afs_RXFSStoreData64 = { * store a set of pages to a very large file */ static int afs_fs_store_data64(struct afs_fs_cursor *fc, - struct afs_writeback *wb, + struct address_space *mapping, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, loff_t size, loff_t pos, loff_t i_size) { - struct afs_vnode *vnode = wb->vnode; + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreData64, (4 + 6 + 3 * 2) * 4, @@ -1138,10 +1138,9 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->wb = wb; - call->key = wb->key; + call->key = fc->key; + call->mapping = mapping; call->reply[0] = vnode; - call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; call->first_offset = offset; @@ -1177,18 +1176,18 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, /* * store a set of pages */ -int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, +int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, pgoff_t first, pgoff_t last, unsigned offset, unsigned to) { - struct afs_vnode *vnode = wb->vnode; + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); loff_t size, pos, i_size; __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); size = (loff_t)to - (loff_t)offset; if (first != last) @@ -1205,7 +1204,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, (unsigned long long) i_size); if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) - return afs_fs_store_data64(fc, wb, first, last, offset, to, + return afs_fs_store_data64(fc, mapping, first, last, offset, to, size, pos, i_size); call = afs_alloc_flat_call(net, &afs_RXFSStoreData, @@ -1214,10 +1213,9 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, if (!call) return -ENOMEM; - call->wb = wb; - call->key = wb->key; + call->key = fc->key; + call->mapping = mapping; call->reply[0] = vnode; - call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; call->first_offset = offset; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index da2ba7a68cac..3415eb7484f6 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -482,7 +482,12 @@ void afs_evict_inode(struct inode *inode) vnode->cb_interest = NULL; } - ASSERT(list_empty(&vnode->writebacks)); + while (!list_empty(&vnode->wb_keys)) { + struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next, + struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + afs_put_wb_key(wbk); + } #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(vnode->cache, 0); @@ -514,10 +519,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } /* flush any dirty data outstanding on a regular file */ - if (S_ISREG(vnode->vfs_inode.i_mode)) { + if (S_ISREG(vnode->vfs_inode.i_mode)) filemap_write_and_wait(vnode->vfs_inode.i_mapping); - afs_writeback_all(vnode); - } if (attr->ia_valid & ATTR_FILE) { key = afs_file_key(attr->ia_file); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index facf5b9844d2..688562ae3bf8 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -89,8 +89,7 @@ struct afs_call { struct afs_server *cm_server; /* Server affected by incoming CM call */ struct afs_cb_interest *cbi; /* Callback interest for server used */ void *request; /* request data (first part) */ - struct address_space *mapping; /* page set */ - struct afs_writeback *wb; /* writeback being performed */ + struct address_space *mapping; /* Pages being written from */ void *buffer; /* reply receive buffer */ void *reply[4]; /* Where to put the reply */ pgoff_t first; /* first page in mapping to deal with */ @@ -138,11 +137,21 @@ struct afs_call_type { void (*work)(struct work_struct *work); }; +/* + * Key available for writeback on a file. + */ +struct afs_wb_key { + refcount_t usage; + struct key *key; + struct list_head vnode_link; /* Link in vnode->wb_keys */ +}; + /* * AFS open file information record. Pointed to by file->private_data. */ struct afs_file { struct key *key; /* The key this file was opened with */ + struct afs_wb_key *wb; /* Writeback key record for this file */ }; static inline struct key *afs_file_key(struct file *file) @@ -167,32 +176,6 @@ struct afs_read { struct page *pages[]; }; -/* - * record of an outstanding writeback on a vnode - */ -struct afs_writeback { - struct list_head link; /* link in vnode->writebacks */ - struct work_struct writer; /* work item to perform the writeback */ - struct afs_vnode *vnode; /* vnode to which this write applies */ - struct key *key; /* owner of this write */ - wait_queue_head_t waitq; /* completion and ready wait queue */ - pgoff_t first; /* first page in batch */ - pgoff_t point; /* last page in current store op */ - pgoff_t last; /* last page in batch (inclusive) */ - unsigned offset_first; /* offset into first page of start of write */ - unsigned to_last; /* offset into last page of end of write */ - int num_conflicts; /* count of conflicting writes in list */ - int usage; - bool conflicts; /* T if has dependent conflicts */ - enum { - AFS_WBACK_SYNCING, /* synchronisation being performed */ - AFS_WBACK_PENDING, /* write pending */ - AFS_WBACK_CONFLICTING, /* conflicting writes posted */ - AFS_WBACK_WRITING, /* writing back */ - AFS_WBACK_COMPLETE /* the writeback record has been unlinked */ - } state __attribute__((packed)); -}; - /* * AFS superblock private data * - there's one superblock per volume @@ -460,7 +443,7 @@ struct afs_vnode { struct afs_permits *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ struct mutex validate_lock; /* lock for validating this vnode */ - spinlock_t writeback_lock; /* lock for writebacks */ + spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; #define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ @@ -476,7 +459,7 @@ struct afs_vnode { #define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ - struct list_head writebacks; /* alterations in pagecache that need writing */ + struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ struct list_head granted_locks; /* locks granted on this file */ struct delayed_work lock_work; /* work to be done in locking */ @@ -648,6 +631,8 @@ extern const struct address_space_operations afs_fs_aops; extern const struct inode_operations afs_file_inode_operations; extern const struct file_operations afs_file_operations; +extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *); +extern void afs_put_wb_key(struct afs_wb_key *); extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); @@ -678,7 +663,7 @@ extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, struct afs_fid *, struct afs_file_status *); extern int afs_fs_rename(struct afs_fs_cursor *, const char *, struct afs_vnode *, const char *); -extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *, +extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, pgoff_t, pgoff_t, unsigned, unsigned); extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); @@ -889,7 +874,6 @@ extern int afs_check_volume_status(struct afs_volume *, struct key *); * write.c */ extern int afs_set_page_dirty(struct page *); -extern void afs_put_writeback(struct afs_writeback *); extern int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); @@ -900,9 +884,10 @@ extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); -extern int afs_writeback_all(struct afs_vnode *); extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); +extern void afs_prune_wb_keys(struct afs_vnode *); +extern int afs_launder_page(struct page *); /* * xattr.c diff --git a/fs/afs/super.c b/fs/afs/super.c index af1e769aaebf..875b5eb02242 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -546,9 +546,9 @@ static void afs_i_init_once(void *_vnode) inode_init_once(&vnode->vfs_inode); mutex_init(&vnode->io_lock); mutex_init(&vnode->validate_lock); - spin_lock_init(&vnode->writeback_lock); + spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); - INIT_LIST_HEAD(&vnode->writebacks); + INIT_LIST_HEAD(&vnode->wb_keys); INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); diff --git a/fs/afs/write.c b/fs/afs/write.c index 1cdd0e3cd531..4c131371005b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -8,6 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + #include #include #include @@ -16,8 +17,18 @@ #include #include "internal.h" -static int afs_write_back_from_locked_page(struct afs_writeback *wb, - struct page *page); +/* + * We use page->private to hold the amount of the page that we've written to, + * splitting the field into two parts. However, we need to represent a range + * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. + */ +#if PAGE_SIZE > 32768 +#define AFS_PRIV_MAX 0xffffffff +#define AFS_PRIV_SHIFT 32 +#else +#define AFS_PRIV_MAX 0xffff +#define AFS_PRIV_SHIFT 16 +#endif /* * mark a page as having been made dirty and thus needing writeback @@ -28,58 +39,6 @@ int afs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -/* - * unlink a writeback record because its usage has reached zero - * - must be called with the wb->vnode->writeback_lock held - */ -static void afs_unlink_writeback(struct afs_writeback *wb) -{ - struct afs_writeback *front; - struct afs_vnode *vnode = wb->vnode; - - list_del_init(&wb->link); - if (!list_empty(&vnode->writebacks)) { - /* if an fsync rises to the front of the queue then wake it - * up */ - front = list_entry(vnode->writebacks.next, - struct afs_writeback, link); - if (front->state == AFS_WBACK_SYNCING) { - _debug("wake up sync"); - front->state = AFS_WBACK_COMPLETE; - wake_up(&front->waitq); - } - } -} - -/* - * free a writeback record - */ -static void afs_free_writeback(struct afs_writeback *wb) -{ - _enter(""); - key_put(wb->key); - kfree(wb); -} - -/* - * dispose of a reference to a writeback record - */ -void afs_put_writeback(struct afs_writeback *wb) -{ - struct afs_vnode *vnode = wb->vnode; - - _enter("{%d}", wb->usage); - - spin_lock(&vnode->writeback_lock); - if (--wb->usage == 0) - afs_unlink_writeback(wb); - else - wb = NULL; - spin_unlock(&vnode->writeback_lock); - if (wb) - afs_free_writeback(wb); -} - /* * partly or wholly fill a page that's under preparation for writing */ @@ -125,42 +84,32 @@ int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - struct afs_writeback *candidate, *wb; struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; struct key *key = afs_file_key(file); - unsigned from = pos & (PAGE_SIZE - 1); - unsigned to = from + len; + unsigned long priv; + unsigned f, from = pos & (PAGE_SIZE - 1); + unsigned t, to = from + len; pgoff_t index = pos >> PAGE_SHIFT; int ret; _enter("{%x:%u},{%lx},%u,%u", vnode->fid.vid, vnode->fid.vnode, index, from, to); - candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); - if (!candidate) - return -ENOMEM; - candidate->vnode = vnode; - candidate->first = candidate->last = index; - candidate->offset_first = from; - candidate->to_last = to; - INIT_LIST_HEAD(&candidate->link); - candidate->usage = 1; - candidate->state = AFS_WBACK_PENDING; - init_waitqueue_head(&candidate->waitq); + /* We want to store information about how much of a page is altered in + * page->private. + */ + BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8); page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) { - kfree(candidate); + if (!page) return -ENOMEM; - } if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { unlock_page(page); put_page(page); - kfree(candidate); _leave(" = %d [prep]", ret); return ret; } @@ -171,79 +120,52 @@ int afs_write_begin(struct file *file, struct address_space *mapping, *pagep = page; try_again: - spin_lock(&vnode->writeback_lock); - - /* see if this page is already pending a writeback under a suitable key - * - if so we can just join onto that one */ - wb = (struct afs_writeback *) page_private(page); - if (wb) { - if (wb->key == key && wb->state == AFS_WBACK_PENDING) - goto subsume_in_current_wb; - goto flush_conflicting_wb; + /* See if this page is already partially written in a way that we can + * merge the new write with. + */ + t = f = 0; + if (PagePrivate(page)) { + priv = page_private(page); + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + ASSERTCMP(f, <=, t); } - if (index > 0) { - /* see if we can find an already pending writeback that we can - * append this page to */ - list_for_each_entry(wb, &vnode->writebacks, link) { - if (wb->last == index - 1 && wb->key == key && - wb->state == AFS_WBACK_PENDING) - goto append_to_previous_wb; - } + if (f != t) { + if (to < f || from > t) + goto flush_conflicting_write; + if (from < f) + f = from; + if (to > t) + t = to; + } else { + f = from; + t = to; } - list_add_tail(&candidate->link, &vnode->writebacks); - candidate->key = key_get(key); - spin_unlock(&vnode->writeback_lock); + priv = (unsigned long)t << AFS_PRIV_SHIFT; + priv |= f; SetPagePrivate(page); - set_page_private(page, (unsigned long) candidate); - _leave(" = 0 [new]"); - return 0; - -subsume_in_current_wb: - _debug("subsume"); - ASSERTRANGE(wb->first, <=, index, <=, wb->last); - if (index == wb->first && from < wb->offset_first) - wb->offset_first = from; - if (index == wb->last && to > wb->to_last) - wb->to_last = to; - spin_unlock(&vnode->writeback_lock); - kfree(candidate); - _leave(" = 0 [sub]"); - return 0; - -append_to_previous_wb: - _debug("append into %lx-%lx", wb->first, wb->last); - wb->usage++; - wb->last++; - wb->to_last = to; - spin_unlock(&vnode->writeback_lock); - SetPagePrivate(page); - set_page_private(page, (unsigned long) wb); - kfree(candidate); - _leave(" = 0 [app]"); + set_page_private(page, priv); + _leave(" = 0"); return 0; - /* the page is currently bound to another context, so if it's dirty we - * need to flush it before we can use the new context */ -flush_conflicting_wb: + /* The previous write and this write aren't adjacent or overlapping, so + * flush the page out. + */ +flush_conflicting_write: _debug("flush conflict"); - if (wb->state == AFS_WBACK_PENDING) - wb->state = AFS_WBACK_CONFLICTING; - spin_unlock(&vnode->writeback_lock); - if (clear_page_dirty_for_io(page)) { - ret = afs_write_back_from_locked_page(wb, page); - if (ret < 0) { - afs_put_writeback(candidate); - _leave(" = %d", ret); - return ret; - } + ret = write_one_page(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; } - /* the page holds a ref on the writeback record */ - afs_put_writeback(wb); - set_page_private(page, 0); - ClearPagePrivate(page); + ret = lock_page_killable(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; + } goto try_again; } @@ -266,11 +188,11 @@ int afs_write_end(struct file *file, struct address_space *mapping, i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) { - spin_lock(&vnode->writeback_lock); + spin_lock(&vnode->wb_lock); i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) i_size_write(&vnode->vfs_inode, maybe_i_size); - spin_unlock(&vnode->writeback_lock); + spin_unlock(&vnode->wb_lock); } if (!PageUptodate(page)) { @@ -299,9 +221,10 @@ int afs_write_end(struct file *file, struct address_space *mapping, /* * kill all the pages in the given range */ -static void afs_kill_pages(struct afs_vnode *vnode, bool error, +static void afs_kill_pages(struct address_space *mapping, pgoff_t first, pgoff_t last) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct pagevec pv; unsigned count, loop; @@ -316,23 +239,62 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, count = last - first + 1; if (count > PAGEVEC_SIZE) count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, - first, count, pv.pages); + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { struct page *page = pv.pages[loop]; ClearPageUptodate(page); - if (error) - SetPageError(page); - if (PageWriteback(page)) - end_page_writeback(page); + SetPageError(page); + end_page_writeback(page); if (page->index >= first) first = page->index + 1; + lock_page(page); + generic_error_remove_page(mapping, page); } __pagevec_release(&pv); - } while (first < last); + } while (first <= last); + + _leave(""); +} + +/* + * Redirty all the pages in a given range. + */ +static void afs_redirty_pages(struct writeback_control *wbc, + struct address_space *mapping, + pgoff_t first, pgoff_t last) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct pagevec pv; + unsigned count, loop; + + _enter("{%x:%u},%lx-%lx", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv, 0); + + do { + _debug("redirty %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + struct page *page = pv.pages[loop]; + + redirty_page_for_writepage(wbc, page); + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; + } + + __pagevec_release(&pv); + } while (first <= last); _leave(""); } @@ -340,26 +302,55 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, /* * write to a file */ -static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, +static int afs_store_data(struct address_space *mapping, + pgoff_t first, pgoff_t last, unsigned offset, unsigned to) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct afs_fs_cursor fc; - struct afs_vnode *vnode = wb->vnode; - int ret; + struct afs_wb_key *wbk = NULL; + struct list_head *p; + int ret = -ENOKEY, ret2; - _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", + _enter("%s{%x:%u.%u},%lx,%lx,%x,%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - key_serial(wb->key), first, last, offset, to); + spin_lock(&vnode->wb_lock); + p = vnode->wb_keys.next; + + /* Iterate through the list looking for a valid key to use. */ +try_next_key: + while (p != &vnode->wb_keys) { + wbk = list_entry(p, struct afs_wb_key, vnode_link); + _debug("wbk %u", key_serial(wbk->key)); + ret2 = key_validate(wbk->key); + if (ret2 == 0) + goto found_key; + if (ret == -ENOKEY) + ret = ret2; + p = p->next; + } + + spin_unlock(&vnode->wb_lock); + afs_put_wb_key(wbk); + _leave(" = %d [no keys]", ret); + return ret; + +found_key: + refcount_inc(&wbk->usage); + spin_unlock(&vnode->wb_lock); + + _debug("USE WB KEY %u", key_serial(wbk->key)); + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, wb->key)) { + if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = vnode->cb_break + vnode->cb_s_break; - afs_fs_store_data(&fc, wb, first, last, offset, to); + afs_fs_store_data(&fc, mapping, first, last, offset, to); } afs_check_for_remote_deletion(&fc, fc.vnode); @@ -367,20 +358,37 @@ static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, ret = afs_end_vnode_operation(&fc); } + switch (ret) { + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + _debug("next"); + spin_lock(&vnode->wb_lock); + p = wbk->vnode_link.next; + afs_put_wb_key(wbk); + goto try_next_key; + } + + afs_put_wb_key(wbk); _leave(" = %d", ret); return ret; } /* - * synchronously write back the locked page and any subsequent non-locked dirty - * pages also covered by the same writeback record + * Synchronously write back the locked page and any subsequent non-locked dirty + * pages. */ -static int afs_write_back_from_locked_page(struct afs_writeback *wb, - struct page *primary_page) +static int afs_write_back_from_locked_page(struct address_space *mapping, + struct writeback_control *wbc, + struct page *primary_page, + pgoff_t final_page) { struct page *pages[8], *page; - unsigned long count; - unsigned n, offset, to; + unsigned long count, priv; + unsigned n, offset, to, f, t; pgoff_t start, first, last; int loop, ret; @@ -390,20 +398,28 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, if (test_set_page_writeback(primary_page)) BUG(); - /* find all consecutive lockable dirty pages, stopping when we find a - * page that is not immediately lockable, is not dirty or is missing, - * or we reach the end of the range */ + /* Find all consecutive lockable dirty pages that have contiguous + * written regions, stopping when we find a page that is not + * immediately lockable, is not dirty or is missing, or we reach the + * end of the range. + */ start = primary_page->index; - if (start >= wb->last) + priv = page_private(primary_page); + offset = priv & AFS_PRIV_MAX; + to = priv >> AFS_PRIV_SHIFT; + + WARN_ON(offset == to); + + if (start >= final_page || to < PAGE_SIZE) goto no_more; + start++; do { _debug("more %lx [%lx]", start, count); - n = wb->last - start + 1; + n = final_page - start + 1; if (n > ARRAY_SIZE(pages)) n = ARRAY_SIZE(pages); - n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, - start, n, pages); + n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages); _debug("fgpc %u", n); if (n == 0) goto no_more; @@ -415,16 +431,27 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, } for (loop = 0; loop < n; loop++) { + if (to != PAGE_SIZE) + break; page = pages[loop]; - if (page->index > wb->last) + if (page->index > final_page) break; if (!trylock_page(page)) break; - if (!PageDirty(page) || - page_private(page) != (unsigned long) wb) { + if (!PageDirty(page) || PageWriteback(page)) { + unlock_page(page); + break; + } + + priv = page_private(page); + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + if (f != 0) { unlock_page(page); break; } + to = t; + if (!clear_page_dirty_for_io(page)) BUG(); if (test_set_page_writeback(page)) @@ -440,50 +467,55 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, } start += loop; - } while (start <= wb->last && count < 65536); + } while (start <= final_page && count < 65536); no_more: - /* we now have a contiguous set of dirty pages, each with writeback set - * and the dirty mark cleared; the first page is locked and must remain - * so, all the rest are unlocked */ + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + unlock_page(primary_page); + first = primary_page->index; last = first + count - 1; - offset = (first == wb->first) ? wb->offset_first : 0; - to = (last == wb->last) ? wb->to_last : PAGE_SIZE; - _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); - ret = afs_store_data(wb, first, last, offset, to); - if (ret < 0) { - switch (ret) { - case -EDQUOT: - case -ENOSPC: - mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC); - break; - case -EROFS: - case -EIO: - case -EREMOTEIO: - case -EFBIG: - case -ENOENT: - case -ENOMEDIUM: - case -ENXIO: - afs_kill_pages(wb->vnode, true, first, last); - mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO); - break; - case -EACCES: - case -EPERM: - case -ENOKEY: - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EKEYREVOKED: - afs_kill_pages(wb->vnode, false, first, last); - break; - default: - break; - } - } else { + ret = afs_store_data(mapping, first, last, offset, to); + switch (ret) { + case 0: ret = count; + break; + + default: + pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret); + /* Fall through */ + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, ret); + break; + + case -EDQUOT: + case -ENOSPC: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, -ENOSPC); + break; + + case -EROFS: + case -EIO: + case -EREMOTEIO: + case -EFBIG: + case -ENOENT: + case -ENOMEDIUM: + case -ENXIO: + afs_kill_pages(mapping, first, last); + mapping_set_error(mapping, ret); + break; } _leave(" = %d", ret); @@ -496,16 +528,12 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, */ int afs_writepage(struct page *page, struct writeback_control *wbc) { - struct afs_writeback *wb; int ret; _enter("{%lx},", page->index); - wb = (struct afs_writeback *) page_private(page); - ASSERT(wb != NULL); - - ret = afs_write_back_from_locked_page(wb, page); - unlock_page(page); + ret = afs_write_back_from_locked_page(page->mapping, wbc, page, + wbc->range_end >> PAGE_SHIFT); if (ret < 0) { _leave(" = %d", ret); return 0; @@ -524,7 +552,6 @@ static int afs_writepages_region(struct address_space *mapping, struct writeback_control *wbc, pgoff_t index, pgoff_t end, pgoff_t *_next) { - struct afs_writeback *wb; struct page *page; int ret, n; @@ -550,7 +577,12 @@ static int afs_writepages_region(struct address_space *mapping, * (changing page->mapping to NULL), or even swizzled back from * swapper_space to tmpfs file mapping */ - lock_page(page); + ret = lock_page_killable(page); + if (ret < 0) { + put_page(page); + _leave(" = %d", ret); + return ret; + } if (page->mapping != mapping || !PageDirty(page)) { unlock_page(page); @@ -566,17 +598,9 @@ static int afs_writepages_region(struct address_space *mapping, continue; } - wb = (struct afs_writeback *) page_private(page); - ASSERT(wb != NULL); - - spin_lock(&wb->vnode->writeback_lock); - wb->state = AFS_WBACK_WRITING; - spin_unlock(&wb->vnode->writeback_lock); - if (!clear_page_dirty_for_io(page)) BUG(); - ret = afs_write_back_from_locked_page(wb, page); - unlock_page(page); + ret = afs_write_back_from_locked_page(mapping, wbc, page, end); put_page(page); if (ret < 0) { _leave(" = %d", ret); @@ -632,17 +656,13 @@ int afs_writepages(struct address_space *mapping, */ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) { - struct afs_writeback *wb = call->wb; struct pagevec pv; unsigned count, loop; pgoff_t first = call->first, last = call->last; - bool free_wb; _enter("{%x:%u},{%lx-%lx}", vnode->fid.vid, vnode->fid.vnode, first, last); - ASSERT(wb != NULL); - pagevec_init(&pv, 0); do { @@ -651,35 +671,19 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) count = last - first + 1; if (count > PAGEVEC_SIZE) count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(call->mapping, first, count, - pv.pages); + pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, + first, count, pv.pages); ASSERTCMP(pv.nr, ==, count); - spin_lock(&vnode->writeback_lock); for (loop = 0; loop < count; loop++) { - struct page *page = pv.pages[loop]; - end_page_writeback(page); - if (page_private(page) == (unsigned long) wb) { - set_page_private(page, 0); - ClearPagePrivate(page); - wb->usage--; - } + set_page_private(pv.pages[loop], 0); + end_page_writeback(pv.pages[loop]); } - free_wb = false; - if (wb->usage == 0) { - afs_unlink_writeback(wb); - free_wb = true; - } - spin_unlock(&vnode->writeback_lock); first += count; - if (free_wb) { - afs_free_writeback(wb); - wb = NULL; - } - __pagevec_release(&pv); } while (first <= last); + afs_prune_wb_keys(vnode); _leave(""); } @@ -710,28 +714,6 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; } -/* - * flush the vnode to the fileserver - */ -int afs_writeback_all(struct afs_vnode *vnode) -{ - struct address_space *mapping = vnode->vfs_inode.i_mapping; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_cyclic = 1, - }; - int ret; - - _enter(""); - - ret = mapping->a_ops->writepages(mapping, &wbc); - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - - _leave(" = %d", ret); - return ret; -} - /* * flush any dirty pages for this process, and check for write errors. * - the return status from this call provides a reliable indication of @@ -740,61 +722,13 @@ int afs_writeback_all(struct afs_vnode *vnode) int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file_inode(file); - struct afs_writeback *wb, *xwb; struct afs_vnode *vnode = AFS_FS_I(inode); - int ret; _enter("{%x:%u},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; - inode_lock(inode); - - /* use a writeback record as a marker in the queue - when this reaches - * the front of the queue, all the outstanding writes are either - * completed or rejected */ - wb = kzalloc(sizeof(*wb), GFP_KERNEL); - if (!wb) { - ret = -ENOMEM; - goto out; - } - wb->vnode = vnode; - wb->first = 0; - wb->last = -1; - wb->offset_first = 0; - wb->to_last = PAGE_SIZE; - wb->usage = 1; - wb->state = AFS_WBACK_SYNCING; - init_waitqueue_head(&wb->waitq); - - spin_lock(&vnode->writeback_lock); - list_for_each_entry(xwb, &vnode->writebacks, link) { - if (xwb->state == AFS_WBACK_PENDING) - xwb->state = AFS_WBACK_CONFLICTING; - } - list_add_tail(&wb->link, &vnode->writebacks); - spin_unlock(&vnode->writeback_lock); - - /* push all the outstanding writebacks to the server */ - ret = afs_writeback_all(vnode); - if (ret < 0) { - afs_put_writeback(wb); - _leave(" = %d [wb]", ret); - goto out; - } - - /* wait for the preceding writes to actually complete */ - ret = wait_event_interruptible(wb->waitq, - wb->state == AFS_WBACK_COMPLETE || - vnode->writebacks.next == &wb->link); - afs_put_writeback(wb); - _leave(" = %d", ret); -out: - inode_unlock(inode); - return ret; + return file_write_and_wait_range(file, start, end); } /* @@ -831,3 +765,68 @@ int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) _leave(" = 0"); return 0; } + +/* + * Prune the keys cached for writeback. The caller must hold vnode->wb_lock. + */ +void afs_prune_wb_keys(struct afs_vnode *vnode) +{ + LIST_HEAD(graveyard); + struct afs_wb_key *wbk, *tmp; + + /* Discard unused keys */ + spin_lock(&vnode->wb_lock); + + if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) && + !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) { + list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) { + if (refcount_read(&wbk->usage) == 1) + list_move(&wbk->vnode_link, &graveyard); + } + } + + spin_unlock(&vnode->wb_lock); + + while (!list_empty(&graveyard)) { + wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + afs_put_wb_key(wbk); + } +} + +/* + * Clean up a page during invalidation. + */ +int afs_launder_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + unsigned long priv; + unsigned int f, t; + int ret = 0; + + _enter("{%lx}", page->index); + + priv = page_private(page); + if (clear_page_dirty_for_io(page)) { + f = 0; + t = PAGE_SIZE; + if (PagePrivate(page)) { + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + } + + ret = afs_store_data(mapping, page->index, page->index, t, f); + } + + set_page_private(page, 0); + ClearPagePrivate(page); + +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page)) { + fscache_wait_on_page_write(vnode->cache, page); + fscache_uncache_page(vnode->cache, page); + } +#endif + return ret; +} diff --git a/mm/filemap.c b/mm/filemap.c index 594d73fef8b4..5bcc87adbeeb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1041,6 +1041,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr) wait_queue_head_t *q = page_waitqueue(page); return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); } +EXPORT_SYMBOL(wait_on_page_bit_killable); /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue -- GitLab