diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index a91f30890011ddf69ba9b859484966c943db7fb5..b6426f15b4ae85f5469b962806004237b9b5607e 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -173,12 +173,13 @@ prototypes: sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); + void (*freepage)(struct page *); int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); int (*launder_page) (struct page *); locking rules: - All except set_page_dirty may block + All except set_page_dirty and freepage may block BKL PageLocked(page) i_mutex writepage: no yes, unlocks (see below) @@ -193,6 +194,7 @@ perform_write: no n/a yes bmap: no invalidatepage: no yes releasepage: no yes +freepage: no yes direct_IO: no launder_page: no yes @@ -288,6 +290,9 @@ buffers from the page in preparation for freeing it. It returns zero to indicate that the buffers are (or may be) freeable. If ->releasepage is zero, the kernel assumes that the fs has no private interest in the buffers. + ->freepage() is called when the kernel is done dropping the page +from the page cache. + ->launder_page() may be called prior to releasing a page if it is still found to be dirty. It returns zero if the page was successfully cleaned, or an error value if not. Note that in order to prevent the page diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 55c28b79d8dce3afcb239f45cc1cc62c44685d3c..20899e095e7e6879580b53aa9aa0282fb4b5ce63 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -534,6 +534,7 @@ struct address_space_operations { sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); + void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); struct page* (*get_xip_page)(struct address_space *, sector_t, @@ -678,6 +679,12 @@ struct address_space_operations { need to ensure this. Possibly it can clear the PageUptodate bit if it cannot free private data yet. + freepage: freepage is called once the page is no longer visible in + the page cache in order to allow the cleanup of any private + data. Since it may be called by the memory reclaimer, it + should not assume that the original address_space mapping still + exists, and it should not block. + direct_IO: called by the generic read/write routines to perform direct_IO - that is IO requests which bypass the page cache and transfer data directly between the storage and the diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f0a384e2ae633a1dc2de619d102d5935ff32a273..996dd8989a9135203cab110f57e7c1f8df49e200 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -57,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); -static int nfs_readdir_clear_array(struct page*, gfp_t); +static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -83,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; -const struct address_space_operations nfs_dir_addr_space_ops = { - .releasepage = nfs_readdir_clear_array, +const struct address_space_operations nfs_dir_aops = { + .freepage = nfs_readdir_clear_array, }; #ifdef CONFIG_NFS_V3 @@ -178,6 +178,7 @@ typedef struct { struct page *page; unsigned long page_index; u64 *dir_cookie; + u64 last_cookie; loff_t current_index; decode_dirent_t decode; @@ -213,17 +214,15 @@ void nfs_readdir_release_array(struct page *page) * we are freeing strings created by nfs_add_to_readdir_array() */ static -int nfs_readdir_clear_array(struct page *page, gfp_t mask) +void nfs_readdir_clear_array(struct page *page) { - struct nfs_cache_array *array = nfs_readdir_get_array(page); + struct nfs_cache_array *array; int i; - if (IS_ERR(array)) - return PTR_ERR(array); + array = kmap_atomic(page, KM_USER0); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); - nfs_readdir_release_array(page); - return 0; + kunmap_atomic(array, KM_USER0); } /* @@ -272,7 +271,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) goto out; array->last_cookie = entry->cookie; array->size++; - if (entry->eof == 1) + if (entry->eof != 0) array->eof_index = array->size; out: nfs_readdir_release_array(page); @@ -312,15 +311,14 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des for (i = 0; i < array->size; i++) { if (array->array[i].cookie == *desc->dir_cookie) { desc->cache_entry_index = i; - status = 0; - goto out; + return 0; } } - if (i == array->eof_index) { - desc->eof = 1; + if (array->eof_index >= 0) { status = -EBADCOOKIE; + if (*desc->dir_cookie == array->last_cookie) + desc->eof = 1; } -out: return status; } @@ -328,10 +326,7 @@ static int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) { struct nfs_cache_array *array; - int status = -EBADCOOKIE; - - if (desc->dir_cookie == NULL) - goto out; + int status; array = nfs_readdir_get_array(desc->page); if (IS_ERR(array)) { @@ -344,6 +339,10 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) else status = nfs_readdir_search_for_cookie(array, desc); + if (status == -EAGAIN) { + desc->last_cookie = array->last_cookie; + desc->page_index++; + } nfs_readdir_release_array(desc->page); out: return status; @@ -490,7 +489,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en count++; - if (desc->plus == 1) + if (desc->plus != 0) nfs_prime_dcache(desc->file->f_path.dentry, entry); status = nfs_readdir_add_to_array(entry, page); @@ -498,7 +497,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en break; } while (!entry->eof); - if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) { + if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { array = nfs_readdir_get_array(page); if (!IS_ERR(array)) { array->eof_index = array->size; @@ -563,7 +562,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, unsigned int array_size = ARRAY_SIZE(pages); entry.prev_cookie = 0; - entry.cookie = *desc->dir_cookie; + entry.cookie = desc->last_cookie; entry.eof = 0; entry.fh = nfs_alloc_fhandle(); entry.fattr = nfs_alloc_fattr(); @@ -636,6 +635,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { + if (!desc->page->mapping) + nfs_readdir_clear_array(desc->page); page_cache_release(desc->page); desc->page = NULL; } @@ -660,9 +661,8 @@ int find_cache_page(nfs_readdir_descriptor_t *desc) return PTR_ERR(desc->page); res = nfs_readdir_search_array(desc); - if (res == 0) - return 0; - cache_page_release(desc); + if (res != 0) + cache_page_release(desc); return res; } @@ -672,22 +672,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) { int res; - if (desc->page_index == 0) + if (desc->page_index == 0) { desc->current_index = 0; - while (1) { - res = find_cache_page(desc); - if (res != -EAGAIN) - break; - desc->page_index++; + desc->last_cookie = 0; } + do { + res = find_cache_page(desc); + } while (res == -EAGAIN); return res; } -static inline unsigned int dt_type(struct inode *inode) -{ - return (inode->i_mode >> 12) & 15; -} - /* * Once we've found the start of the dirent within a page: fill 'er up... */ @@ -717,13 +711,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, break; } file->f_pos++; - desc->cache_entry_index = i; if (i < (array->size-1)) *desc->dir_cookie = array->array[i+1].cookie; else *desc->dir_cookie = array->last_cookie; } - if (i == array->eof_index) + if (array->eof_index >= 0) desc->eof = 1; nfs_readdir_release_array(desc->page); @@ -764,6 +757,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, } desc->page_index = 0; + desc->last_cookie = *desc->dir_cookie; desc->page = page; status = nfs_readdir_xdr_to_array(desc, page, inode); @@ -791,7 +785,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct inode *inode = dentry->d_inode; nfs_readdir_descriptor_t my_desc, *desc = &my_desc; - int res = -ENOMEM; + int res; dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -816,7 +810,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (res < 0) goto out; - while (desc->eof != 1) { + do { res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { @@ -844,7 +838,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) res = nfs_do_filldir(desc, dirent, filldir); if (res < 0) break; - } + } while (!desc->eof); out: nfs_unblock_sillyrename(dentry); if (res > 0) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 60677f9f13110d1a98774595de819fddf2c0ccdf..7bf029ef4084c6e7f02e05e355382168e85e2cb8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -693,6 +693,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status = 0; + unsigned int saved_type = fl->fl_type; /* Try local locking first */ posix_test_lock(filp, fl); @@ -700,6 +701,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) /* found a conflict */ goto out; } + fl->fl_type = saved_type; if (nfs_have_delegation(inode, FMODE_READ)) goto out_noconflict; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 314f57164602eda0762c0a226db44aa19be461f5..e67e31c734163c6fe6c0eef0d338a2b87ae55763 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; + inode->i_data.a_ops = &nfs_dir_aops; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index eceafe74f4737ea5fbcea90b8ac19749737667b9..4f981f1f668925cba6bebe6ed4cf485da6a56dbb 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -505,13 +505,13 @@ static struct rpc_procinfo mnt3_procedures[] = { static struct rpc_version mnt_version1 = { .number = 1, - .nrprocs = 2, + .nrprocs = ARRAY_SIZE(mnt_procedures), .procs = mnt_procedures, }; static struct rpc_version mnt_version3 = { .number = 3, - .nrprocs = 2, + .nrprocs = ARRAY_SIZE(mnt3_procedures), .procs = mnt3_procedures, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6a653ffd8e4e121e37c29d88ff2f5b0b4dbe264f..4435e5e1f904e3e5ba26f4eb578fc30c5f6d997b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3361,6 +3361,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) ret = nfs_revalidate_inode(server, inode); if (ret < 0) return ret; + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) return ret; @@ -3389,6 +3391,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = nfs4_call_sync(server, &msg, &arg, &res, 1); + /* + * Acl update can result in inode attribute update. + * so mark the attribute cache invalid. + */ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; + spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); return ret; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 137b549e63dbcfa3971aac43091eb91a29a7b69f..b68536cc9046ec937b9b4ccddb98b2acf468f1db 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -115,7 +115,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) { if (!nfs_lock_request_dontget(req)) return 0; - if (req->wb_page != NULL) + if (test_bit(PG_MAPPED, &req->wb_flags)) radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } @@ -125,7 +125,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) */ void nfs_clear_page_tag_locked(struct nfs_page *req) { - if (req->wb_page != NULL) { + if (test_bit(PG_MAPPED, &req->wb_flags)) { struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e4b62c6f5a6e9eb721eda53d836c41055ab2e1b7..aedcaa7f291fbe4405f2b68c13f407c7de6be9ba 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req) (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); - nfs_clear_request(req); nfs_release_request(req); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3c045044fca25183147704f3689c744549ebd77b..4100630c9a5b65f47bb5379ee2bc62f78e60284e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1069,12 +1069,10 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_VER3; mnt->version = 3; break; -#ifdef CONFIG_NFS_V4 case Opt_v4: mnt->flags &= ~NFS_MOUNT_VER3; mnt->version = 4; break; -#endif case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; @@ -1286,12 +1284,10 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_VER3; mnt->version = 3; break; -#ifdef CONFIG_NFS_V4 case NFS4_VERSION: mnt->flags &= ~NFS_MOUNT_VER3; mnt->version = 4; break; -#endif default: goto out_invalid_value; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4c14c17a5276a410bb383e3650f106ab56cf47d6..10d648ea128bf6345df70da0e15ee8ce443ab8e6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -390,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } + set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; @@ -415,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) spin_lock(&inode->i_lock); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); + clear_bit(PG_MAPPED, &req->wb_flags); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { @@ -422,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) iput(inode); } else spin_unlock(&inode->i_lock); - nfs_clear_request(req); nfs_release_request(req); } diff --git a/include/linux/fs.h b/include/linux/fs.h index c9e06cc70dad585c6c185db2d1503d3fad4a5a48..090f0eacde296ec52a06fe5eb1b13c07808b7ada 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -602,6 +602,7 @@ struct address_space_operations { sector_t (*bmap)(struct address_space *, sector_t); void (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, gfp_t); + void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); int (*get_xip_mem)(struct address_space *, pgoff_t, int, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c66fdb7d6998129701f6eaa653f1d28dd641e332..29d504d5d1c31f5698433657dd534c1500f317b2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -401,6 +401,7 @@ extern const struct inode_operations nfs3_file_inode_operations; #endif /* CONFIG_NFS_V3 */ extern const struct file_operations nfs_file_operations; extern const struct address_space_operations nfs_file_aops; +extern const struct address_space_operations nfs_dir_aops; static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) { diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index f8b60e7f4c44d9b652a94bca2ab8945130bdb061..d55cee73f63477a326b73cf7eb15e3e277567b14 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -29,6 +29,7 @@ */ enum { PG_BUSY = 0, + PG_MAPPED, PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, diff --git a/mm/filemap.c b/mm/filemap.c index ea89840fc65fa2b499ce3c19c657e9e8a680b240..6b9aee20f2426b88024a25749402fc5ca1ff7a0a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -143,13 +143,18 @@ void __remove_from_page_cache(struct page *page) void remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; + void (*freepage)(struct page *); BUG_ON(!PageLocked(page)); + freepage = mapping->a_ops->freepage; spin_lock_irq(&mapping->tree_lock); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); + + if (freepage) + freepage(page); } EXPORT_SYMBOL(remove_from_page_cache); diff --git a/mm/truncate.c b/mm/truncate.c index ba887bff48c5a263c315a6ef098284df4df5bb44..3c2d5ddfa0d49f3cf31449e214d8fbd50e92ad08 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -390,6 +390,10 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); + + if (mapping->a_ops->freepage) + mapping->a_ops->freepage(page); + page_cache_release(page); /* pagecache ref */ return 1; failed: diff --git a/mm/vmscan.c b/mm/vmscan.c index d31d7ce52c0ea2dbbf85b26019c9fdd70fab624a..9ca587c692748adbc715b440ff5aa8c89357c511 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -494,9 +494,16 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) spin_unlock_irq(&mapping->tree_lock); swapcache_free(swap, page); } else { + void (*freepage)(struct page *); + + freepage = mapping->a_ops->freepage; + __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); + + if (freepage != NULL) + freepage(page); } return 1;