commit b61f7dcf
Author: David Howells <dhowells@redhat.com>

afs: Fix directory page locking

The afs directory loading code (primarily afs_read_dir()) locks all the
pages that hold a directory's content blob to defend against
getdents/getdents races and getdents/lookup races where the competitors
issue conflicting reads on the same data.  As the reads will complete
consecutively, they may retrieve different versions of the data and
one may overwrite the data that the other is busy parsing.

Fix this by not locking the pages at all, but rather by turning the
validation lock into an rwsem and getting an exclusive lock on it whilst
reading the data or validating the attributes and a shared lock whilst
parsing the data.  Sharing the attribute validation lock should be fine as
the data fetch will retrieve the attributes also.

The individual page locks aren't needed at all as the only place they're
being used is to serialise data loading.
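
In outline, the read path now works like this (a condensed sketch with the
fetch/parse steps elided and error handling simplified; the real code is in
the afs_read_dir() and afs_dir_iterate() hunks below):

	if (down_read_killable(&dvnode->validate_lock) < 0)
		return -ERESTARTSYS;

	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
		/* Contents need (re)loading: trade the shared lock for the
		 * exclusive one, then recheck in case someone beat us to it.
		 */
		up_read(&dvnode->validate_lock);
		if (down_write_killable(&dvnode->validate_lock) < 0)
			return -ERESTARTSYS;

		if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
			/* ... fetch and check the directory blob ... */
			set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
		}

		/* Keep the lock, but let other readers in whilst we parse. */
		downgrade_write(&dvnode->validate_lock);
	}

	/* ... parse the directory under the shared lock ... */
	up_read(&dvnode->validate_lock);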

Without this patch, the:

 	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
		...
	}

part of afs_read_dir() may be skipped, leaving the pages unlocked when we
hit the success: clause - in which case we try to unlock the not-locked
pages, leading to the following oops:

  page:ffffe38b405b4300 count:3 mapcount:0 mapping:ffff98156c83a978 index:0x0
  flags: 0xfffe000001004(referenced|private)
  raw: 000fffe000001004 ffff98156c83a978 0000000000000000 00000003ffffffff
  raw: dead000000000100 dead000000000200 0000000000000001 ffff98156b27c000
  page dumped because: VM_BUG_ON_PAGE(!PageLocked(page))
  page->mem_cgroup:ffff98156b27c000
  ------------[ cut here ]------------
  kernel BUG at mm/filemap.c:1205!
  ...
  RIP: 0010:unlock_page+0x43/0x50
  ...
  Call Trace:
   afs_dir_iterate+0x789/0x8f0 [kafs]
   ? _cond_resched+0x15/0x30
   ? kmem_cache_alloc_trace+0x166/0x1d0
   ? afs_do_lookup+0x69/0x490 [kafs]
   ? afs_do_lookup+0x101/0x490 [kafs]
   ? key_default_cmp+0x20/0x20
   ? request_key+0x3c/0x80
   ? afs_lookup+0xf1/0x340 [kafs]
   ? __lookup_slow+0x97/0x150
   ? lookup_slow+0x35/0x50
   ? walk_component+0x1bf/0x490
   ? path_lookupat.isra.52+0x75/0x200
   ? filename_lookup.part.66+0xa0/0x170
   ? afs_end_vnode_operation+0x41/0x60 [kafs]
   ? __check_object_size+0x9c/0x171
   ? strncpy_from_user+0x4a/0x170
   ? vfs_statx+0x73/0xe0
   ? __do_sys_newlstat+0x39/0x70
   ? __x64_sys_getdents+0xc9/0x140
   ? __x64_sys_getdents+0x140/0x140
   ? do_syscall_64+0x5b/0x160
   ? entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: f3ddee8d ("afs: Fix directory handling")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Parent: f0ab773f
@@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
  * get reclaimed during the iteration.
  */
 static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+	__acquires(&dvnode->validate_lock)
 {
 	struct afs_read *req;
 	loff_t i_size;
@@ -261,18 +262,21 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 	/* If we're going to reload, we need to lock all the pages to prevent
 	 * races.
 	 */
-	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
-		ret = -ERESTARTSYS;
-		for (i = 0; i < req->nr_pages; i++)
-			if (lock_page_killable(req->pages[i]) < 0)
-				goto error_unlock;
+	ret = -ERESTARTSYS;
+	if (down_read_killable(&dvnode->validate_lock) < 0)
+		goto error;
 
-		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
-			goto success;
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		goto success;
+
+	up_read(&dvnode->validate_lock);
+	if (down_write_killable(&dvnode->validate_lock) < 0)
+		goto error;
 
+	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
 		ret = afs_fetch_data(dvnode, key, req);
 		if (ret < 0)
-			goto error_unlock_all;
+			goto error_unlock;
 
 		task_io_account_read(PAGE_SIZE * req->nr_pages);
 
@@ -284,33 +288,26 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 		for (i = 0; i < req->nr_pages; i++)
 			if (!afs_dir_check_page(dvnode, req->pages[i],
 						req->actual_len))
-				goto error_unlock_all;
+				goto error_unlock;
 
 		// TODO: Trim excess pages
 
 		set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
 	}
 
+	downgrade_write(&dvnode->validate_lock);
 success:
-	i = req->nr_pages;
-	while (i > 0)
-		unlock_page(req->pages[--i]);
 	return req;
 
-error_unlock_all:
-	i = req->nr_pages;
 error_unlock:
-	while (i > 0)
-		unlock_page(req->pages[--i]);
+	up_write(&dvnode->validate_lock);
 error:
 	afs_put_read(req);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 
 content_has_grown:
-	i = req->nr_pages;
-	while (i > 0)
-		unlock_page(req->pages[--i]);
+	up_write(&dvnode->validate_lock);
 	afs_put_read(req);
 	goto retry;
 }
@@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 	}
 
 out:
+	up_read(&dvnode->validate_lock);
 	afs_put_read(req);
 	_leave(" = %d", ret);
 	return ret;
...
@@ -415,7 +415,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	if (valid)
 		goto valid;
 
-	mutex_lock(&vnode->validate_lock);
+	down_write(&vnode->validate_lock);
 
 	/* if the promise has expired, we need to check the server again to get
 	 * a new promise - note that if the (parent) directory's metadata was
@@ -444,13 +444,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	 * different */
 	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
 		afs_zap_data(vnode);
-	mutex_unlock(&vnode->validate_lock);
+	up_write(&vnode->validate_lock);
 valid:
 	_leave(" = 0");
 	return 0;
 
 error_unlock:
-	mutex_unlock(&vnode->validate_lock);
+	up_write(&vnode->validate_lock);
 	_leave(" = %d", ret);
 	return ret;
 }
...
@@ -494,7 +494,7 @@ struct afs_vnode {
 #endif
 	struct afs_permits __rcu *permit_cache;	/* cache of permits so far obtained */
 	struct mutex		io_lock;	/* Lock for serialising I/O on this mutex */
-	struct mutex		validate_lock;	/* lock for validating this vnode */
+	struct rw_semaphore	validate_lock;	/* lock for validating this vnode */
 	spinlock_t		wb_lock;	/* lock for wb_keys */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
...
@@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode)
 	memset(vnode, 0, sizeof(*vnode));
 	inode_init_once(&vnode->vfs_inode);
 	mutex_init(&vnode->io_lock);
-	mutex_init(&vnode->validate_lock);
+	init_rwsem(&vnode->validate_lock);
 	spin_lock_init(&vnode->wb_lock);
 	spin_lock_init(&vnode->lock);
 	INIT_LIST_HEAD(&vnode->wb_keys);
...