Commit 6c5daf01 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  truncate: use new helpers
  truncate: new helpers
  fs: fix overflow in sys_mount() for in-kernel calls
  fs: Make unload_nls() NULL pointer safe
  freeze_bdev: grab active reference to frozen superblocks
  freeze_bdev: kill bd_mount_sem
  exofs: remove BKL from super operations
  fs/romfs: correct error-handling code
  vfs: seq_file: add helpers for data filling
  vfs: remove redundant position check in do_sendfile
  vfs: change sb->s_maxbytes to a loff_t
  vfs: explicitly cast s_maxbytes in fiemap_check_ranges
  libfs: return error code on failed attr set
  seq_file: return a negative error code when seq_path_root() fails.
  vfs: optimize touch_time() too
  vfs: optimization for touch_atime()
  vfs: split generic_forget_inode() so that hugetlbfs does not have to copy it
  fs/inode.c: add dev-id and inode number for debugging in init_special_inode()
  libfs: make simple_read_from_buffer conventional
......@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
mm start up ... this is a loose form of stability on mm_users. For
example, it is used in copy_mm to protect against a racing tlb_gather_mmu
single address space optimization, so that the zap_page_range (from
vmtruncate) does not lose sending ipi's to cloned threads that might
truncate) does not lose sending ipi's to cloned threads that might
be spawned underneath it and go to user mode to drag in pte's into tlbs.
swap_lock
......
......@@ -18,7 +18,7 @@
/* Taken over from the old code... */
/* POSIX UID/GID verification for setting inode attributes. */
int inode_change_ok(struct inode *inode, struct iattr *attr)
int inode_change_ok(const struct inode *inode, struct iattr *attr)
{
int retval = -EPERM;
unsigned int ia_valid = attr->ia_valid;
......@@ -60,9 +60,51 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
error:
return retval;
}
EXPORT_SYMBOL(inode_change_ok);
/**
* inode_newsize_ok - may this inode be truncated to a given size
* @inode: the inode to be truncated
* @offset: the new size to assign to the inode
* @Returns: 0 on success, -ve errno on failure
*
* inode_newsize_ok will check filesystem limits and ulimits to check that the
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
* when necessary. Caller must not proceed with inode size change if failure is
* returned. @inode must be a file (not directory), with appropriate
* permissions to allow truncate (inode_newsize_ok does NOT check these
* conditions).
*
* inode_newsize_ok must be called with i_mutex held.
*/
int inode_newsize_ok(const struct inode *inode, loff_t offset)
{
if (inode->i_size < offset) {
unsigned long limit;
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
goto out_big;
} else {
/*
* truncation of in-use swapfiles is disallowed - it would
* cause subsequent swapout to scribble on the now-freed
* blocks.
*/
if (IS_SWAPFILE(inode))
return -ETXTBSY;
}
return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
out_big:
return -EFBIG;
}
EXPORT_SYMBOL(inode_newsize_ok);
int inode_setattr(struct inode * inode, struct iattr * attr)
{
unsigned int ia_valid = attr->ia_valid;
......
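The remaining hunks convert filesystems to this pair of helpers. As a rough sketch of the intended calling convention in a filesystem's ->setattr (examplefs and examplefs_setattr are illustrative names, not code from this merge):

#include <linux/fs.h>

/* Hypothetical ->setattr using the new helper in place of open-coded
 * rlimit/s_maxbytes checks; inode_newsize_ok() sends SIGXFSZ itself. */
static int examplefs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (attr->ia_valid & ATTR_SIZE) {
		error = inode_newsize_ok(inode, attr->ia_size);
		if (error)
			return error;
	}

	return inode_setattr(inode, attr);
}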
......@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb)
{
kfree(BEFS_SB(sb)->mount_opts.iocharset);
BEFS_SB(sb)->mount_opts.iocharset = NULL;
if (BEFS_SB(sb)->nls) {
unload_nls(BEFS_SB(sb)->nls);
BEFS_SB(sb)->nls = NULL;
}
unload_nls(BEFS_SB(sb)->nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
......
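This hunk and the similar ones below (cifs, fat, hfs, hfsplus, isofs, jfs, ncpfs, ntfs, smbfs) all lean on the "fs: Make unload_nls() NULL pointer safe" change shown later in the nls_base.c hunk. The conversion, schematically, with sbi->nls standing in for each filesystem's own field:

	/* before: every caller guarded against NULL itself */
	if (sbi->nls) {
		unload_nls(sbi->nls);
		sbi->nls = NULL;
	}

	/* after: unload_nls(NULL) is a no-op, mirroring kfree(NULL) */
	unload_nls(sbi->nls);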
......@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev);
* freeze_bdev -- lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
*
* This takes the block device bd_mount_sem to make sure no new mounts
* happen on bdev until thaw_bdev() is called.
* If a superblock is found on this device, we take the s_umount semaphore
* on it to make sure nobody unmounts until the snapshot creation is done.
* The reference counter (bd_fsfreeze_count) guarantees that only the last
......@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
bdev->bd_fsfreeze_count++;
if (++bdev->bd_fsfreeze_count > 1) {
/*
* We don't even need to grab a reference - the first call
* to freeze_bdev grab an active reference and only the last
* thaw_bdev drops it.
*/
sb = get_super(bdev);
drop_super(sb);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb;
}
bdev->bd_fsfreeze_count++;
down(&bdev->bd_mount_sem);
sb = get_super(bdev);
if (sb && !(sb->s_flags & MS_RDONLY)) {
sb->s_frozen = SB_FREEZE_WRITE;
smp_wmb();
sync_filesystem(sb);
sb->s_frozen = SB_FREEZE_TRANS;
smp_wmb();
sync_blockdev(sb->s_bdev);
if (sb->s_op->freeze_fs) {
error = sb->s_op->freeze_fs(sb);
if (error) {
printk(KERN_ERR
"VFS:Filesystem freeze failed\n");
sb->s_frozen = SB_UNFROZEN;
drop_super(sb);
up(&bdev->bd_mount_sem);
bdev->bd_fsfreeze_count--;
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return ERR_PTR(error);
}
sb = get_active_super(bdev);
if (!sb)
goto out;
if (sb->s_flags & MS_RDONLY) {
deactivate_locked_super(sb);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb;
}
sb->s_frozen = SB_FREEZE_WRITE;
smp_wmb();
sync_filesystem(sb);
sb->s_frozen = SB_FREEZE_TRANS;
smp_wmb();
sync_blockdev(sb->s_bdev);
if (sb->s_op->freeze_fs) {
error = sb->s_op->freeze_fs(sb);
if (error) {
printk(KERN_ERR
"VFS:Filesystem freeze failed\n");
sb->s_frozen = SB_UNFROZEN;
deactivate_locked_super(sb);
bdev->bd_fsfreeze_count--;
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return ERR_PTR(error);
}
}
up_write(&sb->s_umount);
out:
sync_blockdev(bdev);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
return sb; /* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);
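Even with bd_mount_sem gone, the caller contract is unchanged: freeze_bdev() returns the superblock (now with an active reference held), and the matching thaw_bdev() releases it. A minimal sketch of a snapshot-style caller (take_snapshot() is a placeholder, not a kernel function):

#include <linux/fs.h>

static int snapshot_device(struct block_device *bdev)
{
	struct super_block *sb = freeze_bdev(bdev);

	if (IS_ERR(sb))
		return PTR_ERR(sb);
	take_snapshot(bdev);		/* hypothetical: copy the quiesced device */
	return thaw_bdev(bdev, sb);	/* drops the active reference */
}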
......@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
*/
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
int error = 0;
int error = -EINVAL;
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (!bdev->bd_fsfreeze_count) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return -EINVAL;
}
bdev->bd_fsfreeze_count--;
if (bdev->bd_fsfreeze_count > 0) {
if (sb)
drop_super(sb);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return 0;
}
if (sb) {
BUG_ON(sb->s_bdev != bdev);
if (!(sb->s_flags & MS_RDONLY)) {
if (sb->s_op->unfreeze_fs) {
error = sb->s_op->unfreeze_fs(sb);
if (error) {
printk(KERN_ERR
"VFS:Filesystem thaw failed\n");
sb->s_frozen = SB_FREEZE_TRANS;
bdev->bd_fsfreeze_count++;
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return error;
}
}
sb->s_frozen = SB_UNFROZEN;
smp_wmb();
wake_up(&sb->s_wait_unfrozen);
if (!bdev->bd_fsfreeze_count)
goto out_unlock;
error = 0;
if (--bdev->bd_fsfreeze_count > 0)
goto out_unlock;
if (!sb)
goto out_unlock;
BUG_ON(sb->s_bdev != bdev);
down_write(&sb->s_umount);
if (sb->s_flags & MS_RDONLY)
goto out_deactivate;
if (sb->s_op->unfreeze_fs) {
error = sb->s_op->unfreeze_fs(sb);
if (error) {
printk(KERN_ERR
"VFS:Filesystem thaw failed\n");
sb->s_frozen = SB_FREEZE_TRANS;
bdev->bd_fsfreeze_count++;
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return error;
}
drop_super(sb);
}
up(&bdev->bd_mount_sem);
sb->s_frozen = SB_UNFROZEN;
smp_wmb();
wake_up(&sb->s_wait_unfrozen);
out_deactivate:
if (sb)
deactivate_locked_super(sb);
out_unlock:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return 0;
}
......@@ -430,7 +437,6 @@ static void init_once(void *foo)
memset(bdev, 0, sizeof(*bdev));
mutex_init(&bdev->bd_mutex);
sema_init(&bdev->bd_mount_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
......
......@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
struct address_space *mapping = inode->i_mapping;
struct page *page;
void *fsdata;
unsigned long limit;
int err;
err = -EFBIG;
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && size > (loff_t)limit) {
send_sig(SIGXFSZ, current, 0);
goto out;
}
if (size > inode->i_sb->s_maxbytes)
err = inode_newsize_ok(inode, size);
if (err)
goto out;
err = pagecache_write_begin(NULL, mapping, size, 0,
......
......@@ -185,8 +185,7 @@ cifs_read_super(struct super_block *sb, void *data,
cifs_sb->mountdata = NULL;
}
#endif
if (cifs_sb->local_nls)
unload_nls(cifs_sb->local_nls);
unload_nls(cifs_sb->local_nls);
kfree(cifs_sb);
}
return rc;
......
......@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
static int cifs_vmtruncate(struct inode *inode, loff_t offset)
{
struct address_space *mapping = inode->i_mapping;
unsigned long limit;
loff_t oldsize;
int err;
spin_lock(&inode->i_lock);
if (inode->i_size < offset)
goto do_expand;
/*
* truncation of in-use swapfiles is disallowed - it would cause
* subsequent swapout to scribble on the now-freed blocks.
*/
if (IS_SWAPFILE(inode)) {
spin_unlock(&inode->i_lock);
goto out_busy;
}
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
/*
* unmap_mapping_range is called twice, first simply for efficiency
* so that truncate_inode_pages does fewer single-page unmaps. However
* after this first call, and before truncate_inode_pages finishes,
* it is possible for private pages to be COWed, which remain after
* truncate_inode_pages finishes, hence the second unmap_mapping_range
* call must be made for correctness.
*/
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, offset);
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
goto out_truncate;
do_expand:
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit) {
err = inode_newsize_ok(inode, offset);
if (err) {
spin_unlock(&inode->i_lock);
goto out_sig;
}
if (offset > inode->i_sb->s_maxbytes) {
spin_unlock(&inode->i_lock);
goto out_big;
goto out;
}
oldsize = inode->i_size;
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
out_truncate:
truncate_pagecache(inode, oldsize, offset);
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
out_big:
return -EFBIG;
out_busy:
return -ETXTBSY;
out:
return err;
}
static int
......
......@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
char __user * type, unsigned long flags,
void __user * data)
{
unsigned long type_page;
char *kernel_type;
unsigned long data_page;
unsigned long dev_page;
char *kernel_dev;
char *dir_page;
int retval;
retval = copy_mount_options (type, &type_page);
retval = copy_mount_string(type, &kernel_type);
if (retval < 0)
goto out;
......@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
if (IS_ERR(dir_page))
goto out1;
retval = copy_mount_options (dev_name, &dev_page);
retval = copy_mount_string(dev_name, &kernel_dev);
if (retval < 0)
goto out2;
retval = copy_mount_options (data, &data_page);
retval = copy_mount_options(data, &data_page);
if (retval < 0)
goto out3;
retval = -EINVAL;
if (type_page && data_page) {
if (!strcmp((char *)type_page, SMBFS_NAME)) {
if (kernel_type && data_page) {
if (!strcmp(kernel_type, SMBFS_NAME)) {
do_smb_super_data_conv((void *)data_page);
} else if (!strcmp((char *)type_page, NCPFS_NAME)) {
} else if (!strcmp(kernel_type, NCPFS_NAME)) {
do_ncp_super_data_conv((void *)data_page);
} else if (!strcmp((char *)type_page, NFS4_NAME)) {
} else if (!strcmp(kernel_type, NFS4_NAME)) {
if (do_nfs4_super_data_conv((void *) data_page))
goto out4;
}
}
retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
retval = do_mount(kernel_dev, dir_page, kernel_type,
flags, (void*)data_page);
out4:
free_page(data_page);
out3:
free_page(dev_page);
kfree(kernel_dev);
out2:
putname(dir_page);
out1:
free_page(type_page);
kfree(kernel_type);
out:
return retval;
}
......
......@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
}
lock_super(sb);
lock_kernel();
sbi = sb->s_fs_info;
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
......@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
out:
if (or)
osd_end_request(or);
unlock_kernel();
unlock_super(sb);
kfree(fscb);
return ret;
......@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb)
int num_pend;
struct exofs_sb_info *sbi = sb->s_fs_info;
lock_kernel();
if (sb->s_dirt)
exofs_write_super(sb);
......@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb)
osduld_put_device(sbi->s_dev);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
unlock_kernel();
}
/*
......
......@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb)
iput(sbi->fat_inode);
if (sbi->nls_disk) {
unload_nls(sbi->nls_disk);
sbi->nls_disk = NULL;
sbi->options.codepage = fat_default_codepage;
}
if (sbi->nls_io) {
unload_nls(sbi->nls_io);
sbi->nls_io = NULL;
}
if (sbi->options.iocharset != fat_default_iocharset) {
unload_nls(sbi->nls_disk);
unload_nls(sbi->nls_io);
if (sbi->options.iocharset != fat_default_iocharset)
kfree(sbi->options.iocharset);
sbi->options.iocharset = fat_default_iocharset;
}
sb->s_fs_info = NULL;
kfree(sbi);
......
......@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
return 0;
if (attr->ia_valid & ATTR_SIZE) {
unsigned long limit;
if (IS_SWAPFILE(inode))
return -ETXTBSY;
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
send_sig(SIGXFSZ, current, 0);
return -EFBIG;
}
err = inode_newsize_ok(inode, attr->ia_size);
if (err)
return err;
is_truncate = true;
}
......@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
* FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
*/
if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
if (outarg.attr.size < oldsize)
fuse_truncate(inode->i_mapping, outarg.attr.size);
truncate_pagecache(inode, oldsize, outarg.attr.size);
invalidate_inode_pages2(inode->i_mapping);
}
......
......@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid);
void fuse_truncate(struct address_space *mapping, loff_t offset);
/**
* Initialize the client device
*/
......
......@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
return 0;
}
void fuse_truncate(struct address_space *mapping, loff_t offset)
{
/* See vmtruncate() */
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, offset);
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid)
{
......@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
spin_unlock(&fc->lock);
if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
if (attr->size < oldsize)
fuse_truncate(inode->i_mapping, attr->size);
truncate_pagecache(inode, oldsize, attr->size);
invalidate_inode_pages2(inode->i_mapping);
}
}
......
......@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb)
brelse(HFS_SB(sb)->mdb_bh);
brelse(HFS_SB(sb)->alt_mdb_bh);
if (HFS_SB(sb)->nls_io)
unload_nls(HFS_SB(sb)->nls_io);
if (HFS_SB(sb)->nls_disk)
unload_nls(HFS_SB(sb)->nls_disk);
unload_nls(HFS_SB(sb)->nls_io);
unload_nls(HFS_SB(sb)->nls_disk);
free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
kfree(HFS_SB(sb));
......
......@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb)
iput(HFSPLUS_SB(sb).alloc_file);
iput(HFSPLUS_SB(sb).hidden_dir);
brelse(HFSPLUS_SB(sb).s_vhbh);
if (HFSPLUS_SB(sb).nls)
unload_nls(HFSPLUS_SB(sb).nls);
unload_nls(HFSPLUS_SB(sb).nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
......@@ -464,8 +463,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
cleanup:
hfsplus_put_super(sb);
if (nls)
unload_nls(nls);
unload_nls(nls);
return err;
}
......
......@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
struct super_block *sb = inode->i_sb;
if (!hlist_unhashed(&inode->i_hash)) {
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_unused);
inodes_stat.nr_unused++;
if (!sb || (sb->s_flags & MS_ACTIVE)) {
spin_unlock(&inode_lock);
return;
}
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode_lock);
/*
* write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
* in our backing_dev_info.
*/
write_inode_now(inode, 1);
spin_lock(&inode_lock);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
hlist_del_init(&inode->i_hash);
if (generic_detach_inode(inode)) {
truncate_hugepages(inode, 0);
clear_inode(inode);
destroy_inode(inode);
}
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
truncate_hugepages(inode, 0);
clear_inode(inode);
destroy_inode(inode);
}
static void hugetlbfs_drop_inode(struct inode *inode)
......
......@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
}
EXPORT_SYMBOL(generic_delete_inode);
static void generic_forget_inode(struct inode *inode)
/**
* generic_detach_inode - remove inode from inode lists
* @inode: inode to remove
*
* Remove inode from inode lists, write it if it's dirty. This is just an
* internal VFS helper exported for hugetlbfs. Do not use!
*
* Returns 1 if inode should be completely destroyed.
*/
int generic_detach_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
......@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
return;
return 0;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
......@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
return 1;
}
EXPORT_SYMBOL_GPL(generic_detach_inode);
static void generic_forget_inode(struct inode *inode)
{
if (!generic_detach_inode(inode))
return;
if (inode->i_data.nrpages)
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
......@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct timespec now;
if (mnt_want_write(mnt))
return;
if (inode->i_flags & S_NOATIME)
goto out;
return;
if (IS_NOATIME(inode))
goto out;
return;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
goto out;
return;
if (mnt->mnt_flags & MNT_NOATIME)
goto out;
return;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
goto out;
return;
now = current_fs_time(inode->i_sb);
if (!relatime_need_update(mnt, inode, now))
goto out;
return;
if (timespec_equal(&inode->i_atime, &now))
goto out;
return;
if (mnt_want_write(mnt))
return;
inode->i_atime = now;
mark_inode_dirty_sync(inode);
out:
mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);
......@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct timespec now;
int sync_it = 0;
int err;
enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
/* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return;
err = mnt_want_write_file(file);
if (err)
return;
now = current_fs_time(inode->i_sb);
if (!timespec_equal(&inode->i_mtime, &now)) {
inode->i_mtime = now;
sync_it = 1;
}
if (!timespec_equal(&inode->i_mtime, &now))
sync_it = S_MTIME;
if (!timespec_equal(&inode->i_ctime, &now)) {
inode->i_ctime = now;
sync_it = 1;
}
if (!timespec_equal(&inode->i_ctime, &now))
sync_it |= S_CTIME;
if (IS_I_VERSION(inode)) {
inode_inc_iversion(inode);
sync_it = 1;
}
if (IS_I_VERSION(inode))
sync_it |= S_VERSION;
if (!sync_it)
return;
if (sync_it)
mark_inode_dirty_sync(inode);
/* Finally allowed to write? Takes lock. */
if (mnt_want_write_file(file))
return;
/* Only change inode inside the lock region */
if (sync_it & S_VERSION)
inode_inc_iversion(inode);
if (sync_it & S_CTIME)
inode->i_ctime = now;
if (sync_it & S_MTIME)
inode->i_mtime = now;
mark_inode_dirty_sync(inode);
mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);
......@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
else if (S_ISSOCK(mode))
inode->i_fop = &bad_sock_fops;
else
printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
mode);
printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
" inode %s:%lu\n", mode, inode->i_sb->s_id,
inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);
......@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *);
* namespace.c
*/
extern int copy_mount_options(const void __user *, unsigned long *);
extern int copy_mount_string(const void __user *, char **);
extern void free_vfsmnt(struct vfsmount *);
extern struct vfsmount *alloc_vfsmnt(const char *);
......
......@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
static int fiemap_check_ranges(struct super_block *sb,
u64 start, u64 len, u64 *new_len)
{
u64 maxbytes = (u64) sb->s_maxbytes;
*new_len = len;
if (len == 0)
return -EINVAL;
if (start > sb->s_maxbytes)
if (start > maxbytes)
return -EFBIG;
/*
* Shrink request scope to what the fs can actually handle.
*/
if ((len > sb->s_maxbytes) ||
(sb->s_maxbytes - len) < start)
*new_len = sb->s_maxbytes - start;
if (len > maxbytes || (maxbytes - len) < start)
*new_len = maxbytes - start;
return 0;
}
......
......@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb)
#ifdef CONFIG_JOLIET
lock_kernel();
if (sbi->s_nls_iocharset) {
unload_nls(sbi->s_nls_iocharset);
sbi->s_nls_iocharset = NULL;
}
unload_nls(sbi->s_nls_iocharset);
unlock_kernel();
#endif
......@@ -912,8 +909,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
printk(KERN_WARNING "%s: get root inode failed\n", __func__);
out_no_inode:
#ifdef CONFIG_JOLIET
if (sbi->s_nls_iocharset)
unload_nls(sbi->s_nls_iocharset);
unload_nls(sbi->s_nls_iocharset);
#endif
goto out_freesbi;
out_no_read:
......
......@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb)
rc = jfs_umount(sb);
if (rc)
jfs_err("jfs_umount failed with return code %d", rc);
if (sbi->nls_tab)
unload_nls(sbi->nls_tab);
sbi->nls_tab = NULL;
unload_nls(sbi->nls_tab);
truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
iput(sbi->direct_inode);
sbi->direct_inode = NULL;
kfree(sbi);
......@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
if (nls_map != (void *) -1) {
/* Discard old (if remount) */
if (sbi->nls_tab)
unload_nls(sbi->nls_tab);
unload_nls(sbi->nls_tab);
sbi->nls_tab = nls_map;
}
return 1;
......
......@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
const void *from, size_t available)
{
loff_t pos = *ppos;
size_t ret;
if (pos < 0)
return -EINVAL;
if (pos >= available)
if (pos >= available || !count)
return 0;
if (count > available - pos)
count = available - pos;
if (copy_to_user(to, from + pos, count))
ret = copy_to_user(to, from + pos, count);
if (ret == count)
return -EFAULT;
count -= ret;
*ppos = pos + count;
return count;
}
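After this fix, simple_read_from_buffer() behaves like a conventional read: a partial copy_to_user() no longer fails the whole call; it returns the bytes actually copied and advances *ppos accordingly. Typical debugfs-style usage (example_read and example_msg are illustrative):

#include <linux/fs.h>

static const char example_msg[] = "hello\n";

static ssize_t example_read(struct file *file, char __user *to,
			    size_t count, loff_t *ppos)
{
	return simple_read_from_buffer(to, count, ppos,
				       example_msg, sizeof(example_msg) - 1);
}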
......@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
if (copy_from_user(attr->set_buf, buf, size))
goto out;
ret = len; /* claim we got the whole input */
attr->set_buf[size] = '\0';
val = simple_strtol(attr->set_buf, NULL, 0);
attr->set(attr->data, val);
ret = attr->set(attr->data, val);
if (ret == 0)
ret = len; /* on success, claim we got the whole input */
out:
mutex_unlock(&attr->mutex);
return ret;
......
......@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
{
struct vfsmount *mnt;
if (!type || !memchr(type, 0, PAGE_SIZE))
if (!type)
return -EINVAL;
/* we need capabilities... */
......@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
return 0;
}
int copy_mount_string(const void __user *data, char **where)
{
char *tmp;
if (!data) {
*where = NULL;
return 0;
}
tmp = strndup_user(data, PAGE_SIZE);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
*where = tmp;
return 0;
}
/*
* Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
* be given to the mount() call (ie: read-only, no-dev, no-suid etc).
......@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
return -EINVAL;
if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
return -EINVAL;
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
......@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns);
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
int retval;
int ret;
char *kernel_type;
char *kernel_dir;
char *kernel_dev;
unsigned long data_page;
unsigned long type_page;
unsigned long dev_page;
char *dir_page;
retval = copy_mount_options(type, &type_page);
if (retval < 0)
return retval;
ret = copy_mount_string(type, &kernel_type);
if (ret < 0)
goto out_type;
dir_page = getname(dir_name);
retval = PTR_ERR(dir_page);
if (IS_ERR(dir_page))
goto out1;
kernel_dir = getname(dir_name);
if (IS_ERR(kernel_dir)) {
ret = PTR_ERR(kernel_dir);
goto out_dir;
}
retval = copy_mount_options(dev_name, &dev_page);
if (retval < 0)
goto out2;
ret = copy_mount_string(dev_name, &kernel_dev);
if (ret < 0)
goto out_dev;
retval = copy_mount_options(data, &data_page);
if (retval < 0)
goto out3;
ret = copy_mount_options(data, &data_page);
if (ret < 0)
goto out_data;
retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
flags, (void *)data_page);
free_page(data_page);
ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
(void *) data_page);
out3:
free_page(dev_page);
out2:
putname(dir_page);
out1:
free_page(type_page);
return retval;
free_page(data_page);
out_data:
kfree(kernel_dev);
out_dev:
putname(kernel_dir);
out_dir:
kfree(kernel_type);
out_type:
return ret;
}
/*
......
......@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb)
#ifdef CONFIG_NCPFS_NLS
/* unload the NLS charsets */
if (server->nls_vol)
{
unload_nls(server->nls_vol);
server->nls_vol = NULL;
}
if (server->nls_io)
{
unload_nls(server->nls_io);
server->nls_io = NULL;
}
unload_nls(server->nls_vol);
unload_nls(server->nls_io);
#endif /* CONFIG_NCPFS_NLS */
if (server->info_filp)
......
......@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
oldset_io = server->nls_io;
server->nls_io = iocharset;
if (oldset_cp)
unload_nls(oldset_cp);
if (oldset_io)
unload_nls(oldset_io);
unload_nls(oldset_cp);
unload_nls(oldset_io);
return 0;
}
......
......@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
*/
static int nfs_vmtruncate(struct inode * inode, loff_t offset)
{
if (i_size_read(inode) < offset) {
unsigned long limit;
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
goto out_big;
spin_lock(&inode->i_lock);
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
} else {
struct address_space *mapping = inode->i_mapping;
loff_t oldsize;
int err;
/*
* truncation of in-use swapfiles is disallowed - it would
* cause subsequent swapout to scribble on the now-freed
* blocks.
*/
if (IS_SWAPFILE(inode))
return -ETXTBSY;
spin_lock(&inode->i_lock);
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
err = inode_newsize_ok(inode, offset);
if (err)
goto out;
/*
* unmap_mapping_range is called twice, first simply for
* efficiency so that truncate_inode_pages does fewer
* single-page unmaps. However after this first call, and
* before truncate_inode_pages finishes, it is possible for
* private pages to be COWed, which remain after
* truncate_inode_pages finishes, hence the second
* unmap_mapping_range call must be made for correctness.
*/
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, offset);
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}
return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
out_big:
return -EFBIG;
spin_lock(&inode->i_lock);
oldsize = inode->i_size;
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
truncate_pagecache(inode, oldsize, offset);
out:
return err;
}
/**
......
......@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset)
void unload_nls(struct nls_table *nls)
{
module_put(nls->owner);
if (nls)
module_put(nls->owner);
}
static const wchar_t charset2uni[256] = {
......
......@@ -201,8 +201,7 @@ static bool parse_options(ntfs_volume *vol, char *opt)
v, old_nls->charset);
nls_map = old_nls;
} else /* nls_map */ {
if (old_nls)
unload_nls(old_nls);
unload_nls(old_nls);
}
} else if (!strcmp(p, "utf8")) {
bool val = false;
......@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb)
ntfs_free(vol->upcase);
vol->upcase = NULL;
}
if (vol->nls_map) {
unload_nls(vol->nls_map);
vol->nls_map = NULL;
}
unload_nls(vol->nls_map);
sb->s_fs_info = NULL;
kfree(vol);
......
......@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
/* make various checks */
order = get_order(newsize);
if (unlikely(order >= MAX_ORDER))
goto too_big;
return -EFBIG;
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && newsize > limit)
goto fsize_exceeded;
if (newsize > inode->i_sb->s_maxbytes)
goto too_big;
ret = inode_newsize_ok(inode, newsize);
if (ret)
return ret;
i_size_write(inode, newsize);
......@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
return 0;
fsize_exceeded:
send_sig(SIGXFSZ, current, 0);
too_big:
return -EFBIG;
add_error:
while (loop < npages)
__free_page(pages + loop++);
return ret;
......
......@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
pos = *ppos;
retval = -EINVAL;
if (unlikely(pos < 0))
goto fput_out;
if (unlikely(pos + count > max)) {
retval = -EOVERFLOW;
if (pos >= max)
......
......@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
*/
int seq_path(struct seq_file *m, struct path *path, char *esc)
{
if (m->count < m->size) {
char *s = m->buf + m->count;
char *p = d_path(path, s, m->size - m->count);
char *buf;
size_t size = seq_get_buf(m, &buf);
int res = -1;
if (size) {
char *p = d_path(path, buf, size);
if (!IS_ERR(p)) {
s = mangle_path(s, p, esc);
if (s) {
p = m->buf + m->count;
m->count = s - m->buf;
return s - p;
}
char *end = mangle_path(buf, p, esc);
if (end)
res = end - buf;
}
}
m->count = m->size;
return -1;
seq_commit(m, res);
return res;
}
EXPORT_SYMBOL(seq_path);
......@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
char *esc)
{
int err = -ENAMETOOLONG;
if (m->count < m->size) {
char *s = m->buf + m->count;
char *buf;
size_t size = seq_get_buf(m, &buf);
int res = -ENAMETOOLONG;
if (size) {
char *p;
spin_lock(&dcache_lock);
p = __d_path(path, root, s, m->size - m->count);
p = __d_path(path, root, buf, size);
spin_unlock(&dcache_lock);
err = PTR_ERR(p);
res = PTR_ERR(p);
if (!IS_ERR(p)) {
s = mangle_path(s, p, esc);
if (s) {
p = m->buf + m->count;
m->count = s - m->buf;
return 0;
}
char *end = mangle_path(buf, p, esc);
if (end)
res = end - buf;
else
res = -ENAMETOOLONG;
}
}
m->count = m->size;
return err;
seq_commit(m, res);
return res < 0 ? res : 0;
}
/*
......@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
*/
int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
{
if (m->count < m->size) {
char *s = m->buf + m->count;
char *p = dentry_path(dentry, s, m->size - m->count);
char *buf;
size_t size = seq_get_buf(m, &buf);
int res = -1;
if (size) {
char *p = dentry_path(dentry, buf, size);
if (!IS_ERR(p)) {
s = mangle_path(s, p, esc);
if (s) {
p = m->buf + m->count;
m->count = s - m->buf;
return s - p;
}
char *end = mangle_path(buf, p, esc);
if (end)
res = end - buf;
}
}
m->count = m->size;
return -1;
seq_commit(m, res);
return res;
}
int seq_bitmap(struct seq_file *m, const unsigned long *bits,
......
......@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m)
static void
smb_unload_nls(struct smb_sb_info *server)
{
if (server->remote_nls) {
unload_nls(server->remote_nls);
server->remote_nls = NULL;
}
if (server->local_nls) {
unload_nls(server->local_nls);
server->local_nls = NULL;
}
unload_nls(server->remote_nls);
unload_nls(server->local_nls);
}
static void
......
......@@ -465,6 +465,48 @@ struct super_block * get_super(struct block_device *bdev)
}
EXPORT_SYMBOL(get_super);
/**
* get_active_super - get an active reference to the superblock of a device
* @bdev: device to get the superblock for
*
* Scans the superblock list and finds the superblock of the file system
* mounted on the device given. Returns the superblock with an active
* reference and s_umount held exclusively or %NULL if none was found.
*/
struct super_block *get_active_super(struct block_device *bdev)
{
struct super_block *sb;
if (!bdev)
return NULL;
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
if (sb->s_bdev != bdev)
continue;
sb->s_count++;
spin_unlock(&sb_lock);
down_write(&sb->s_umount);
if (sb->s_root) {
spin_lock(&sb_lock);
if (sb->s_count > S_BIAS) {
atomic_inc(&sb->s_active);
sb->s_count--;
spin_unlock(&sb_lock);
return sb;
}
spin_unlock(&sb_lock);
}
up_write(&sb->s_umount);
put_super(sb);
yield();
spin_lock(&sb_lock);
}
spin_unlock(&sb_lock);
return NULL;
}
struct super_block * user_get_super(dev_t dev)
{
......@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
int retval;
int remount_rw;
if (sb->s_frozen != SB_UNFROZEN)
return -EBUSY;
#ifdef CONFIG_BLOCK
if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
if (flags & MS_RDONLY)
acct_auto_close(sb);
shrink_dcache_sb(sb);
......@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type,
* will protect the lockfs code from trying to start a snapshot
* while we are mounting
*/
down(&bdev->bd_mount_sem);
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
error = -EBUSY;
goto error_bdev;
}
s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
up(&bdev->bd_mount_sem);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
if (IS_ERR(s))
goto error_s;
......@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
if (error)
goto out_sb;
/*
* filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
* but s_maxbytes was an unsigned long long for many releases. Throw
* this warning for a little while to try and catch filesystems that
* violate this rule. This warning should be either removed or
* converted to a BUG() in 2.6.34.
*/
WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
"negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
up_write(&mnt->mnt_sb->s_umount);
......
......@@ -641,7 +641,6 @@ struct block_device {
struct super_block * bd_super;
int bd_openers;
struct mutex bd_mutex; /* open/close mutex */
struct semaphore bd_mount_sem;
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
......@@ -1316,7 +1315,7 @@ struct super_block {
unsigned long s_blocksize;
unsigned char s_blocksize_bits;
unsigned char s_dirt;
unsigned long long s_maxbytes; /* Max file size */
loff_t s_maxbytes; /* Max file size */
struct file_system_type *s_type;
const struct super_operations *s_op;
const struct dquot_operations *dq_op;
......@@ -2157,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t);
extern int inode_needs_sync(struct inode *inode);
extern void generic_delete_inode(struct inode *inode);
extern void generic_drop_inode(struct inode *inode);
extern int generic_detach_inode(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
......@@ -2335,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
extern struct super_block *get_super(struct block_device *);
extern struct super_block *get_active_super(struct block_device *bdev);
extern struct super_block *user_get_super(dev_t);
extern void drop_super(struct super_block *sb);
......@@ -2382,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *,
#define buffer_migrate_page NULL
#endif
extern int inode_change_ok(struct inode *, struct iattr *);
extern int inode_change_ok(const struct inode *, struct iattr *);
extern int inode_newsize_ok(const struct inode *, loff_t offset);
extern int __must_check inode_setattr(struct inode *, struct iattr *);
extern void file_update_time(struct file *file);
......
......@@ -792,8 +792,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
unmap_mapping_range(mapping, holebegin, holelen, 0);
}
extern int vmtruncate(struct inode * inode, loff_t offset);
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
extern int vmtruncate(struct inode *inode, loff_t offset);
extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
......
......@@ -35,6 +35,44 @@ struct seq_operations {
#define SEQ_SKIP 1
/**
* seq_get_buf - get buffer to write arbitrary data to
* @m: the seq_file handle
* @bufp: the beginning of the buffer is stored here
*
* Return the number of bytes available in the buffer, or zero if
* there's no space.
*/
static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
{
BUG_ON(m->count > m->size);
if (m->count < m->size)
*bufp = m->buf + m->count;
else
*bufp = NULL;
return m->size - m->count;
}
/**
* seq_commit - commit data to the buffer
* @m: the seq_file handle
* @num: the number of bytes to commit
*
* Commit @num bytes of data written to a buffer previously acquired
* by seq_get_buf. To signal an error condition, or that the data
* didn't fit in the available space, pass a negative @num value.
*/
static inline void seq_commit(struct seq_file *m, int num)
{
if (num < 0) {
m->count = m->size;
} else {
BUG_ON(m->count + num > m->size);
m->count += num;
}
}
char *mangle_path(char *s, char *p, char *esc);
int seq_open(struct file *, const struct seq_operations *);
ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
......
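Taken together, seq_get_buf()/seq_commit() let a ->show() routine format directly into the seq_file buffer and either commit the bytes produced or flag an overflow, which is exactly the pattern the seq_file.c conversions above adopt. A hedged sketch (example_show() and format_record() are hypothetical):

#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	char *buf;
	size_t size = seq_get_buf(m, &buf);
	int res = -1;

	if (size)
		/* hypothetical formatter: returns bytes written, or a
		 * negative value if the record did not fit in 'size' */
		res = format_record(buf, size, v);
	seq_commit(m, res);	/* negative res marks the buffer as full */
	return 0;
}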
......@@ -58,7 +58,7 @@
/*
* Lock ordering:
*
* ->i_mmap_lock (vmtruncate)
* ->i_mmap_lock (truncate_pagecache)
* ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_lock (exclusive_swap_page, others)
* ->mapping->tree_lock
......
......@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long addr = vma->vm_start;
/*
* Hide vma from rmap and vmtruncate before freeing pgtables
* Hide vma from rmap and truncate_pagecache before freeing
* pgtables
*/
anon_vma_unlink(vma);
unlink_file_vma(vma);
......@@ -2408,7 +2409,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
* boundary. Note that this is different from vmtruncate(), which
* boundary. Note that this is different from truncate_pagecache(), which
* must keep the partial page. In contrast, we must get rid of
* partial pages.
* @holelen: size of prospective hole in bytes. This will be rounded
......@@ -2459,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL(unmap_mapping_range);
/**
* vmtruncate - unmap mappings "freed" by truncate() syscall
* @inode: inode of the file used
* @offset: file offset to start truncating
*
* NOTE! We have to be ready to update the memory sharing
* between the file and the memory map for a potential last
* incomplete page. Ugly, but necessary.
*/
int vmtruncate(struct inode * inode, loff_t offset)
{
if (inode->i_size < offset) {
unsigned long limit;
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
goto out_big;
i_size_write(inode, offset);
} else {
struct address_space *mapping = inode->i_mapping;
/*
* truncation of in-use swapfiles is disallowed - it would
* cause subsequent swapout to scribble on the now-freed
* blocks.
*/
if (IS_SWAPFILE(inode))
return -ETXTBSY;
i_size_write(inode, offset);
/*
* unmap_mapping_range is called twice, first simply for
* efficiency so that truncate_inode_pages does fewer
* single-page unmaps. However after this first call, and
* before truncate_inode_pages finishes, it is possible for
* private pages to be COWed, which remain after
* truncate_inode_pages finishes, hence the second
* unmap_mapping_range call must be made for correctness.
*/
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, offset);
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
out_big:
return -EFBIG;
}
EXPORT_SYMBOL(vmtruncate);
int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
......
......@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
if (vma->vm_file) {
/*
* Subtle point from Rajesh Venkatasubramanian: before
* moving file-based ptes, we must lock vmtruncate out,
* since it might clean the dst vma before the src vma,
* moving file-based ptes, we must lock truncate_pagecache
* out, since it might clean the dst vma before the src vma,
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
......
......@@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem);
struct vm_operations_struct generic_file_vm_ops = {
};
/*
* Handle all mappings that got truncated by a "truncate()"
* system call.
*
* NOTE! We have to be ready to update the memory sharing
* between the file and the memory map for a potential last
* incomplete page. Ugly, but necessary.
*/
int vmtruncate(struct inode *inode, loff_t offset)
{
struct address_space *mapping = inode->i_mapping;
unsigned long limit;
if (inode->i_size < offset)
goto do_expand;
i_size_write(inode, offset);
truncate_inode_pages(mapping, offset);
goto out_truncate;
do_expand:
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
goto out;
i_size_write(inode, offset);
out_truncate:
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
out:
return -EFBIG;
}
EXPORT_SYMBOL(vmtruncate);
/*
* Return the total memory allocated for this pointer, not
* just what the caller asked for.
......
......@@ -497,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
/**
* truncate_pagecache - unmap and remove pagecache that has been truncated
* @inode: inode
* @old: old file offset
* @new: new file offset
*
* inode's new i_size must already be written before truncate_pagecache
* is called.
*
* This function should typically be called before the filesystem
* releases resources associated with the freed range (eg. deallocates
* blocks). This way, pagecache will always stay logically coherent
* with on-disk format, and the filesystem would not have to deal with
* situations such as writepage being called for a page that has already
* had its underlying blocks deallocated.
*/
void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
{
if (new < old) {
struct address_space *mapping = inode->i_mapping;
/*
* unmap_mapping_range is called twice, first simply for
* efficiency so that truncate_inode_pages does fewer
* single-page unmaps. However after this first call, and
* before truncate_inode_pages finishes, it is possible for
* private pages to be COWed, which remain after
* truncate_inode_pages finishes, hence the second
* unmap_mapping_range call must be made for correctness.
*/
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, new);
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
}
}
EXPORT_SYMBOL(truncate_pagecache);
/**
* vmtruncate - unmap mappings "freed" by truncate() syscall
* @inode: inode of the file used
* @offset: file offset to start truncating
*
* NOTE! We have to be ready to update the memory sharing
* between the file and the memory map for a potential last
* incomplete page. Ugly, but necessary.
*/
int vmtruncate(struct inode *inode, loff_t offset)
{
loff_t oldsize;
int error;
error = inode_newsize_ok(inode, offset);
if (error)
return error;
oldsize = inode->i_size;
i_size_write(inode, offset);
truncate_pagecache(inode, oldsize, offset);
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return error;
}
EXPORT_SYMBOL(vmtruncate);
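The fuse and nfs conversions above follow the same sequence this new vmtruncate() does: update i_size first, then let truncate_pagecache() unmap and drop the now-stale pages. Schematically, for a filesystem that learns a new size from elsewhere (examplefs_set_size is an illustrative name):

#include <linux/fs.h>
#include <linux/mm.h>

static void examplefs_set_size(struct inode *inode, loff_t newsize)
{
	loff_t oldsize;

	spin_lock(&inode->i_lock);
	oldsize = inode->i_size;
	i_size_write(inode, newsize);
	spin_unlock(&inode->i_lock);
	/* no-op when expanding; unmaps and truncates when shrinking */
	truncate_pagecache(inode, oldsize, newsize);
}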