diff --git a/Documentation/vm/locking b/Documentation/vm/locking index f366fa956179505cc602ff79f90adc83f95f7a77..25fadb448760008dc6408ed0cca2a72c66b2efd0 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the mm start up ... this is a loose form of stability on mm_users. For example, it is used in copy_mm to protect against a racing tlb_gather_mmu single address space optimization, so that the zap_page_range (from -vmtruncate) does not lose sending ipi's to cloned threads that might +truncate) does not lose sending ipi's to cloned threads that might be spawned underneath it and go to user mode to drag in pte's into tlbs. swap_lock diff --git a/fs/attr.c b/fs/attr.c index 9fe1b1bd30a808c82b1ebfbd8f644732e4f36f3c..96d394bdaddfa29e52b74d815dd5842e87cb6820 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -18,7 +18,7 @@ /* Taken over from the old code... */ /* POSIX UID/GID verification for setting inode attributes. */ -int inode_change_ok(struct inode *inode, struct iattr *attr) +int inode_change_ok(const struct inode *inode, struct iattr *attr) { int retval = -EPERM; unsigned int ia_valid = attr->ia_valid; @@ -60,9 +60,51 @@ int inode_change_ok(struct inode *inode, struct iattr *attr) error: return retval; } - EXPORT_SYMBOL(inode_change_ok); +/** + * inode_newsize_ok - may this inode be truncated to a given size + * @inode: the inode to be truncated + * @offset: the new size to assign to the inode + * @Returns: 0 on success, -ve errno on failure + * + * inode_newsize_ok will check filesystem limits and ulimits to check that the + * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ + * when necessary. Caller must not proceed with inode size change if failure is + * returned. @inode must be a file (not directory), with appropriate + * permissions to allow truncate (inode_newsize_ok does NOT check these + * conditions). + * + * inode_newsize_ok must be called with i_mutex held. + */ +int inode_newsize_ok(const struct inode *inode, loff_t offset) +{ + if (inode->i_size < offset) { + unsigned long limit; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out_big; + } else { + /* + * truncation of in-use swapfiles is disallowed - it would + * cause subsequent swapout to scribble on the now-freed + * blocks. + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + } + + return 0; +out_sig: + send_sig(SIGXFSZ, current, 0); +out_big: + return -EFBIG; +} +EXPORT_SYMBOL(inode_newsize_ok); + int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index dd376c124e71561193f0a824f407314d6edda4aa..33baf27fac78e5c4fbc36e12e3a4a16b2322afe4 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb) { kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; - - if (BEFS_SB(sb)->nls) { - unload_nls(BEFS_SB(sb)->nls); - BEFS_SB(sb)->nls = NULL; - } - + unload_nls(BEFS_SB(sb)->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 5d1ed50bd46c591c1ef63b312c4844d013f5239b..9cf4b926f8e47a509f0994ad0e696d79205335bb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev); * freeze_bdev -- lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * - * This takes the block device bd_mount_sem to make sure no new mounts - * happen on bdev until thaw_bdev() is called. * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. * The reference counter (bd_fsfreeze_count) guarantees that only the last @@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev) int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - bdev->bd_fsfreeze_count++; + if (++bdev->bd_fsfreeze_count > 1) { + /* + * We don't even need to grab a reference - the first call + * to freeze_bdev grab an active reference and only the last + * thaw_bdev drops it. + */ sb = get_super(bdev); + drop_super(sb); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } - bdev->bd_fsfreeze_count++; - - down(&bdev->bd_mount_sem); - sb = get_super(bdev); - if (sb && !(sb->s_flags & MS_RDONLY)) { - sb->s_frozen = SB_FREEZE_WRITE; - smp_wmb(); - - sync_filesystem(sb); - - sb->s_frozen = SB_FREEZE_TRANS; - smp_wmb(); - - sync_blockdev(sb->s_bdev); - - if (sb->s_op->freeze_fs) { - error = sb->s_op->freeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem freeze failed\n"); - sb->s_frozen = SB_UNFROZEN; - drop_super(sb); - up(&bdev->bd_mount_sem); - bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); - } + + sb = get_active_super(bdev); + if (!sb) + goto out; + if (sb->s_flags & MS_RDONLY) { + deactivate_locked_super(sb); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return sb; + } + + sb->s_frozen = SB_FREEZE_WRITE; + smp_wmb(); + + sync_filesystem(sb); + + sb->s_frozen = SB_FREEZE_TRANS; + smp_wmb(); + + sync_blockdev(sb->s_bdev); + + if (sb->s_op->freeze_fs) { + error = sb->s_op->freeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem freeze failed\n"); + sb->s_frozen = SB_UNFROZEN; + deactivate_locked_super(sb); + bdev->bd_fsfreeze_count--; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return ERR_PTR(error); } } + up_write(&sb->s_umount); + out: sync_blockdev(bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); - - return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ + return sb; /* thaw_bdev releases s->s_umount */ } EXPORT_SYMBOL(freeze_bdev); @@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev); */ int thaw_bdev(struct block_device *bdev, struct super_block *sb) { - int error = 0; + int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (!bdev->bd_fsfreeze_count) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return -EINVAL; - } - - bdev->bd_fsfreeze_count--; - if (bdev->bd_fsfreeze_count > 0) { - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return 0; - } - - if (sb) { - BUG_ON(sb->s_bdev != bdev); - if (!(sb->s_flags & MS_RDONLY)) { - if (sb->s_op->unfreeze_fs) { - error = sb->s_op->unfreeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem thaw failed\n"); - sb->s_frozen = SB_FREEZE_TRANS; - bdev->bd_fsfreeze_count++; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return error; - } - } - sb->s_frozen = SB_UNFROZEN; - smp_wmb(); - wake_up(&sb->s_wait_unfrozen); + if (!bdev->bd_fsfreeze_count) + goto out_unlock; + + error = 0; + if (--bdev->bd_fsfreeze_count > 0) + goto out_unlock; + + if (!sb) + goto out_unlock; + + BUG_ON(sb->s_bdev != bdev); + down_write(&sb->s_umount); + if (sb->s_flags & MS_RDONLY) + goto out_deactivate; + + if (sb->s_op->unfreeze_fs) { + error = sb->s_op->unfreeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem thaw failed\n"); + sb->s_frozen = SB_FREEZE_TRANS; + bdev->bd_fsfreeze_count++; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return error; } - drop_super(sb); } - up(&bdev->bd_mount_sem); + sb->s_frozen = SB_UNFROZEN; + smp_wmb(); + wake_up(&sb->s_wait_unfrozen); + +out_deactivate: + if (sb) + deactivate_locked_super(sb); +out_unlock: mutex_unlock(&bdev->bd_fsfreeze_mutex); return 0; } @@ -430,7 +437,6 @@ static void init_once(void *foo) memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); - sema_init(&bdev->bd_mount_sem, 1); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS diff --git a/fs/buffer.c b/fs/buffer.c index 209f7f15f5f801b4023f6e121643b3744ce1b11e..24afd7422ae866851ecfbd12d7e1c231c5bffda5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - unsigned long limit; int err; - err = -EFBIG; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && size > (loff_t)limit) { - send_sig(SIGXFSZ, current, 0); - goto out; - } - if (size > inode->i_sb->s_maxbytes) + err = inode_newsize_ok(inode, size); + if (err) goto out; err = pagecache_write_begin(NULL, mapping, size, 0, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d79ce2e95c2357a92cf8e252ed27cf7d59557dc5..90c5b39f03135cf0763bdba37701154cc9b40a61 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -185,8 +185,7 @@ cifs_read_super(struct super_block *sb, void *data, cifs_sb->mountdata = NULL; } #endif - if (cifs_sb->local_nls) - unload_nls(cifs_sb->local_nls); + unload_nls(cifs_sb->local_nls); kfree(cifs_sb); } return rc; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1f09c7619319d794dd10a3d58a3543aa34e36ca9..5e2492535daa4ca626ec1e45e812ebd194fa7a2b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) static int cifs_vmtruncate(struct inode *inode, loff_t offset) { - struct address_space *mapping = inode->i_mapping; - unsigned long limit; + loff_t oldsize; + int err; spin_lock(&inode->i_lock); - if (inode->i_size < offset) - goto do_expand; - /* - * truncation of in-use swapfiles is disallowed - it would cause - * subsequent swapout to scribble on the now-freed blocks. - */ - if (IS_SWAPFILE(inode)) { - spin_unlock(&inode->i_lock); - goto out_busy; - } - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); - /* - * unmap_mapping_range is called twice, first simply for efficiency - * so that truncate_inode_pages does fewer single-page unmaps. However - * after this first call, and before truncate_inode_pages finishes, - * it is possible for private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second unmap_mapping_range - * call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) { + err = inode_newsize_ok(inode, offset); + if (err) { spin_unlock(&inode->i_lock); - goto out_sig; - } - if (offset > inode->i_sb->s_maxbytes) { - spin_unlock(&inode->i_lock); - goto out_big; + goto out; } + + oldsize = inode->i_size; i_size_write(inode, offset); spin_unlock(&inode->i_lock); -out_truncate: + truncate_pagecache(inode, oldsize, offset); if (inode->i_op->truncate) inode->i_op->truncate(inode); - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -out_busy: - return -ETXTBSY; +out: + return err; } static int diff --git a/fs/compat.c b/fs/compat.c index 3aa48834a222b87819eb5302a63a507b1b910071..d576b552e8e2eb90a98324e79b7d8917fb96c04c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, char __user * type, unsigned long flags, void __user * data) { - unsigned long type_page; + char *kernel_type; unsigned long data_page; - unsigned long dev_page; + char *kernel_dev; char *dir_page; int retval; - retval = copy_mount_options (type, &type_page); + retval = copy_mount_string(type, &kernel_type); if (retval < 0) goto out; @@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, if (IS_ERR(dir_page)) goto out1; - retval = copy_mount_options (dev_name, &dev_page); + retval = copy_mount_string(dev_name, &kernel_dev); if (retval < 0) goto out2; - retval = copy_mount_options (data, &data_page); + retval = copy_mount_options(data, &data_page); if (retval < 0) goto out3; retval = -EINVAL; - if (type_page && data_page) { - if (!strcmp((char *)type_page, SMBFS_NAME)) { + if (kernel_type && data_page) { + if (!strcmp(kernel_type, SMBFS_NAME)) { do_smb_super_data_conv((void *)data_page); - } else if (!strcmp((char *)type_page, NCPFS_NAME)) { + } else if (!strcmp(kernel_type, NCPFS_NAME)) { do_ncp_super_data_conv((void *)data_page); - } else if (!strcmp((char *)type_page, NFS4_NAME)) { + } else if (!strcmp(kernel_type, NFS4_NAME)) { if (do_nfs4_super_data_conv((void *) data_page)) goto out4; } } - retval = do_mount((char*)dev_page, dir_page, (char*)type_page, + retval = do_mount(kernel_dev, dir_page, kernel_type, flags, (void*)data_page); out4: free_page(data_page); out3: - free_page(dev_page); + kfree(kernel_dev); out2: putname(dir_page); out1: - free_page(type_page); + kfree(kernel_type); out: return retval; } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 5ab10c3bbebec0dd1ba2ab81c340b6be415dcafd..9f500dec3b5901982adcc2618670f72ca4b3c01a 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) } lock_super(sb); - lock_kernel(); sbi = sb->s_fs_info; fscb->s_nextid = cpu_to_le64(sbi->s_nextid); fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); @@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) out: if (or) osd_end_request(or); - unlock_kernel(); unlock_super(sb); kfree(fscb); return ret; @@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb) int num_pend; struct exofs_sb_info *sbi = sb->s_fs_info; - lock_kernel(); - if (sb->s_dirt) exofs_write_super(sb); @@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb) osduld_put_device(sbi->s_dev); kfree(sb->s_fs_info); sb->s_fs_info = NULL; - - unlock_kernel(); } /* diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8970d8c49bb00eaa791a390f5dfd87b56a438294..04629d1302fc45e42aef40b180cfd1fcc4b0f73b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb) iput(sbi->fat_inode); - if (sbi->nls_disk) { - unload_nls(sbi->nls_disk); - sbi->nls_disk = NULL; - sbi->options.codepage = fat_default_codepage; - } - if (sbi->nls_io) { - unload_nls(sbi->nls_io); - sbi->nls_io = NULL; - } - if (sbi->options.iocharset != fat_default_iocharset) { + unload_nls(sbi->nls_disk); + unload_nls(sbi->nls_io); + + if (sbi->options.iocharset != fat_default_iocharset) kfree(sbi->options.iocharset); - sbi->options.iocharset = fat_default_iocharset; - } sb->s_fs_info = NULL; kfree(sbi); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e703654e7f40d901975d3a0953ce353a755248d7..992f6c9410bb0f27c8e6b02f21505c1fbda26aa5 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, return 0; if (attr->ia_valid & ATTR_SIZE) { - unsigned long limit; - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } + err = inode_newsize_ok(inode, attr->ia_size); + if (err) + return err; is_truncate = true; } @@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { - if (outarg.attr.size < oldsize) - fuse_truncate(inode->i_mapping, outarg.attr.size); + truncate_pagecache(inode, oldsize, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc9c79feb5f7c2150edae61b4af2b56d2291f7b9..01cc462ff45d5ccdf0bc0bc0526e493ed60fc729 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid); -void fuse_truncate(struct address_space *mapping, loff_t offset); - /** * Initialize the client device */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6da947daabda1894f55a5079226ade24b30d72e0..1a822ce2b24b7a83fd8c579921d07d769e4c2645 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -void fuse_truncate(struct address_space *mapping, loff_t offset) -{ - /* See vmtruncate() */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); -} - void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, spin_unlock(&fc->lock); if (S_ISREG(inode->i_mode) && oldsize != attr->size) { - if (attr->size < oldsize) - fuse_truncate(inode->i_mapping, attr->size); + truncate_pagecache(inode, oldsize, attr->size); invalidate_inode_pages2(inode->i_mapping); } } diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 7b6165f25fbefe14cacb5a8be3464d79f20e84a1..8bbe03c3f6d54e5286ff3c0c0cce4e6c5f653134 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb) brelse(HFS_SB(sb)->mdb_bh); brelse(HFS_SB(sb)->alt_mdb_bh); - if (HFS_SB(sb)->nls_io) - unload_nls(HFS_SB(sb)->nls_io); - if (HFS_SB(sb)->nls_disk) - unload_nls(HFS_SB(sb)->nls_disk); + unload_nls(HFS_SB(sb)->nls_io); + unload_nls(HFS_SB(sb)->nls_disk); free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); kfree(HFS_SB(sb)); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c0759fe0855b623863eac1345c8b8e4bf35608a9..43022f3d514871d9f2405ff32eacbd1319f8e26a 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb) iput(HFSPLUS_SB(sb).alloc_file); iput(HFSPLUS_SB(sb).hidden_dir); brelse(HFSPLUS_SB(sb).s_vhbh); - if (HFSPLUS_SB(sb).nls) - unload_nls(HFSPLUS_SB(sb).nls); + unload_nls(HFSPLUS_SB(sb).nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -464,8 +463,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) cleanup: hfsplus_put_super(sb); - if (nls) - unload_nls(nls); + unload_nls(nls); return err; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 133335479c241768e4b7490d155c48881d9de373..87a1258953b8e387fe49367acfd6d2ff5c164ca1 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode) static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) { - struct super_block *sb = inode->i_sb; - - if (!hlist_unhashed(&inode->i_hash)) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; - if (!sb || (sb->s_flags & MS_ACTIVE)) { - spin_unlock(&inode_lock); - return; - } - inode->i_state |= I_WILL_FREE; - spin_unlock(&inode_lock); - /* - * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK - * in our backing_dev_info. - */ - write_inode_now(inode, 1); - spin_lock(&inode_lock); - inode->i_state &= ~I_WILL_FREE; - inodes_stat.nr_unused--; - hlist_del_init(&inode->i_hash); + if (generic_detach_inode(inode)) { + truncate_hugepages(inode, 0); + clear_inode(inode); + destroy_inode(inode); } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); - inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); - truncate_hugepages(inode, 0); - clear_inode(inode); - destroy_inode(inode); } static void hugetlbfs_drop_inode(struct inode *inode) diff --git a/fs/inode.c b/fs/inode.c index 76582b06ab975d76afcba7f5800d6c8e3873ebc6..4d8e3be55976272732f6f42610ab7813f66b8133 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode) } EXPORT_SYMBOL(generic_delete_inode); -static void generic_forget_inode(struct inode *inode) +/** + * generic_detach_inode - remove inode from inode lists + * @inode: inode to remove + * + * Remove inode from inode lists, write it if it's dirty. This is just an + * internal VFS helper exported for hugetlbfs. Do not use! + * + * Returns 1 if inode should be completely destroyed. + */ +int generic_detach_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode) inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); - return; + return 0; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; @@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode) inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); + return 1; +} +EXPORT_SYMBOL_GPL(generic_detach_inode); + +static void generic_forget_inode(struct inode *inode) +{ + if (!generic_detach_inode(inode)) + return; if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); @@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) struct inode *inode = dentry->d_inode; struct timespec now; - if (mnt_want_write(mnt)) - return; if (inode->i_flags & S_NOATIME) - goto out; + return; if (IS_NOATIME(inode)) - goto out; + return; if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) - goto out; + return; if (mnt->mnt_flags & MNT_NOATIME) - goto out; + return; if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) - goto out; + return; now = current_fs_time(inode->i_sb); if (!relatime_need_update(mnt, inode, now)) - goto out; + return; if (timespec_equal(&inode->i_atime, &now)) - goto out; + return; + + if (mnt_want_write(mnt)) + return; inode->i_atime = now; mark_inode_dirty_sync(inode); -out: mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1444,34 +1461,37 @@ void file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - int sync_it = 0; - int err; + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return; - err = mnt_want_write_file(file); - if (err) - return; - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_mtime, &now)) { - inode->i_mtime = now; - sync_it = 1; - } + if (!timespec_equal(&inode->i_mtime, &now)) + sync_it = S_MTIME; - if (!timespec_equal(&inode->i_ctime, &now)) { - inode->i_ctime = now; - sync_it = 1; - } + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it |= S_CTIME; - if (IS_I_VERSION(inode)) { - inode_inc_iversion(inode); - sync_it = 1; - } + if (IS_I_VERSION(inode)) + sync_it |= S_VERSION; + + if (!sync_it) + return; - if (sync_it) - mark_inode_dirty_sync(inode); + /* Finally allowed to write? Takes lock. */ + if (mnt_want_write_file(file)) + return; + + /* Only change inode inside the lock region */ + if (sync_it & S_VERSION) + inode_inc_iversion(inode); + if (sync_it & S_CTIME) + inode->i_ctime = now; + if (sync_it & S_MTIME) + inode->i_mtime = now; + mark_inode_dirty_sync(inode); mnt_drop_write(file->f_path.mnt); } EXPORT_SYMBOL(file_update_time); @@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) else if (S_ISSOCK(mode)) inode->i_fop = &bad_sock_fops; else - printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", - mode); + printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" + " inode %s:%lu\n", mode, inode->i_sb->s_id, + inode->i_ino); } EXPORT_SYMBOL(init_special_inode); diff --git a/fs/internal.h b/fs/internal.h index d55ef562f0bb588939d3b870d675a94e163a5a1e..515175b8b72e95f47893d3b1f32017da481e0aec 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *); * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); +extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); diff --git a/fs/ioctl.c b/fs/ioctl.c index 5612880fcbe7d7436f3579c7c1add7360170e407..7b17a14396ff792152ae6da49d178355d5a5e7ea 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags); static int fiemap_check_ranges(struct super_block *sb, u64 start, u64 len, u64 *new_len) { + u64 maxbytes = (u64) sb->s_maxbytes; + *new_len = len; if (len == 0) return -EINVAL; - if (start > sb->s_maxbytes) + if (start > maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. */ - if ((len > sb->s_maxbytes) || - (sb->s_maxbytes - len) < start) - *new_len = sb->s_maxbytes - start; + if (len > maxbytes || (maxbytes - len) < start) + *new_len = maxbytes - start; return 0; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 85f96bc651c727ba04915138c50b45e9b63a3acd..6b4dcd4f2943e632c9d89115a2395084be95adc5 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb) #ifdef CONFIG_JOLIET lock_kernel(); - if (sbi->s_nls_iocharset) { - unload_nls(sbi->s_nls_iocharset); - sbi->s_nls_iocharset = NULL; - } + unload_nls(sbi->s_nls_iocharset); unlock_kernel(); #endif @@ -912,8 +909,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) printk(KERN_WARNING "%s: get root inode failed\n", __func__); out_no_inode: #ifdef CONFIG_JOLIET - if (sbi->s_nls_iocharset) - unload_nls(sbi->s_nls_iocharset); + unload_nls(sbi->s_nls_iocharset); #endif goto out_freesbi; out_no_read: diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 37e6dcda8fc84f587508f8db58abdecee69524e1..2234c73fc5773531bd59ee81b3bbdf45d05a83ff 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb) rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); - sbi->nls_tab = NULL; + + unload_nls(sbi->nls_tab); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); iput(sbi->direct_inode); - sbi->direct_inode = NULL; kfree(sbi); @@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, if (nls_map != (void *) -1) { /* Discard old (if remount) */ - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); + unload_nls(sbi->nls_tab); sbi->nls_tab = nls_map; } return 1; diff --git a/fs/libfs.c b/fs/libfs.c index dcec3d3ea64f944cd51d36f35b4150279c5659f2..219576c52d807e15b779d53be3a42dfa1baae9ae 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available) { loff_t pos = *ppos; + size_t ret; + if (pos < 0) return -EINVAL; - if (pos >= available) + if (pos >= available || !count) return 0; if (count > available - pos) count = available - pos; - if (copy_to_user(to, from + pos, count)) + ret = copy_to_user(to, from + pos, count); + if (ret == count) return -EFAULT; + count -= ret; *ppos = pos + count; return count; } @@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, if (copy_from_user(attr->set_buf, buf, size)) goto out; - ret = len; /* claim we got the whole input */ attr->set_buf[size] = '\0'; val = simple_strtol(attr->set_buf, NULL, 0); - attr->set(attr->data, val); + ret = attr->set(attr->data, val); + if (ret == 0) + ret = len; /* on success, claim we got the whole input */ out: mutex_unlock(&attr->mutex); return ret; diff --git a/fs/namespace.c b/fs/namespace.c index 7230787d18b02979122218429b4bf8af59b24cf7..bdc3cb4fd2220c6fde0f35159a902768a75d60f6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags, { struct vfsmount *mnt; - if (!type || !memchr(type, 0, PAGE_SIZE)) + if (!type) return -EINVAL; /* we need capabilities... */ @@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where) return 0; } +int copy_mount_string(const void __user *data, char **where) +{ + char *tmp; + + if (!data) { + *where = NULL; + return 0; + } + + tmp = strndup_user(data, PAGE_SIZE); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + *where = tmp; + return 0; +} + /* * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to * be given to the mount() call (ie: read-only, no-dev, no-suid etc). @@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; - if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) - return -EINVAL; if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; @@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns); SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { - int retval; + int ret; + char *kernel_type; + char *kernel_dir; + char *kernel_dev; unsigned long data_page; - unsigned long type_page; - unsigned long dev_page; - char *dir_page; - retval = copy_mount_options(type, &type_page); - if (retval < 0) - return retval; + ret = copy_mount_string(type, &kernel_type); + if (ret < 0) + goto out_type; - dir_page = getname(dir_name); - retval = PTR_ERR(dir_page); - if (IS_ERR(dir_page)) - goto out1; + kernel_dir = getname(dir_name); + if (IS_ERR(kernel_dir)) { + ret = PTR_ERR(kernel_dir); + goto out_dir; + } - retval = copy_mount_options(dev_name, &dev_page); - if (retval < 0) - goto out2; + ret = copy_mount_string(dev_name, &kernel_dev); + if (ret < 0) + goto out_dev; - retval = copy_mount_options(data, &data_page); - if (retval < 0) - goto out3; + ret = copy_mount_options(data, &data_page); + if (ret < 0) + goto out_data; - retval = do_mount((char *)dev_page, dir_page, (char *)type_page, - flags, (void *)data_page); - free_page(data_page); + ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags, + (void *) data_page); -out3: - free_page(dev_page); -out2: - putname(dir_page); -out1: - free_page(type_page); - return retval; + free_page(data_page); +out_data: + kfree(kernel_dev); +out_dev: + putname(kernel_dir); +out_dir: + kfree(kernel_type); +out_type: + return ret; } /* diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index b99ce205b1bd7d6715b81ca7d87452d0011ecb2d..cf98da1be23e861dbbeedc76849969f0fa454aff 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb) #ifdef CONFIG_NCPFS_NLS /* unload the NLS charsets */ - if (server->nls_vol) - { - unload_nls(server->nls_vol); - server->nls_vol = NULL; - } - if (server->nls_io) - { - unload_nls(server->nls_io); - server->nls_io = NULL; - } + unload_nls(server->nls_vol); + unload_nls(server->nls_io); #endif /* CONFIG_NCPFS_NLS */ if (server->info_filp) diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 53a7ed7eb9c66da4b69090789afae1e3c5772e7b..0d58caf4a6e1414e51ecd55b09dc11ebbb11beec 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) oldset_io = server->nls_io; server->nls_io = iocharset; - if (oldset_cp) - unload_nls(oldset_cp); - if (oldset_io) - unload_nls(oldset_io); + unload_nls(oldset_cp); + unload_nls(oldset_io); return 0; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 060022b4651c38744c533413ed355f161bd05aad..faa091865ad05c956114ab7c7642994845717382 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { - if (i_size_read(inode) < offset) { - unsigned long limit; - - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - spin_lock(&inode->i_lock); - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); - } else { - struct address_space *mapping = inode->i_mapping; + loff_t oldsize; + int err; - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - spin_lock(&inode->i_lock); - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); + err = inode_newsize_ok(inode, offset); + if (err) + goto out; - /* - * unmap_mapping_range is called twice, first simply for - * efficiency so that truncate_inode_pages does fewer - * single-page unmaps. However after this first call, and - * before truncate_inode_pages finishes, it is possible for - * private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second - * unmap_mapping_range call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - } - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; + spin_lock(&inode->i_lock); + oldsize = inode->i_size; + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + + truncate_pagecache(inode, oldsize, offset); +out: + return err; } /** diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 477d37d83b316367e1ac04fb31ba98e375a37b1a..2224b4d07bf0116b478f9d8cf565c424f9b64b78 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset) void unload_nls(struct nls_table *nls) { - module_put(nls->owner); + if (nls) + module_put(nls->owner); } static const wchar_t charset2uni[256] = { diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index abaaa1cbf8de4dac7ac9e8bfb6e143e82b06bff7..80b04770e8e9c22a64a4becf0686260627e0d7e6 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -201,8 +201,7 @@ static bool parse_options(ntfs_volume *vol, char *opt) v, old_nls->charset); nls_map = old_nls; } else /* nls_map */ { - if (old_nls) - unload_nls(old_nls); + unload_nls(old_nls); } } else if (!strcmp(p, "utf8")) { bool val = false; @@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb) ntfs_free(vol->upcase); vol->upcase = NULL; } - if (vol->nls_map) { - unload_nls(vol->nls_map); - vol->nls_map = NULL; - } + + unload_nls(vol->nls_map); + sb->s_fs_info = NULL; kfree(vol); diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 11f0c06316ded778283af0bdbe53d06101225df0..32fae4040ebf46a94bc84cf1260c2edebce0c330 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* make various checks */ order = get_order(newsize); if (unlikely(order >= MAX_ORDER)) - goto too_big; + return -EFBIG; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && newsize > limit) - goto fsize_exceeded; - - if (newsize > inode->i_sb->s_maxbytes) - goto too_big; + ret = inode_newsize_ok(inode, newsize); + if (ret) + return ret; i_size_write(inode, newsize); @@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) return 0; - fsize_exceeded: - send_sig(SIGXFSZ, current, 0); - too_big: - return -EFBIG; - - add_error: +add_error: while (loop < npages) __free_page(pages + loop++); return ret; diff --git a/fs/read_write.c b/fs/read_write.c index 6c8c55dec2bcd6b2759f9698abab5b663258cc4b..3ac28987f22a38b3c2005a74e8ee3dac01621679 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; if (unlikely(pos + count > max)) { retval = -EOVERFLOW; if (pos >= max) diff --git a/fs/seq_file.c b/fs/seq_file.c index 6c959275f2d0ef41577d9cdb09086f3b9c670883..eae7d9dbf3ffed297d62b8321137de25ba965c83 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path); */ int seq_path(struct seq_file *m, struct path *path, char *esc) { - if (m->count < m->size) { - char *s = m->buf + m->count; - char *p = d_path(path, s, m->size - m->count); + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -1; + + if (size) { + char *p = d_path(path, buf, size); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return s - p; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; } } - m->count = m->size; - return -1; + seq_commit(m, res); + + return res; } EXPORT_SYMBOL(seq_path); @@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path); int seq_path_root(struct seq_file *m, struct path *path, struct path *root, char *esc) { - int err = -ENAMETOOLONG; - if (m->count < m->size) { - char *s = m->buf + m->count; + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -ENAMETOOLONG; + + if (size) { char *p; spin_lock(&dcache_lock); - p = __d_path(path, root, s, m->size - m->count); + p = __d_path(path, root, buf, size); spin_unlock(&dcache_lock); - err = PTR_ERR(p); + res = PTR_ERR(p); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return 0; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; + else + res = -ENAMETOOLONG; } } - m->count = m->size; - return err; + seq_commit(m, res); + + return res < 0 ? res : 0; } /* @@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, */ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) { - if (m->count < m->size) { - char *s = m->buf + m->count; - char *p = dentry_path(dentry, s, m->size - m->count); + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -1; + + if (size) { + char *p = dentry_path(dentry, buf, size); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return s - p; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; } } - m->count = m->size; - return -1; + seq_commit(m, res); + + return res; } int seq_bitmap(struct seq_file *m, const unsigned long *bits, diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1402d2d54f5239d43663e01c792506946bebdbe9..1c4c8f089970041c49de182704bf537dbfce9259 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m) static void smb_unload_nls(struct smb_sb_info *server) { - if (server->remote_nls) { - unload_nls(server->remote_nls); - server->remote_nls = NULL; - } - if (server->local_nls) { - unload_nls(server->local_nls); - server->local_nls = NULL; - } + unload_nls(server->remote_nls); + unload_nls(server->local_nls); } static void diff --git a/fs/super.c b/fs/super.c index 0e7207b9815c78a33f237e678da91f0adb969c45..19eb70b374bcb3239cd7a75ee901336f942b02e4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -465,6 +465,48 @@ struct super_block * get_super(struct block_device *bdev) } EXPORT_SYMBOL(get_super); + +/** + * get_active_super - get an active reference to the superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. Returns the superblock with an active + * reference and s_umount held exclusively or %NULL if none was found. + */ +struct super_block *get_active_super(struct block_device *bdev) +{ + struct super_block *sb; + + if (!bdev) + return NULL; + + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_bdev != bdev) + continue; + + sb->s_count++; + spin_unlock(&sb_lock); + down_write(&sb->s_umount); + if (sb->s_root) { + spin_lock(&sb_lock); + if (sb->s_count > S_BIAS) { + atomic_inc(&sb->s_active); + sb->s_count--; + spin_unlock(&sb_lock); + return sb; + } + spin_unlock(&sb_lock); + } + up_write(&sb->s_umount); + put_super(sb); + yield(); + spin_lock(&sb_lock); + } + spin_unlock(&sb_lock); + return NULL; +} struct super_block * user_get_super(dev_t dev) { @@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; int remount_rw; - + + if (sb->s_frozen != SB_UNFROZEN) + return -EBUSY; + #ifdef CONFIG_BLOCK if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; #endif + if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); @@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = -EBUSY; + goto error_bdev; + } s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) goto error_s; @@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (error) goto out_sb; + /* + * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE + * but s_maxbytes was an unsigned long long for many releases. Throw + * this warning for a little while to try and catch filesystems that + * violate this rule. This warning should be either removed or + * converted to a BUG() in 2.6.34. + */ + WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " + "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); + mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; up_write(&mnt->mnt_sb->s_umount); diff --git a/include/linux/fs.h b/include/linux/fs.h index 78e95b8b66d4071dfaf24e319c3aa1470ffd646f..2adaa2529f184fda637a6a2aeea55bda41693104 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -641,7 +641,6 @@ struct block_device { struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ - struct semaphore bd_mount_sem; struct list_head bd_inodes; void * bd_holder; int bd_holders; @@ -1316,7 +1315,7 @@ struct super_block { unsigned long s_blocksize; unsigned char s_blocksize_bits; unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ + loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; @@ -2157,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); extern void generic_drop_inode(struct inode *inode); +extern int generic_detach_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), @@ -2335,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); +extern struct super_block *get_active_super(struct block_device *bdev); extern struct super_block *user_get_super(dev_t); extern void drop_super(struct super_block *sb); @@ -2382,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *, #define buffer_migrate_page NULL #endif -extern int inode_change_ok(struct inode *, struct iattr *); +extern int inode_change_ok(const struct inode *, struct iattr *); +extern int inode_newsize_ok(const struct inode *, loff_t offset); extern int __must_check inode_setattr(struct inode *, struct iattr *); extern void file_update_time(struct file *file); diff --git a/include/linux/mm.h b/include/linux/mm.h index 6953a5a53e4495eeee028ee6c3b8f3bef00839b7..df08551cb0ad04aa6a16e03c8ab7b40999995bdf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -792,8 +792,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } -extern int vmtruncate(struct inode * inode, loff_t offset); -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern int vmtruncate(struct inode *inode, loff_t offset); +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 0c6a86b795968621a107c2c8c611b5ef7d387516..8366d8f12e537ac9cfb3e9cc053371bcea174025 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -35,6 +35,44 @@ struct seq_operations { #define SEQ_SKIP 1 +/** + * seq_get_buf - get buffer to write arbitrary data to + * @m: the seq_file handle + * @bufp: the beginning of the buffer is stored here + * + * Return the number of bytes available in the buffer, or zero if + * there's no space. + */ +static inline size_t seq_get_buf(struct seq_file *m, char **bufp) +{ + BUG_ON(m->count > m->size); + if (m->count < m->size) + *bufp = m->buf + m->count; + else + *bufp = NULL; + + return m->size - m->count; +} + +/** + * seq_commit - commit data to the buffer + * @m: the seq_file handle + * @num: the number of bytes to commit + * + * Commit @num bytes of data written to a buffer previously acquired + * by seq_buf_get. To signal an error condition, or that the data + * didn't fit in the available space, pass a negative @num value. + */ +static inline void seq_commit(struct seq_file *m, int num) +{ + if (num < 0) { + m->count = m->size; + } else { + BUG_ON(m->count + num > m->size); + m->count += num; + } +} + char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); diff --git a/mm/filemap.c b/mm/filemap.c index c1fc205a92c6eae9840961ad34dbfc88b58a80a4..6c84e598b4a9f7a0c2901387f32307c4e96ebaae 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -58,7 +58,7 @@ /* * Lock ordering: * - * ->i_mmap_lock (vmtruncate) + * ->i_mmap_lock (truncate_pagecache) * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock diff --git a/mm/memory.c b/mm/memory.c index 987389a809e77accb1915dbd291d3e38b8dc1e60..7e91b5f9f690e4ae9d7c3e58bc1530c995311089 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr = vma->vm_start; /* - * Hide vma from rmap and vmtruncate before freeing pgtables + * Hide vma from rmap and truncate_pagecache before freeing + * pgtables */ anon_vma_unlink(vma); unlink_file_vma(vma); @@ -2408,7 +2409,7 @@ static inline void unmap_mapping_range_list(struct list_head *head, * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE - * boundary. Note that this is different from vmtruncate(), which + * boundary. Note that this is different from truncate_pagecache(), which * must keep the partial page. In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes. This will be rounded @@ -2459,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/** - * vmtruncate - unmap mappings "freed" by truncate() syscall - * @inode: inode of the file used - * @offset: file offset to start truncating - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode * inode, loff_t offset) -{ - if (inode->i_size < offset) { - unsigned long limit; - - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - i_size_write(inode, offset); - } else { - struct address_space *mapping = inode->i_mapping; - - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - i_size_write(inode, offset); - - /* - * unmap_mapping_range is called twice, first simply for - * efficiency so that truncate_inode_pages does fewer - * single-page unmaps. However after this first call, and - * before truncate_inode_pages finishes, it is possible for - * private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second - * unmap_mapping_range call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - } - - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; - -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -} -EXPORT_SYMBOL(vmtruncate); - int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) { struct address_space *mapping = inode->i_mapping; diff --git a/mm/mremap.c b/mm/mremap.c index 20a07dba6be04fb20f21bbac5ed1e289a8ccd354..97bff2547719e702150e1cdc4d4a3f6b31a23213 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (vma->vm_file) { /* * Subtle point from Rajesh Venkatasubramanian: before - * moving file-based ptes, we must lock vmtruncate out, - * since it might clean the dst vma before the src vma, + * moving file-based ptes, we must lock truncate_pagecache + * out, since it might clean the dst vma before the src vma, * and we propagate stale pages into the dst afterward. */ mapping = vma->vm_file->f_mapping; diff --git a/mm/nommu.c b/mm/nommu.c index 8d484241d0345e71750b71b36d37bfa7f21bdd06..56a446f059716ccb95736fa7670dd1f07e6b30b5 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem); struct vm_operations_struct generic_file_vm_ops = { }; -/* - * Handle all mappings that got truncated by a "truncate()" - * system call. - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode *inode, loff_t offset) -{ - struct address_space *mapping = inode->i_mapping; - unsigned long limit; - - if (inode->i_size < offset) - goto do_expand; - i_size_write(inode, offset); - - truncate_inode_pages(mapping, offset); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; - i_size_write(inode, offset); - -out_truncate: - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out: - return -EFBIG; -} - -EXPORT_SYMBOL(vmtruncate); - /* * Return the total memory allocated for this pointer, not * just what the caller asked for. diff --git a/mm/truncate.c b/mm/truncate.c index a17b3977cfdf8f8fbb4eeab03b458d3051d2bf33..450cebdabfc0470b2bcb0bc3f2de941feb6453c2 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -497,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping) return invalidate_inode_pages2_range(mapping, 0, -1); } EXPORT_SYMBOL_GPL(invalidate_inode_pages2); + +/** + * truncate_pagecache - unmap and remove pagecache that has been truncated + * @inode: inode + * @old: old file offset + * @new: new file offset + * + * inode's new i_size must already be written before truncate_pagecache + * is called. + * + * This function should typically be called before the filesystem + * releases resources associated with the freed range (eg. deallocates + * blocks). This way, pagecache will always stay logically coherent + * with on-disk format, and the filesystem would not have to deal with + * situations such as writepage being called for a page that has already + * had its underlying blocks deallocated. + */ +void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) +{ + if (new < old) { + struct address_space *mapping = inode->i_mapping; + + /* + * unmap_mapping_range is called twice, first simply for + * efficiency so that truncate_inode_pages does fewer + * single-page unmaps. However after this first call, and + * before truncate_inode_pages finishes, it is possible for + * private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second + * unmap_mapping_range call must be made for correctness. + */ + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(mapping, new); + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); + } +} +EXPORT_SYMBOL(truncate_pagecache); + +/** + * vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating + * + * NOTE! We have to be ready to update the memory sharing + * between the file and the memory map for a potential last + * incomplete page. Ugly, but necessary. + */ +int vmtruncate(struct inode *inode, loff_t offset) +{ + loff_t oldsize; + int error; + + error = inode_newsize_ok(inode, offset); + if (error) + return error; + oldsize = inode->i_size; + i_size_write(inode, offset); + truncate_pagecache(inode, oldsize, offset); + if (inode->i_op->truncate) + inode->i_op->truncate(inode); + + return error; +} +EXPORT_SYMBOL(vmtruncate);