diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 72129493e9d3563687a6606223a123692fe13a5c..ec6dcdc181ee45fc8a74bb6a018f1fbab01dc1e7 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -849,6 +849,36 @@ xfs_setattr_size( return error; truncate_setsize(inode, newsize); + /* + * The "we can't serialise against page faults" pain gets worse. + * + * If the file is mapped then we have to clean the page at the old EOF + * when extending the file. Extending the file can expose changes the + * underlying page mapping (e.g. from beyond EOF to a hole or + * unwritten), and so on the next attempt to write to that page we need + * to remap it for write. i.e. we need .page_mkwrite() to be called. + * Hence we need to clean the page to clean the pte and so a new write + * fault will be triggered appropriately. + * + * If we do it before we change the inode size, then we can race with a + * page fault that maps the page with exactly the same problem. If we do + * it after we change the file size, then a new page fault can come in + * and allocate space before we've run the rest of the truncate + * transaction. That's kinda grotesque, but it's better than have data + * over a hole, and so that's the lesser evil that has been chosen here. + * + * The real solution, however, is to have some mechanism for locking out + * page faults while a truncate is in progress. + */ + if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { + error = filemap_write_and_wait_range( + VFS_I(ip)->i_mapping, + round_down(oldsize, PAGE_CACHE_SIZE), + round_up(oldsize, PAGE_CACHE_SIZE) - 1); + if (error) + return error; + } + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error)