xfs_file.c 11.3 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2 3
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
L
Linus Torvalds 已提交
4
 *
5 6
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
L
Linus Torvalds 已提交
7 8
 * published by the Free Software Foundation.
 *
9 10 11 12
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
L
Linus Torvalds 已提交
13
 *
14 15 16
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
L
Linus Torvalds 已提交
17 18
 */
#include "xfs.h"
19
#include "xfs_bit.h"
L
Linus Torvalds 已提交
20
#include "xfs_log.h"
21
#include "xfs_inum.h"
L
Linus Torvalds 已提交
22
#include "xfs_sb.h"
23
#include "xfs_ag.h"
L
Linus Torvalds 已提交
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
#include "xfs_dir2.h"
#include "xfs_trans.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_ioctl32.h"
40
#include "xfs_vnodeops.h"
L
Linus Torvalds 已提交
41 42 43 44

#include <linux/dcache.h>
#include <linux/smp_lock.h>

45
/*
 * Forward declaration: xfs_file_mmap() installs this table on each vma;
 * the initializer lives at the bottom of this file.
 */
static struct vm_operations_struct xfs_file_vm_ops;
L
Linus Torvalds 已提交
46

47
STATIC_INLINE ssize_t
48
__xfs_file_read(
L
Linus Torvalds 已提交
49
	struct kiocb		*iocb,
50 51
	const struct iovec	*iov,
	unsigned long		nr_segs,
L
Linus Torvalds 已提交
52 53 54 55 56 57 58 59
	int			ioflags,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;

	BUG_ON(iocb->ki_pos != pos);
	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
60 61
	return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
				nr_segs, &iocb->ki_pos, ioflags);
L
Linus Torvalds 已提交
62 63 64
}

STATIC ssize_t
65
xfs_file_aio_read(
L
Linus Torvalds 已提交
66
	struct kiocb		*iocb,
67 68
	const struct iovec	*iov,
	unsigned long		nr_segs,
L
Linus Torvalds 已提交
69 70
	loff_t			pos)
{
71
	return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
L
Linus Torvalds 已提交
72 73 74
}

STATIC ssize_t
75
xfs_file_aio_read_invis(
L
Linus Torvalds 已提交
76
	struct kiocb		*iocb,
77 78
	const struct iovec	*iov,
	unsigned long		nr_segs,
L
Linus Torvalds 已提交
79 80
	loff_t			pos)
{
81
	return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
L
Linus Torvalds 已提交
82 83
}

84
STATIC_INLINE ssize_t
85
__xfs_file_write(
86 87 88 89 90
	struct kiocb		*iocb,
	const struct iovec	*iov,
	unsigned long		nr_segs,
	int			ioflags,
	loff_t			pos)
L
Linus Torvalds 已提交
91 92 93 94 95 96
{
	struct file	*file = iocb->ki_filp;

	BUG_ON(iocb->ki_pos != pos);
	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
97 98
	return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
				&iocb->ki_pos, ioflags);
L
Linus Torvalds 已提交
99 100 101
}

STATIC ssize_t
102
xfs_file_aio_write(
L
Linus Torvalds 已提交
103
	struct kiocb		*iocb,
104 105
	const struct iovec	*iov,
	unsigned long		nr_segs,
L
Linus Torvalds 已提交
106 107
	loff_t			pos)
{
108
	return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
L
Linus Torvalds 已提交
109 110 111
}

STATIC ssize_t
112
xfs_file_aio_write_invis(
L
Linus Torvalds 已提交
113
	struct kiocb		*iocb,
114 115
	const struct iovec	*iov,
	unsigned long		nr_segs,
L
Linus Torvalds 已提交
116 117
	loff_t			pos)
{
118
	return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
L
Linus Torvalds 已提交
119 120
}

121 122 123
/*
 * ->splice_read method of xfs_file_operations: resolves the XFS inode
 * behind @infilp and calls xfs_splice_read() with no extra I/O flags.
 */
STATIC ssize_t
xfs_file_splice_read(
	struct file		*infilp,
	loff_t			*ppos,
	struct pipe_inode_info	*pipe,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = infilp->f_path.dentry->d_inode;

	return xfs_splice_read(XFS_I(inode), infilp, ppos, pipe, len,
			       flags, 0);
}

/*
 * ->splice_read method of xfs_invis_file_operations: same as
 * xfs_file_splice_read() but passes IO_INVIS as the I/O flags.
 */
STATIC ssize_t
xfs_file_splice_read_invis(
	struct file		*infilp,
	loff_t			*ppos,
	struct pipe_inode_info	*pipe,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = infilp->f_path.dentry->d_inode;

	return xfs_splice_read(XFS_I(inode), infilp, ppos, pipe, len,
			       flags, IO_INVIS);
}

/*
 * ->splice_write method of xfs_file_operations: resolves the XFS inode
 * behind @outfilp and calls xfs_splice_write() with no extra I/O flags.
 */
STATIC ssize_t
xfs_file_splice_write(
	struct pipe_inode_info	*pipe,
	struct file		*outfilp,
	loff_t			*ppos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = outfilp->f_path.dentry->d_inode;

	return xfs_splice_write(XFS_I(inode), pipe, outfilp, ppos, len,
				flags, 0);
}

/*
 * ->splice_write method of xfs_invis_file_operations: same as
 * xfs_file_splice_write() but passes IO_INVIS as the I/O flags.
 */
STATIC ssize_t
xfs_file_splice_write_invis(
	struct pipe_inode_info	*pipe,
	struct file		*outfilp,
	loff_t			*ppos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = outfilp->f_path.dentry->d_inode;

	return xfs_splice_write(XFS_I(inode), pipe, outfilp, ppos, len,
				flags, IO_INVIS);
}
L
Linus Torvalds 已提交
168 169

STATIC int
170
xfs_file_open(
L
Linus Torvalds 已提交
171 172 173 174 175
	struct inode	*inode,
	struct file	*filp)
{
	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
176
	return -xfs_open(XFS_I(inode));
L
Linus Torvalds 已提交
177 178 179
}

STATIC int
180
xfs_file_release(
L
Linus Torvalds 已提交
181 182 183
	struct inode	*inode,
	struct file	*filp)
{
184
	return -xfs_release(XFS_I(inode));
L
Linus Torvalds 已提交
185 186
}

D
David Chinner 已提交
187 188 189 190 191 192 193 194 195 196
/*
 * We ignore the datasync flag here because a datasync is effectively
 * identical to an fsync. That is, datasync implies that we need to write
 * only the metadata needed to be able to access the data that is written
 * if we crash after the call completes. Hence if we are writing beyond
 * EOF we have to log the inode size change as well, which makes it a
 * full fsync. If we don't write beyond EOF, the inode core will be
 * clean in memory and so we don't need to log the inode, just like
 * fsync.
 */
L
Linus Torvalds 已提交
197
STATIC int
198
xfs_file_fsync(
L
Linus Torvalds 已提交
199 200 201 202
	struct file	*filp,
	struct dentry	*dentry,
	int		datasync)
{
203
	xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
D
David Chinner 已提交
204
	return -xfs_fsync(XFS_I(dentry->d_inode));
L
Linus Torvalds 已提交
205 206
}

207 208 209 210 211 212 213 214 215
/*
 * Unfortunately we can't just use the clean and simple readdir implementation
 * below, because nfs might call back into ->lookup from the filldir callback
 * and that will deadlock the low-level btree code.
 *
 * Hopefully we'll find a better workaround that allows us to use the optimal
 * version at least for local readdirs for 2.6.25.
 */
#if 0
L
Linus Torvalds 已提交
216
STATIC int
217
xfs_file_readdir(
L
Linus Torvalds 已提交
218 219 220 221
	struct file	*filp,
	void		*dirent,
	filldir_t	filldir)
{
C
Christoph Hellwig 已提交
222
	struct inode	*inode = filp->f_path.dentry->d_inode;
223
	xfs_inode_t	*ip = XFS_I(inode);
C
Christoph Hellwig 已提交
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
	int		error;
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass down the total size of the buffer
	 * we read into down to the filesystem.  With the filldir concept
	 * it's not needed for correct information, but the XFS dir2 leaf
	 * code wants an estimate of the buffer size to calculate it's
	 * readahead window and size the buffers used for mapping to
	 * physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some
	 * point we can change the ->readdir prototype to include the
	 * buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);

241
	error = xfs_readdir(ip, dirent, bufsize,
C
Christoph Hellwig 已提交
242 243 244 245
				(xfs_off_t *)&filp->f_pos, filldir);
	if (error)
		return -error;
	return 0;
L
Linus Torvalds 已提交
246
}
247 248 249 250
#else

struct hack_dirent {
	u64		ino;
251 252
	loff_t		offset;
	int		namlen;
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
	unsigned int	d_type;
	char		name[];
};

/*
 * State handed to xfs_hack_filldir() through its opaque buffer argument:
 * the temporary record buffer and how much of it has been filled.
 */
struct hack_callback {
	char		*dirent;	/* start of the record buffer */
	size_t		len;		/* size of the buffer in bytes */
	size_t		used;		/* bytes already taken by records */
};

/*
 * filldir callback used by xfs_file_readdir() to capture entries into a
 * private buffer instead of handing them straight to the caller.
 *
 * Appends one struct hack_dirent (padded to a multiple of sizeof(u64))
 * at the current fill position.  Returns 0 on success, or -EINVAL
 * without touching the buffer when the record does not fit in the
 * remaining space.
 */
STATIC int
xfs_hack_filldir(
	void		*__buf,
	const char	*name,
	int		namlen,
	loff_t		offset,
	u64		ino,
	unsigned int	d_type)
{
	struct hack_callback *cb = __buf;
	struct hack_dirent *rec;
	unsigned int	reclen;

	reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64));
	if (cb->used + reclen > cb->len)
		return -EINVAL;

	rec = (struct hack_dirent *)(cb->dirent + cb->used);
	rec->ino = ino;
	rec->offset = offset;
	rec->namlen = namlen;
	rec->d_type = d_type;
	memcpy(rec->name, name, namlen);

	cb->used += reclen;
	return 0;
}

/*
 * ->readdir implementation that never calls the caller's filldir from
 * inside xfs_readdir().
 *
 * Entries are first collected into a temporary kmalloc'ed buffer via
 * xfs_hack_filldir(), then replayed to @filldir one at a time with the
 * offset masked to 31 bits.  This repeats until xfs_readdir() reports an
 * error, makes no progress, or @filldir returns non-zero.
 *
 * On a run without error filp->f_pos is updated: to the masked
 * xfs_readdir() offset when everything buffered was consumed, otherwise
 * to the offset of the entry @filldir rejected.
 *
 * Returns 0 or a negative errno (-ENOMEM if no buffer could be
 * allocated, otherwise the sign-flipped xfs_readdir() result).
 */
STATIC int
xfs_file_readdir(
	struct file	*filp,
	void		*dirent,
	filldir_t	filldir)
{
	struct inode	*inode = filp->f_path.dentry->d_inode;
	xfs_inode_t	*ip = XFS_I(inode);
	struct hack_callback buf;
	struct hack_dirent *de;
	xfs_off_t	start_offset, curr_offset, offset;
	loff_t		size;
	int		error;

	/*
	 * Try fairly hard to get memory: start with a page and keep
	 * halving the request, giving up once it drops below 1024 bytes.
	 */
	buf.len = PAGE_CACHE_SIZE;
	buf.dirent = kmalloc(buf.len, GFP_KERNEL);
	while (!buf.dirent) {
		buf.len >>= 1;
		if (buf.len < 1024)
			return -ENOMEM;
		buf.dirent = kmalloc(buf.len, GFP_KERNEL);
	}

	/*
	 * NOTE(review): a file position of 0x7fffffff is translated to
	 * 0xffffffff before being handed to xfs_readdir(); presumably this
	 * undoes the 31-bit masking applied below - confirm.
	 */
	curr_offset = filp->f_pos;
	offset = (curr_offset == 0x7fffffff) ? 0xffffffff : curr_offset;

	for (;;) {
		unsigned int reclen;

		/* Refill the temporary buffer from the current offset. */
		start_offset = offset;
		buf.used = 0;
		error = -xfs_readdir(ip, &buf, buf.len, &offset,
				     xfs_hack_filldir);
		if (error || offset == start_offset) {
			size = 0;
			break;
		}

		/* Replay the buffered records to the real callback. */
		size = buf.used;
		de = (struct hack_dirent *)buf.dirent;
		while (size > 0) {
			curr_offset = de->offset /* & 0x7fffffff */;
			if (filldir(dirent, de->name, de->namlen,
				    curr_offset & 0x7fffffff,
				    de->ino, de->d_type))
				goto done;

			reclen = ALIGN(sizeof(struct hack_dirent) +
				       de->namlen, sizeof(u64));
			size -= reclen;
			de = (struct hack_dirent *)((char *)de + reclen);
		}
	}

 done:
	if (error == 0 && size == 0)
		filp->f_pos = offset & 0x7fffffff;
	else if (error == 0 && de)
		filp->f_pos = curr_offset;

	kfree(buf.dirent);
	return error;
}
#endif
L
Linus Torvalds 已提交
366 367

STATIC int
368
xfs_file_mmap(
L
Linus Torvalds 已提交
369 370 371
	struct file	*filp,
	struct vm_area_struct *vma)
{
372
	vma->vm_ops = &xfs_file_vm_ops;
N
Nick Piggin 已提交
373
	vma->vm_flags |= VM_CAN_NONLINEAR;
374

375
	file_accessed(filp);
L
Linus Torvalds 已提交
376 377 378 379
	return 0;
}

STATIC long
380
xfs_file_ioctl(
L
Linus Torvalds 已提交
381 382
	struct file	*filp,
	unsigned int	cmd,
383
	unsigned long	p)
L
Linus Torvalds 已提交
384 385
{
	int		error;
386
	struct inode	*inode = filp->f_path.dentry->d_inode;
L
Linus Torvalds 已提交
387

388
	error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
389
	xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
L
Linus Torvalds 已提交
390 391 392 393 394 395 396 397 398 399 400

	/* NOTE:  some of the ioctl's return positive #'s as a
	 *	  byte count indicating success, such as
	 *	  readlink_by_handle.  So we don't "sign flip"
	 *	  like most other routines.  This means true
	 *	  errors need to be returned as a negative value.
	 */
	return error;
}

STATIC long
401
xfs_file_ioctl_invis(
L
Linus Torvalds 已提交
402 403
	struct file	*filp,
	unsigned int	cmd,
404
	unsigned long	p)
L
Linus Torvalds 已提交
405
{
406
	int		error;
407
	struct inode	*inode = filp->f_path.dentry->d_inode;
L
Linus Torvalds 已提交
408

409
	error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
410
	xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
L
Linus Torvalds 已提交
411 412 413 414 415 416 417 418 419 420

	/* NOTE:  some of the ioctl's return positive #'s as a
	 *	  byte count indicating success, such as
	 *	  readlink_by_handle.  So we don't "sign flip"
	 *	  like most other routines.  This means true
	 *	  errors need to be returned as a negative value.
	 */
	return error;
}

421 422 423 424 425 426 427 428 429 430 431 432 433 434
/*
 * mmap()d file has taken write protection fault and is being made
 * writable. We can set the page state up correctly for a writable
 * page, which means we can do correct delalloc accounting (ENOSPC
 * checking!) and unwritten extent mapping.
 *
 * All of the work is delegated to block_page_mkwrite() with
 * xfs_get_blocks as the block mapping callback; its result is returned
 * unchanged.
 */
STATIC int
xfs_vm_page_mkwrite(
	struct vm_area_struct	*vma,
	struct page		*page)
{
	int			ret;

	ret = block_page_mkwrite(vma, page, xfs_get_blocks);
	return ret;
}

435
const struct file_operations xfs_file_operations = {
L
Linus Torvalds 已提交
436 437
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
438
	.write		= do_sync_write,
439 440
	.aio_read	= xfs_file_aio_read,
	.aio_write	= xfs_file_aio_write,
441 442
	.splice_read	= xfs_file_splice_read,
	.splice_write	= xfs_file_splice_write,
443
	.unlocked_ioctl	= xfs_file_ioctl,
L
Linus Torvalds 已提交
444
#ifdef CONFIG_COMPAT
445
	.compat_ioctl	= xfs_file_compat_ioctl,
L
Linus Torvalds 已提交
446
#endif
447 448 449 450
	.mmap		= xfs_file_mmap,
	.open		= xfs_file_open,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
L
Linus Torvalds 已提交
451
#ifdef HAVE_FOP_OPEN_EXEC
452
	.open_exec	= xfs_file_open_exec,
L
Linus Torvalds 已提交
453 454 455
#endif
};

456
const struct file_operations xfs_invis_file_operations = {
L
Linus Torvalds 已提交
457 458
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
459
	.write		= do_sync_write,
460 461
	.aio_read	= xfs_file_aio_read_invis,
	.aio_write	= xfs_file_aio_write_invis,
462 463
	.splice_read	= xfs_file_splice_read_invis,
	.splice_write	= xfs_file_splice_write_invis,
464
	.unlocked_ioctl	= xfs_file_ioctl_invis,
L
Linus Torvalds 已提交
465
#ifdef CONFIG_COMPAT
466
	.compat_ioctl	= xfs_file_compat_invis_ioctl,
L
Linus Torvalds 已提交
467
#endif
468 469 470 471
	.mmap		= xfs_file_mmap,
	.open		= xfs_file_open,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
L
Linus Torvalds 已提交
472 473 474
};


475
const struct file_operations xfs_dir_file_operations = {
L
Linus Torvalds 已提交
476
	.read		= generic_read_dir,
477 478
	.readdir	= xfs_file_readdir,
	.unlocked_ioctl	= xfs_file_ioctl,
479
#ifdef CONFIG_COMPAT
480
	.compat_ioctl	= xfs_file_compat_ioctl,
481
#endif
482
	.fsync		= xfs_file_fsync,
L
Linus Torvalds 已提交
483 484
};

485
static struct vm_operations_struct xfs_file_vm_ops = {
486
	.fault		= filemap_fault,
487
	.page_mkwrite	= xfs_vm_page_mkwrite,
488
};