file.c 34.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
D
David Teigland 已提交
2 3
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
D
David Teigland 已提交
5 6 7 8
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
A
Arnd Bergmann 已提交
9
#include <linux/compat.h>
D
David Teigland 已提交
10 11 12 13 14 15
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
M
Miklos Szeredi 已提交
16
#include <linux/mount.h>
17
#include <linux/fs.h>
18
#include <linux/gfs2_ondisk.h>
19 20
#include <linux/falloc.h>
#include <linux/swap.h>
21
#include <linux/crc32.h>
22
#include <linux/writeback.h>
23
#include <linux/uaccess.h>
24 25
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
26
#include <linux/delay.h>
27
#include <linux/backing-dev.h>
D
David Teigland 已提交
28 29

#include "gfs2.h"
30
#include "incore.h"
D
David Teigland 已提交
31
#include "bmap.h"
32
#include "aops.h"
D
David Teigland 已提交
33 34 35 36 37 38 39 40 41
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
42
#include "util.h"
D
David Teigland 已提交
43 44 45 46 47

/**
 * gfs2_llseek - seek to a location in a file
 * @file: the file
 * @offset: the offset
48
 * @whence: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
D
David Teigland 已提交
49 50 51 52 53 54 55
 *
 * SEEK_END requires the glock for the file because it references the
 * file's size.
 *
 * Returns: The new offset, or errno
 */

56
static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
D
David Teigland 已提交
57
{
58
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
D
David Teigland 已提交
59 60 61
	struct gfs2_holder i_gh;
	loff_t error;

62
	switch (whence) {
63
	case SEEK_END:
D
David Teigland 已提交
64 65 66
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (!error) {
67
			error = generic_file_llseek(file, offset, whence);
D
David Teigland 已提交
68 69
			gfs2_glock_dq_uninit(&i_gh);
		}
70
		break;
71 72 73 74 75 76 77 78 79

	case SEEK_DATA:
		error = gfs2_seek_data(file, offset);
		break;

	case SEEK_HOLE:
		error = gfs2_seek_hole(file, offset);
		break;

80 81
	case SEEK_CUR:
	case SEEK_SET:
82 83 84 85
		/*
		 * These don't reference inode->i_size and don't depend on the
		 * block mapping, so we don't need the glock.
		 */
86
		error = generic_file_llseek(file, offset, whence);
87 88 89 90
		break;
	default:
		error = -EINVAL;
	}
D
David Teigland 已提交
91 92 93 94 95

	return error;
}

/**
A
Al Viro 已提交
96
 * gfs2_readdir - Iterator for a directory
D
David Teigland 已提交
97
 * @file: The directory to read from
A
Al Viro 已提交
98
 * @ctx: What to feed directory entries to
D
David Teigland 已提交
99 100 101 102
 *
 * Returns: errno
 */

A
Al Viro 已提交
103
static int gfs2_readdir(struct file *file, struct dir_context *ctx)
D
David Teigland 已提交
104
{
105
	struct inode *dir = file->f_mapping->host;
106
	struct gfs2_inode *dip = GFS2_I(dir);
D
David Teigland 已提交
107 108 109
	struct gfs2_holder d_gh;
	int error;

A
Al Viro 已提交
110 111
	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
	if (error)
D
David Teigland 已提交
112 113
		return error;

A
Al Viro 已提交
114
	error = gfs2_dir_read(dir, ctx, &file->f_ra);
D
David Teigland 已提交
115 116 117 118 119 120

	gfs2_glock_dq_uninit(&d_gh);

	return error;
}

121
/**
122
 * fsflag_gfs2flag
123
 *
124 125
 * The FS_JOURNAL_DATA_FL flag maps to GFS2_DIF_INHERIT_JDATA for directories,
 * and to GFS2_DIF_JDATA for non-directories.
126
 */
127 128 129 130 131 132 133 134 135 136 137
static struct {
	u32 fsflag;
	u32 gfsflag;
} fsflag_gfs2flag[] = {
	{FS_SYNC_FL, GFS2_DIF_SYNC},
	{FS_IMMUTABLE_FL, GFS2_DIF_IMMUTABLE},
	{FS_APPEND_FL, GFS2_DIF_APPENDONLY},
	{FS_NOATIME_FL, GFS2_DIF_NOATIME},
	{FS_INDEX_FL, GFS2_DIF_EXHASH},
	{FS_TOPDIR_FL, GFS2_DIF_TOPDIR},
	{FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA},
138
};
139

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
/*
 * Translate on-disk GFS2_DIF_* flags into the FS_*_FL flags using the
 * fsflag_gfs2flag table.  The journaled-data flag that does not apply to the
 * inode's type (GFS2_DIF_JDATA for directories, GFS2_DIF_INHERIT_JDATA for
 * everything else) is ignored.
 */
static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags)
{
	u32 result = 0;
	int n = 0;

	gfsflags &= S_ISDIR(inode->i_mode) ? ~GFS2_DIF_JDATA :
					     ~GFS2_DIF_INHERIT_JDATA;

	while (n < ARRAY_SIZE(fsflag_gfs2flag)) {
		if (fsflag_gfs2flag[n].gfsflag & gfsflags)
			result |= fsflag_gfs2flag[n].fsflag;
		n++;
	}

	return result;
}

156
/*
 * FS_IOC_GETFLAGS helper: read the inode's on-disk flags under a shared
 * glock, translate them to FS_*_FL flags and copy them to user space.
 *
 * Returns 0, the glock error, or -EFAULT if the copy to @ptr fails.
 */
static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
{
	struct inode *inode = file_inode(filp);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);

	ret = gfs2_glock_nq(&gh);
	if (!ret) {
		u32 fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);

		if (put_user(fsflags, ptr))
			ret = -EFAULT;

		gfs2_glock_dq(&gh);
	}

	gfs2_holder_uninit(&gh);
	return ret;
}

180 181 182 183 184
void gfs2_set_inode_flags(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned int flags = inode->i_flags;

S
Steven Whitehouse 已提交
185 186
	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
	if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
187
		flags |= S_NOSEC;
188
	if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
189
		flags |= S_IMMUTABLE;
190
	if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
191
		flags |= S_APPEND;
192
	if (ip->i_diskflags & GFS2_DIF_NOATIME)
193
		flags |= S_NOATIME;
194
	if (ip->i_diskflags & GFS2_DIF_SYNC)
195 196 197 198
		flags |= S_SYNC;
	inode->i_flags = flags;
}

199 200 201 202 203 204
/* On-disk inode flags that user space is allowed to set */
#define GFS2_FLAGS_USER_SET					\
	(GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA |		\
	 GFS2_DIF_IMMUTABLE | GFS2_DIF_APPENDONLY |		\
	 GFS2_DIF_NOATIME | GFS2_DIF_SYNC |			\
	 GFS2_DIF_TOPDIR)

/**
209 210 211
 * do_gfs2_set_flags - set flags on an inode
 * @filp: file pointer
 * @reqflags: The flags to set
212
 * @mask: Indicates which flags are valid
213
 * @fsflags: The FS_* inode flags passed in
214 215
 *
 */
216 217
static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
			     const u32 fsflags)
218
{
A
Al Viro 已提交
219
	struct inode *inode = file_inode(filp);
220 221
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
222 223 224
	struct buffer_head *bh;
	struct gfs2_holder gh;
	int error;
225
	u32 new_flags, flags, oldflags;
226

227
	error = mnt_want_write_file(filp);
228
	if (error)
229 230
		return error;

M
Miklos Szeredi 已提交
231 232 233 234
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	if (error)
		goto out_drop_write;

235 236 237 238 239
	oldflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);
	error = vfs_ioc_setflags_prepare(inode, oldflags, fsflags);
	if (error)
		goto out;

240
	error = -EACCES;
241
	if (!inode_owner_or_capable(inode))
242 243 244
		goto out;

	error = 0;
245
	flags = ip->i_diskflags;
246
	new_flags = (flags & ~mask) | (reqflags & mask);
247 248 249 250 251 252 253 254
	if ((new_flags ^ flags) == 0)
		goto out;

	error = -EPERM;
	if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
		goto out;
	if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
		goto out;
255
	if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
256
	    !capable(CAP_LINUX_IMMUTABLE))
257
		goto out;
258
	if (!IS_IMMUTABLE(inode)) {
259
		error = gfs2_permission(inode, MAY_WRITE);
260 261 262
		if (error)
			goto out;
	}
263
	if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
264
		if (new_flags & GFS2_DIF_JDATA)
265
			gfs2_log_flush(sdp, ip->i_gl,
266 267
				       GFS2_LOG_HEAD_FLUSH_NORMAL |
				       GFS2_LFC_SET_FLAGS);
268 269 270 271 272 273
		error = filemap_fdatawrite(inode->i_mapping);
		if (error)
			goto out;
		error = filemap_fdatawait(inode->i_mapping);
		if (error)
			goto out;
274 275
		if (new_flags & GFS2_DIF_JDATA)
			gfs2_ordered_del_inode(ip);
276
	}
277
	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
278 279
	if (error)
		goto out;
280 281 282
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		goto out_trans_end;
283
	inode->i_ctime = current_time(inode);
284
	gfs2_trans_add_meta(ip->i_gl, bh);
285
	ip->i_diskflags = new_flags;
286
	gfs2_dinode_out(ip, bh->b_data);
287
	brelse(bh);
288
	gfs2_set_inode_flags(inode);
289
	gfs2_set_aops(inode);
290 291
out_trans_end:
	gfs2_trans_end(sdp);
292 293
out:
	gfs2_glock_dq_uninit(&gh);
M
Miklos Szeredi 已提交
294
out_drop_write:
A
Al Viro 已提交
295
	mnt_drop_write_file(filp);
296 297 298
	return error;
}

299
/*
 * FS_IOC_SETFLAGS helper: fetch the FS_*_FL flags from user space, translate
 * them to GFS2_DIF_* flags and apply them with do_gfs2_set_flags().
 *
 * Returns -EFAULT if @ptr cannot be read, -EINVAL for flags that have no GFS2
 * equivalent, that user space may not set, or (GFS2_DIF_TOPDIR) that are not
 * valid for the inode type; otherwise the result of do_gfs2_set_flags().
 */
static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
{
	struct inode *inode = file_inode(filp);
	u32 fsflags, unknown, gfsflags = 0;
	u32 mask;
	int i;

	if (get_user(fsflags, ptr))
		return -EFAULT;

	/*
	 * Strip the recognised bits from a scratch copy only.  The original
	 * value must survive: do_gfs2_set_flags() hands it to
	 * vfs_ioc_setflags_prepare(), which compares it against the current
	 * flags.  Stripping fsflags itself would always pass 0 down.
	 */
	unknown = fsflags;
	for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) {
		if (unknown & fsflag_gfs2flag[i].fsflag) {
			unknown &= ~fsflag_gfs2flag[i].fsflag;
			gfsflags |= fsflag_gfs2flag[i].gfsflag;
		}
	}
	if (unknown || gfsflags & ~GFS2_FLAGS_USER_SET)
		return -EINVAL;

	mask = GFS2_FLAGS_USER_SET;
	if (S_ISDIR(inode->i_mode)) {
		mask &= ~GFS2_DIF_JDATA;
	} else {
		/* The GFS2_DIF_TOPDIR flag is only valid for directories. */
		if (gfsflags & GFS2_DIF_TOPDIR)
			return -EINVAL;
		mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA);
	}

	return do_gfs2_set_flags(filp, gfsflags, mask, fsflags);
}

S
Steve Whitehouse 已提交
331 332 333 334 335 336 337 338 339 340 341
/*
 * FS_IOC_GETFSLABEL helper: copy GFS2_LOCKNAME_LEN bytes of the superblock's
 * lock table name to the user buffer @label.
 *
 * Returns 0, or -EFAULT if the copy fails.
 */
static int gfs2_getlabel(struct file *filp, char __user *label)
{
	struct gfs2_sbd *sdp = GFS2_SB(file_inode(filp));
	unsigned long uncopied;

	uncopied = copy_to_user(label, sdp->sd_sb.sb_locktable,
				GFS2_LOCKNAME_LEN);

	return uncopied ? -EFAULT : 0;
}

342
static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
343 344
{
	switch(cmd) {
345
	case FS_IOC_GETFLAGS:
346
		return gfs2_get_flags(filp, (u32 __user *)arg);
347
	case FS_IOC_SETFLAGS:
348
		return gfs2_set_flags(filp, (u32 __user *)arg);
S
Steven Whitehouse 已提交
349 350
	case FITRIM:
		return gfs2_fitrim(filp, (void __user *)arg);
S
Steve Whitehouse 已提交
351 352
	case FS_IOC_GETFSLABEL:
		return gfs2_getlabel(filp, (char __user *)arg);
353
	}
S
Steve Whitehouse 已提交
354

355 356 357
	return -ENOTTY;
}

A
Arnd Bergmann 已提交
358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: map the 32-bit flag commands to their native
 * counterparts, pass the other supported commands through unchanged, and
 * forward to gfs2_ioctl() with the argument converted by compat_ptr().
 * Unsupported commands return -ENOIOCTLCMD.
 */
static long gfs2_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	unsigned int native_cmd;

	/* These are just misnamed, they actually get/put from/to user an int */
	if (cmd == FS_IOC32_GETFLAGS)
		native_cmd = FS_IOC_GETFLAGS;
	else if (cmd == FS_IOC32_SETFLAGS)
		native_cmd = FS_IOC_SETFLAGS;
	/* Keep this list in sync with gfs2_ioctl */
	else if (cmd == FITRIM || cmd == FS_IOC_GETFSLABEL)
		native_cmd = cmd;
	else
		return -ENOIOCTLCMD;

	return gfs2_ioctl(filp, native_cmd, (unsigned long)compat_ptr(arg));
}
#else
#define gfs2_compat_ioctl NULL
#endif

383 384
/**
 * gfs2_size_hint - Give a hint to the size of a write request
 * @filep: The struct file
 * @offset: The file offset of the write (currently unused)
 * @size: The length of the write
 *
 * Before a write, record the number of file system blocks the request spans
 * (capped at INT_MAX) in the inode's size hint.  The hint is only ever
 * raised here, never lowered.
 *
 */

static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
{
	struct inode *inode = file_inode(filep);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	size_t nblocks;
	int hint;

	/* Round the byte count up to whole blocks. */
	nblocks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
	hint = nblocks > INT_MAX ? INT_MAX : (int)nblocks;

	if (atomic_read(&ip->i_sizehint) < hint)
		atomic_set(&ip->i_sizehint, hint);
}

407
/**
408
 * gfs2_allocate_page_backing - Allocate blocks for a write fault
409
 * @page: The (locked) page to allocate backing for
410
 * @length: Size of the allocation
411
 *
412 413 414 415
 * We try to allocate all the blocks required for the page in one go.  This
 * might fail for various reasons, so we keep trying until all the blocks to
 * back this page are allocated.  If some of the blocks are already allocated,
 * that is ok too.
416
 */
417
static int gfs2_allocate_page_backing(struct page *page, unsigned int length)
418
{
419
	u64 pos = page_offset(page);
420 421

	do {
422 423
		struct iomap iomap = { };

424
		if (gfs2_iomap_get_alloc(page->mapping->host, pos, length, &iomap))
425
			return -EIO;
426

427 428 429
		if (length < iomap.length)
			iomap.length = length;
		length -= iomap.length;
430
		pos += iomap.length;
431
	} while (length > 0);
432

433 434 435 436 437 438
	return 0;
}

/**
 * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
 * @vma: The virtual memory area
439
 * @vmf: The virtual memory fault containing the page to become writable
440 441 442 443 444
 *
 * When the page becomes writable, we need to ensure that we have
 * blocks allocated on disk to back that page.
 */

445
static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
446
{
447
	struct page *page = vmf->page;
448
	struct inode *inode = file_inode(vmf->vma->vm_file);
449 450
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
451
	struct gfs2_alloc_parms ap = { .aflags = 0, };
452
	u64 offset = page_offset(page);
453 454
	unsigned int data_blocks, ind_blocks, rblocks;
	struct gfs2_holder gh;
455
	unsigned int length;
S
Steven Whitehouse 已提交
456
	loff_t size;
457 458
	int ret;

459
	sb_start_pagefault(inode->i_sb);
S
Steven Whitehouse 已提交
460

461
	ret = gfs2_qa_get(ip);
462
	if (ret)
463
		goto out;
464

465 466
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
467
	if (ret)
468
		goto out_uninit;
469

470 471 472 473 474 475 476
	/* Check page index against inode size */
	size = i_size_read(inode);
	if (offset >= size) {
		ret = -EINVAL;
		goto out_unlock;
	}

477
	/* Update file times before taking page lock */
478
	file_update_time(vmf->vma->vm_file);
479

480 481 482 483 484 485 486 487
	/* page is wholly or partially inside EOF */
	if (offset > size - PAGE_SIZE)
		length = offset_in_page(size);
	else
		length = PAGE_SIZE;

	gfs2_size_hint(vmf->vma->vm_file, offset, length);

488 489 490
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

491 492 493 494 495 496 497
	/*
	 * iomap_writepage / iomap_writepages currently don't support inline
	 * files, so always unstuff here.
	 */

	if (!gfs2_is_stuffed(ip) &&
	    !gfs2_write_alloc_required(ip, offset, length)) {
S
Steven Whitehouse 已提交
498 499 500 501 502
		lock_page(page);
		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
			ret = -EAGAIN;
			unlock_page(page);
		}
503
		goto out_unlock;
S
Steven Whitehouse 已提交
504 505
	}

506 507
	ret = gfs2_rindex_update(sdp);
	if (ret)
508 509
		goto out_unlock;

510
	gfs2_write_calc_reserv(ip, length, &data_blocks, &ind_blocks);
511
	ap.target = data_blocks + ind_blocks;
512 513 514
	ret = gfs2_quota_lock_check(ip, &ap);
	if (ret)
		goto out_unlock;
515
	ret = gfs2_inplace_reserve(ip, &ap);
516 517 518 519 520 521
	if (ret)
		goto out_quota_unlock;

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
522
	if (ind_blocks || data_blocks) {
523
		rblocks += RES_STATFS + RES_QUOTA;
524
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
525
	}
526 527 528 529 530
	ret = gfs2_trans_begin(sdp, rblocks, 0);
	if (ret)
		goto out_trans_fail;

	lock_page(page);
S
Steven Whitehouse 已提交
531 532 533 534 535 536 537 538
	ret = -EAGAIN;
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
		goto out_trans_end;

	/* Unstuff, if required, and allocate backing blocks for page */
539
	ret = 0;
S
Steven Whitehouse 已提交
540
	if (gfs2_is_stuffed(ip))
541
		ret = gfs2_unstuff_dinode(ip, page);
S
Steven Whitehouse 已提交
542
	if (ret == 0)
543
		ret = gfs2_allocate_page_backing(page, length);
544

S
Steven Whitehouse 已提交
545 546 547
out_trans_end:
	if (ret)
		unlock_page(page);
548 549 550 551 552 553 554
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
555
out_uninit:
556
	gfs2_qa_put(ip);
557
	gfs2_holder_uninit(&gh);
S
Steven Whitehouse 已提交
558 559
	if (ret == 0) {
		set_page_dirty(page);
560
		wait_for_stable_page(page);
S
Steven Whitehouse 已提交
561
	}
562
out:
563
	sb_end_pagefault(inode->i_sb);
S
Steven Whitehouse 已提交
564
	return block_page_mkwrite_return(ret);
565 566
}

567
static const struct vm_operations_struct gfs2_vm_ops = {
568
	.fault = filemap_fault,
569
	.map_pages = filemap_map_pages,
570 571 572
	.page_mkwrite = gfs2_page_mkwrite,
};

D
David Teigland 已提交
573 574 575 576 577
/**
 * gfs2_mmap -
 * @file: The file to map
 * @vma: The VMA which described the mapping
 *
578 579 580 581 582
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
 *
 * Returns: 0
D
David Teigland 已提交
583 584 585 586
 */

static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
587
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
D
David Teigland 已提交
588

589 590
	if (!(file->f_flags & O_NOATIME) &&
	    !IS_NOATIME(&ip->i_inode)) {
591 592
		struct gfs2_holder i_gh;
		int error;
D
David Teigland 已提交
593

594 595
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
596 597
		if (error)
			return error;
598 599 600
		/* grab lock to update inode */
		gfs2_glock_dq_uninit(&i_gh);
		file_accessed(file);
601
	}
602
	vma->vm_ops = &gfs2_vm_ops;
D
David Teigland 已提交
603

604
	return 0;
D
David Teigland 已提交
605 606 607
}

/**
608 609 610
 * gfs2_open_common - This is common to open and atomic_open
 * @inode: The inode being opened
 * @file: The file being opened
D
David Teigland 已提交
611
 *
612 613 614 615 616 617
 * This maybe called under a glock or not depending upon how it has
 * been called. We must always be called under a glock for regular
 * files, however. For other file types, it does not matter whether
 * we hold the glock or not.
 *
 * Returns: Error code or 0 for success
D
David Teigland 已提交
618 619
 */

620
int gfs2_open_common(struct inode *inode, struct file *file)
D
David Teigland 已提交
621 622
{
	struct gfs2_file *fp;
623 624 625 626 627 628 629
	int ret;

	if (S_ISREG(inode->i_mode)) {
		ret = generic_file_open(inode, file);
		if (ret)
			return ret;
	}
D
David Teigland 已提交
630

631
	fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
D
David Teigland 已提交
632 633 634
	if (!fp)
		return -ENOMEM;

635
	mutex_init(&fp->f_fl_mutex);
D
David Teigland 已提交
636

637
	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
638
	file->private_data = fp;
639 640 641 642 643
	if (file->f_mode & FMODE_WRITE) {
		ret = gfs2_qa_get(GFS2_I(inode));
		if (ret)
			goto fail;
	}
644
	return 0;
645 646 647 648 649

fail:
	kfree(file->private_data);
	file->private_data = NULL;
	return ret;
650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
}

/**
 * gfs2_open - open a file
 * @inode: the inode to open
 * @file: the struct file for this opening
 *
 * After atomic_open, this function is only used for opening files
 * which are already cached. We must still get the glock for regular
 * files to ensure that we have the file size uptodate for the large
 * file check which is in the common code. That is only an issue for
 * regular files though.
 *
 * Returns: errno
 */

static int gfs2_open(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	int error;
	bool need_unlock = false;
D
David Teigland 已提交
672

673
	if (S_ISREG(ip->i_inode.i_mode)) {
D
David Teigland 已提交
674 675 676
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
677 678 679
			return error;
		need_unlock = true;
	}
D
David Teigland 已提交
680

681
	error = gfs2_open_common(inode, file);
D
David Teigland 已提交
682

683
	if (need_unlock)
D
David Teigland 已提交
684 685 686 687 688 689
		gfs2_glock_dq_uninit(&i_gh);

	return error;
}

/**
690
 * gfs2_release - called to close a struct file
D
David Teigland 已提交
691 692 693 694 695 696
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Returns: errno
 */

697
static int gfs2_release(struct inode *inode, struct file *file)
D
David Teigland 已提交
698
{
699
	struct gfs2_inode *ip = GFS2_I(inode);
D
David Teigland 已提交
700

B
Bob Peterson 已提交
701
	kfree(file->private_data);
702
	file->private_data = NULL;
D
David Teigland 已提交
703

704 705
	if (file->f_mode & FMODE_WRITE)
		gfs2_rsqa_delete(ip, &inode->i_writecount);
D
David Teigland 已提交
706 707 708 709 710
	return 0;
}

/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
711 712 713
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
S
Steven Whitehouse 已提交
714
 * @datasync: set if we can ignore timestamp changes
D
David Teigland 已提交
715
 *
716 717 718 719 720 721 722 723 724 725
 * We split the data flushing here so that we don't wait for the data
 * until after we've also sent the metadata to disk. Note that for
 * data=ordered, we will write & wait for the data at the log flush
 * stage anyway, so this is unlikely to make much of a difference
 * except in the data=writeback case.
 *
 * If the fdatawrite fails due to any reason except -EIO, we will
 * continue the remainder of the fsync, although we'll still report
 * the error at the end. This is to match filemap_write_and_wait_range()
 * behaviour.
726
 *
D
David Teigland 已提交
727 728 729
 * Returns: errno
 */

730 731
static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
D
David Teigland 已提交
732
{
733 734
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
735
	int sync_state = inode->i_state & I_DIRTY_ALL;
S
Steven Whitehouse 已提交
736
	struct gfs2_inode *ip = GFS2_I(inode);
737
	int ret = 0, ret1 = 0;
D
David Teigland 已提交
738

739 740 741 742 743
	if (mapping->nrpages) {
		ret1 = filemap_fdatawrite_range(mapping, start, end);
		if (ret1 == -EIO)
			return ret1;
	}
744

745 746
	if (!gfs2_is_jdata(ip))
		sync_state &= ~I_DIRTY_PAGES;
S
Steven Whitehouse 已提交
747
	if (datasync)
748
		sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
D
David Teigland 已提交
749

S
Steven Whitehouse 已提交
750 751
	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
752
		if (ret)
S
Steven Whitehouse 已提交
753
			return ret;
754
		if (gfs2_is_jdata(ip))
755 756 757
			ret = file_write_and_wait(file);
		if (ret)
			return ret;
758
		gfs2_ail_flush(ip->i_gl, 1);
759 760
	}

761
	if (mapping->nrpages)
762
		ret = file_fdatawait_range(file, start, end);
763 764

	return ret ? ret : ret1;
D
David Teigland 已提交
765 766
}

767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	size_t count = iov_iter_count(to);
	struct gfs2_holder gh;
	ssize_t ret;

	if (!count)
		return 0; /* skip atime */

	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

783 784
	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
			   is_sync_kiocb(iocb));
785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818

	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return ret;
}

static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	size_t len = iov_iter_count(from);
	loff_t offset = iocb->ki_pos;
	struct gfs2_holder gh;
	ssize_t ret;

	/*
	 * Deferred lock, even if its a write, since we do no allocation on
	 * this path. All we need to change is the atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately, have the option of only flushing a range like the
	 * VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	/* Silently fall back to buffered I/O when writing beyond EOF */
	if (offset + len > i_size_read(&ip->i_inode))
		goto out;

819 820
	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
			   is_sync_kiocb(iocb));
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841

out:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return ret;
}

/*
 * Read entry point: try direct I/O when requested and fall back to the
 * generic buffered read if the direct path reports -ENOTBLK.
 */
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t ret;

	if (!(iocb->ki_flags & IOCB_DIRECT))
		return generic_file_read_iter(iocb, to);

	ret = gfs2_file_direct_read(iocb, to);
	if (unlikely(ret == -ENOTBLK)) {
		/* Retry the request as a buffered read. */
		iocb->ki_flags &= ~IOCB_DIRECT;
		return generic_file_read_iter(iocb, to);
	}

	return ret;
}

842
/**
A
Al Viro 已提交
843
 * gfs2_file_write_iter - Perform a write to a file
844
 * @iocb: The io context
845
 * @from: The data to write
846 847 848 849 850 851 852 853
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can land up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 */

A
Al Viro 已提交
854
static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
855 856
{
	struct file *file = iocb->ki_filp;
857 858
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
859
	ssize_t ret;
860

861
	ret = gfs2_qa_get(ip);
862 863
	if (ret)
		return ret;
864

A
Al Viro 已提交
865
	gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
866

867
	if (iocb->ki_flags & IOCB_APPEND) {
868 869 870 871
		struct gfs2_holder gh;

		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
872
			goto out;
873 874 875
		gfs2_glock_dq_uninit(&gh);
	}

876 877 878
	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
879
		goto out_unlock;
880 881 882

	ret = file_remove_privs(file);
	if (ret)
883
		goto out_unlock;
884 885 886

	ret = file_update_time(file);
	if (ret)
887
		goto out_unlock;
888

889 890
	if (iocb->ki_flags & IOCB_DIRECT) {
		struct address_space *mapping = file->f_mapping;
891
		ssize_t buffered, ret2;
892

893 894
		ret = gfs2_file_direct_write(iocb, from);
		if (ret < 0 || !iov_iter_count(from))
895
			goto out_unlock;
896

897
		iocb->ki_flags |= IOCB_DSYNC;
898
		current->backing_dev_info = inode_to_bdi(inode);
899
		buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
900
		current->backing_dev_info = NULL;
901
		if (unlikely(buffered <= 0))
902
			goto out_unlock;
903 904 905 906

		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
907 908 909
		 * semantics.  If the writeback or invalidate fails, only report
		 * the direct I/O range as we don't know if the buffered pages
		 * made it to disk.
910
		 */
911 912 913 914 915 916 917
		iocb->ki_pos += buffered;
		ret2 = generic_write_sync(iocb, buffered);
		invalidate_mapping_pages(mapping,
				(iocb->ki_pos - buffered) >> PAGE_SHIFT,
				(iocb->ki_pos - 1) >> PAGE_SHIFT);
		if (!ret || ret2 > 0)
			ret += ret2;
918
	} else {
919
		current->backing_dev_info = inode_to_bdi(inode);
920
		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
921
		current->backing_dev_info = NULL;
922
		if (likely(ret > 0)) {
923
			iocb->ki_pos += ret;
924 925
			ret = generic_write_sync(iocb, ret);
		}
926
	}
927

928
out_unlock:
929
	inode_unlock(inode);
930 931
out:
	gfs2_qa_put(ip);
932
	return ret;
933 934
}

935 936 937
/*
 * Allocate the blocks backing [@offset, @offset + @len) of @inode and zero
 * every extent that was newly allocated.  A stuffed inode is unstuffed
 * first.  @mode is currently unused.
 *
 * Returns 0 or the first error encountered.
 */
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
			   int mode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_inode *ip = GFS2_I(inode);
	const loff_t end = offset + len;
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(error))
		return error;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip))
		error = gfs2_unstuff_dinode(ip, NULL);

	while (!error && offset < end) {
		struct iomap iomap = { };

		error = gfs2_iomap_get_alloc(inode, offset, end - offset,
					     &iomap);
		if (error)
			break;

		offset = iomap.offset + iomap.length;

		/* Only newly allocated extents need to be zeroed. */
		if (iomap.flags & IOMAP_F_NEW) {
			error = sb_issue_zeroout(sb,
					iomap.addr >> inode->i_blkbits,
					iomap.length >> inode->i_blkbits,
					GFP_NOFS);
			if (error)
				fs_err(GFS2_SB(inode), "Failed to zero data buffers\n");
		}
	}

	brelse(dibh);
	return error;
}
978

979 980 981 982 983 984 985 986 987 988 989 990 991 992
/**
 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
 *                     blocks, determine how many bytes can be written.
 * @ip:          The inode in question.
 * @len:         Max cap of bytes. What we return in *len must be <= this.
 * @data_blocks: Compute and return the number of data blocks needed
 * @ind_blocks:  Compute and return the number of indirect blocks needed
 * @max_blocks:  The total blocks available to work with.
 *
 * If the byte count derived from @max_blocks exceeds the cap in @len, the
 * cap is used and the block counts are recomputed for it with
 * gfs2_write_calc_reserv().
 *
 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
			    unsigned int *data_blocks, unsigned int *ind_blocks,
			    unsigned int max_blocks)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	const loff_t cap = *len;
	unsigned int max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
	unsigned int level = max_data;

	/* Subtract the pointer blocks needed at each level from the total. */
	while (level > sdp->sd_diptrs) {
		level = DIV_ROUND_UP(level, sdp->sd_inptrs);
		max_data -= level;
	}

	*data_blocks = max_data;
	*ind_blocks = max_blocks - max_data;
	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;

	if (*len <= cap)
		return;

	*len = cap;
	gfs2_write_calc_reserv(ip, cap, data_blocks, ind_blocks);
}

1012
/**
 * __gfs2_fallocate - allocate blocks for a byte range, one chunk at a time
 * @file: the file to allocate for
 * @mode: fallocate mode; FALLOC_FL_KEEP_SIZE is checked here and the value
 *        is passed on to fallocate_chunk()
 * @offset: start of the requested byte range
 * @len: length of the requested byte range
 *
 * The range is widened to file system block boundaries and then walked in a
 * loop. Stretches for which gfs2_write_alloc_required() reports nothing to
 * do are skipped. For the rest, quota is locked and checked, blocks are
 * reserved, a transaction is opened and fallocate_chunk() is called.
 *
 * Returns: 0 on success, or the result of vfs_fsync_range() when the file is
 * O_DSYNC or the inode is IS_SYNC; otherwise the error from the failing helper
 */
static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_alloc_parms ap = { .aflags = 0, };
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	/* The caller's range as requested; offset and len are realigned below */
	const loff_t req_start = offset;
	const loff_t req_end = offset + len;
	const loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
	const loff_t max_chunk_size = UINT_MAX & bsize_mask;
	loff_t aligned_end, chunk, alloc_bytes, blk_limit;
	int error;

	/* Round the end up and the start down to block boundaries */
	aligned_end = (req_end - 1) >> sdp->sd_sb.sb_bsize_shift;
	aligned_end = (aligned_end + 1) << sdp->sd_sb.sb_bsize_shift;
	offset &= bsize_mask;
	len = aligned_end - offset;

	/* Initial step size: never zero and always a multiple of the block size */
	chunk = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
	if (!chunk)
		chunk = UINT_MAX;
	chunk &= bsize_mask;
	if (chunk == 0)
		chunk = sdp->sd_sb.sb_bsize;

	gfs2_size_hint(file, offset, len);

	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
	ap.min_target = data_blocks + ind_blocks;

	while (len > 0) {
		if (len < chunk)
			chunk = len;

		/* Nothing to allocate in this stretch: step over it */
		if (!gfs2_write_alloc_required(ip, offset, chunk)) {
			len -= chunk;
			offset += chunk;
			continue;
		}

		/*
		 * Work out how many bytes can really be fallocated without
		 * running past the quota or the end of the file system.
		 * Begin with the optimistic upper bound.
		 */
		alloc_bytes = len;
		if (alloc_bytes > max_chunk_size)
			alloc_bytes = max_chunk_size;

		/*
		 * That bound is most likely theoretical, so size the
		 * reservation request from the more realistic chunk.
		 */
		gfs2_write_calc_reserv(ip, chunk, &data_blocks, &ind_blocks);
		ap.target = data_blocks + ind_blocks;

		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;

		/* ap.allowed, when set, is the number of blocks quota permits */
		blk_limit = ap.allowed ? ap.allowed : UINT_MAX;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto out_qunlock;

		/* The chosen resource group may lower the limit further */
		if (ap.allowed && ap.allowed < blk_limit)
			blk_limit = ap.allowed;

		/*
		 * Convert the block limit back into bytes. This also
		 * recomputes data_blocks and ind_blocks.
		 */
		calc_max_reserv(ip, &alloc_bytes, &data_blocks,
				&ind_blocks, blk_limit);

		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
			  RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
		if (gfs2_is_jdata(ip)) {
			if (data_blocks)
				rblocks += data_blocks;
			else
				rblocks += 1;
		}

		error = gfs2_trans_begin(sdp, rblocks,
					 PAGE_SIZE >> inode->i_blkbits);
		if (error)
			goto out_trans_fail;

		/* The transaction is ended before the result is examined */
		error = fallocate_chunk(inode, offset, alloc_bytes, mode);
		gfs2_trans_end(sdp);
		if (error)
			goto out_trans_fail;

		len -= alloc_bytes;
		offset += alloc_bytes;
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
	}

	/* Grow i_size to the end of the requested range unless told not to */
	if (!(mode & FALLOC_FL_KEEP_SIZE) && req_end > inode->i_size)
		i_size_write(inode, req_end);
	file_update_time(file);
	mark_inode_dirty(inode);

	if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
		return 0;

	/* datasync is 0 only when __O_SYNC is set on the file */
	return vfs_fsync_range(file, req_start, req_end - 1,
			       !(file->f_flags & __O_SYNC));

out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return error;
}

/**
 * gfs2_fallocate - fallocate file operation
 * @file: the file to operate on
 * @mode: FALLOC_FL_PUNCH_HOLE and/or FALLOC_FL_KEEP_SIZE; any other bit is
 *        rejected
 * @offset: start of the byte range
 * @len: length of the byte range
 *
 * Takes the inode lock and an exclusive glock on the inode, then either
 * punches a hole or allocates blocks via __gfs2_fallocate().
 *
 * Returns: 0 on success, -EOPNOTSUPP for unsupported requests, or the error
 * from the failing helper
 */
static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	/*
	 * Unknown mode bits are refused. So are jdata inodes, except the
	 * rindex: gfs2_grow needs fallocate to reserve space in it.
	 */
	if ((mode & ~(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) ||
	    (gfs2_is_jdata(ip) && inode != sdp->sd_rindex))
		return -EOPNOTSUPP;

	inode_lock(inode);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	/* When the file may grow, check the new size first */
	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
		if ((offset + len) > inode->i_size) {
			ret = inode_newsize_ok(inode, offset + len);
			if (ret)
				goto out_unlock;
		}
	}

	ret = get_write_access(inode);
	if (ret)
		goto out_unlock;

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		ret = __gfs2_punch_hole(file, offset, len);
		goto out_putw;
	}

	ret = gfs2_qa_get(ip);
	if (ret)
		goto out_putw;

	ret = __gfs2_fallocate(file, mode, offset, len);
	/* On failure, drop the inode's block reservation */
	if (ret)
		gfs2_rs_deltree(&ip->i_res);
	gfs2_qa_put(ip);

out_putw:
	put_write_access(inode);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	inode_unlock(inode);
	return ret;
}

1184 1185 1186 1187 1188 1189
/**
 * gfs2_file_splice_write - splice data from a pipe into a GFS2 file
 * @pipe: the pipe to read from
 * @out: the file to write to
 * @ppos: position in @out; passed through to iter_file_splice_write()
 * @len: number of bytes to splice
 * @flags: splice flags, passed through unchanged
 *
 * Wraps iter_file_splice_write() in a gfs2_qa_get()/gfs2_qa_put() pair and
 * gives gfs2_size_hint() the range about to be written.
 *
 * Returns: the result of iter_file_splice_write(), or the gfs2_qa_get() error
 */
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
	ssize_t written;
	int rc = gfs2_qa_get(ip);

	if (rc)
		return (ssize_t)rc;

	gfs2_size_hint(out, *ppos, len);
	written = iter_file_splice_write(pipe, out, ppos, len, flags);
	gfs2_qa_put(ip);

	return written;
}

#ifdef CONFIG_GFS2_FS_LOCKING_DLM

/**
 * gfs2_lock - acquire/release a posix lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
1216 1217
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
1218
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
D
David Teigland 已提交
1219 1220 1221

	if (!(fl->fl_flags & FL_POSIX))
		return -ENOLCK;
1222
	if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
D
David Teigland 已提交
1223 1224
		return -ENOLCK;

M
Marc Eshel 已提交
1225 1226 1227 1228 1229
	if (cmd == F_CANCELLK) {
		/* Hack: */
		cmd = F_SETLK;
		fl->fl_type = F_UNLCK;
	}
1230
	if (unlikely(gfs2_withdrawn(sdp))) {
1231
		if (fl->fl_type == F_UNLCK)
1232
			locks_lock_file_wait(file, fl);
1233
		return -EIO;
1234
	}
D
David Teigland 已提交
1235
	if (IS_GETLK(cmd))
1236
		return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
D
David Teigland 已提交
1237
	else if (fl->fl_type == F_UNLCK)
1238
		return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
D
David Teigland 已提交
1239
	else
1240
		return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
D
David Teigland 已提交
1241 1242 1243 1244
}

static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
1245
	struct gfs2_file *fp = file->private_data;
D
David Teigland 已提交
1246
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
A
Al Viro 已提交
1247
	struct gfs2_inode *ip = GFS2_I(file_inode(file));
D
David Teigland 已提交
1248 1249
	struct gfs2_glock *gl;
	unsigned int state;
B
Bob Peterson 已提交
1250
	u16 flags;
D
David Teigland 已提交
1251
	int error = 0;
1252
	int sleeptime;
D
David Teigland 已提交
1253 1254

	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
1255
	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT;
D
David Teigland 已提交
1256

1257
	mutex_lock(&fp->f_fl_mutex);
D
David Teigland 已提交
1258

1259
	if (gfs2_holder_initialized(fl_gh)) {
1260
		struct file_lock request;
D
David Teigland 已提交
1261 1262
		if (fl_gh->gh_state == state)
			goto out;
1263 1264 1265 1266
		locks_init_lock(&request);
		request.fl_type = F_UNLCK;
		request.fl_flags = FL_FLOCK;
		locks_lock_file_wait(file, &request);
1267
		gfs2_glock_dq(fl_gh);
1268
		gfs2_holder_reinit(state, flags, fl_gh);
D
David Teigland 已提交
1269
	} else {
1270 1271
		error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
				       &gfs2_flock_glops, CREATE, &gl);
D
David Teigland 已提交
1272 1273
		if (error)
			goto out;
1274 1275
		gfs2_holder_init(gl, state, flags, fl_gh);
		gfs2_glock_put(gl);
D
David Teigland 已提交
1276
	}
1277 1278 1279 1280 1281 1282 1283 1284
	for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) {
		error = gfs2_glock_nq(fl_gh);
		if (error != GLR_TRYFAILED)
			break;
		fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT;
		fl_gh->gh_error = 0;
		msleep(sleeptime);
	}
D
David Teigland 已提交
1285 1286 1287 1288 1289
	if (error) {
		gfs2_holder_uninit(fl_gh);
		if (error == GLR_TRYFAILED)
			error = -EAGAIN;
	} else {
1290
		error = locks_lock_file_wait(file, fl);
1291
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
D
David Teigland 已提交
1292 1293
	}

1294
out:
1295
	mutex_unlock(&fp->f_fl_mutex);
D
David Teigland 已提交
1296 1297 1298 1299 1300
	return error;
}

static void do_unflock(struct file *file, struct file_lock *fl)
{
1301
	struct gfs2_file *fp = file->private_data;
D
David Teigland 已提交
1302 1303
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;

1304
	mutex_lock(&fp->f_fl_mutex);
1305
	locks_lock_file_wait(file, fl);
A
Andreas Gruenbacher 已提交
1306
	if (gfs2_holder_initialized(fl_gh)) {
1307
		gfs2_glock_dq(fl_gh);
1308 1309
		gfs2_holder_uninit(fl_gh);
	}
1310
	mutex_unlock(&fp->f_fl_mutex);
D
David Teigland 已提交
1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325
}

/**
 * gfs2_flock - acquire/release a flock lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
{
	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;
1326 1327
	if (fl->fl_type & LOCK_MAND)
		return -EOPNOTSUPP;
D
David Teigland 已提交
1328 1329 1330 1331

	if (fl->fl_type == F_UNLCK) {
		do_unflock(file, fl);
		return 0;
1332
	} else {
D
David Teigland 已提交
1333
		return do_flock(file, cmd, fl);
1334
	}
D
David Teigland 已提交
1335 1336
}

1337
const struct file_operations gfs2_file_fops = {
1338
	.llseek		= gfs2_llseek,
1339
	.read_iter	= gfs2_file_read_iter,
A
Al Viro 已提交
1340
	.write_iter	= gfs2_file_write_iter,
1341
	.iopoll		= iomap_dio_iopoll,
1342
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1343
	.compat_ioctl	= gfs2_compat_ioctl,
1344 1345
	.mmap		= gfs2_mmap,
	.open		= gfs2_open,
1346
	.release	= gfs2_release,
1347 1348 1349
	.fsync		= gfs2_fsync,
	.lock		= gfs2_lock,
	.flock		= gfs2_flock,
1350
	.splice_read	= generic_file_splice_read,
1351
	.splice_write	= gfs2_file_splice_write,
1352
	.setlease	= simple_nosetlease,
1353
	.fallocate	= gfs2_fallocate,
D
David Teigland 已提交
1354 1355
};

1356
const struct file_operations gfs2_dir_fops = {
A
Al Viro 已提交
1357
	.iterate_shared	= gfs2_readdir,
1358
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1359
	.compat_ioctl	= gfs2_compat_ioctl,
1360
	.open		= gfs2_open,
1361
	.release	= gfs2_release,
1362 1363 1364
	.fsync		= gfs2_fsync,
	.lock		= gfs2_lock,
	.flock		= gfs2_flock,
1365
	.llseek		= default_llseek,
D
David Teigland 已提交
1366 1367
};

#endif /* CONFIG_GFS2_FS_LOCKING_DLM */

const struct file_operations gfs2_file_fops_nolock = {
1371
	.llseek		= gfs2_llseek,
1372
	.read_iter	= gfs2_file_read_iter,
A
Al Viro 已提交
1373
	.write_iter	= gfs2_file_write_iter,
1374
	.iopoll		= iomap_dio_iopoll,
1375
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1376
	.compat_ioctl	= gfs2_compat_ioctl,
1377 1378
	.mmap		= gfs2_mmap,
	.open		= gfs2_open,
1379
	.release	= gfs2_release,
1380
	.fsync		= gfs2_fsync,
1381
	.splice_read	= generic_file_splice_read,
1382
	.splice_write	= gfs2_file_splice_write,
1383
	.setlease	= generic_setlease,
1384
	.fallocate	= gfs2_fallocate,
1385 1386
};

1387
const struct file_operations gfs2_dir_fops_nolock = {
A
Al Viro 已提交
1388
	.iterate_shared	= gfs2_readdir,
1389
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1390
	.compat_ioctl	= gfs2_compat_ioctl,
1391
	.open		= gfs2_open,
1392
	.release	= gfs2_release,
1393
	.fsync		= gfs2_fsync,
1394
	.llseek		= default_llseek,
1395 1396
};