file.c 34.0 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
D
David Teigland 已提交
2 3
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
D
David Teigland 已提交
5 6 7 8
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
A
Arnd Bergmann 已提交
9
#include <linux/compat.h>
D
David Teigland 已提交
10 11 12 13 14 15
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
M
Miklos Szeredi 已提交
16
#include <linux/mount.h>
17
#include <linux/fs.h>
18
#include <linux/gfs2_ondisk.h>
19 20
#include <linux/falloc.h>
#include <linux/swap.h>
21
#include <linux/crc32.h>
22
#include <linux/writeback.h>
23
#include <linux/uaccess.h>
24 25
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
26
#include <linux/delay.h>
27
#include <linux/backing-dev.h>
D
David Teigland 已提交
28 29

#include "gfs2.h"
30
#include "incore.h"
D
David Teigland 已提交
31
#include "bmap.h"
32
#include "aops.h"
D
David Teigland 已提交
33 34 35 36 37 38 39 40 41
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
42
#include "util.h"
D
David Teigland 已提交
43 44 45 46 47

/**
 * gfs2_llseek - seek to a location in a file
 * @file: the file
 * @offset: the offset
48
 * @whence: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
D
David Teigland 已提交
49 50 51 52 53 54 55
 *
 * SEEK_END requires the glock for the file because it references the
 * file's size.
 *
 * Returns: The new offset, or errno
 */

56
static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
D
David Teigland 已提交
57
{
58
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
D
David Teigland 已提交
59 60 61
	struct gfs2_holder i_gh;
	loff_t error;

62
	switch (whence) {
63
	case SEEK_END:
D
David Teigland 已提交
64 65 66
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (!error) {
67
			error = generic_file_llseek(file, offset, whence);
D
David Teigland 已提交
68 69
			gfs2_glock_dq_uninit(&i_gh);
		}
70
		break;
71 72 73 74 75 76 77 78 79

	case SEEK_DATA:
		error = gfs2_seek_data(file, offset);
		break;

	case SEEK_HOLE:
		error = gfs2_seek_hole(file, offset);
		break;

80 81
	case SEEK_CUR:
	case SEEK_SET:
82 83 84 85
		/*
		 * These don't reference inode->i_size and don't depend on the
		 * block mapping, so we don't need the glock.
		 */
86
		error = generic_file_llseek(file, offset, whence);
87 88 89 90
		break;
	default:
		error = -EINVAL;
	}
D
David Teigland 已提交
91 92 93 94 95

	return error;
}

/**
A
Al Viro 已提交
96
 * gfs2_readdir - Iterator for a directory
D
David Teigland 已提交
97
 * @file: The directory to read from
A
Al Viro 已提交
98
 * @ctx: What to feed directory entries to
D
David Teigland 已提交
99 100 101 102
 *
 * Returns: errno
 */

A
Al Viro 已提交
103
static int gfs2_readdir(struct file *file, struct dir_context *ctx)
D
David Teigland 已提交
104
{
105
	struct inode *dir = file->f_mapping->host;
106
	struct gfs2_inode *dip = GFS2_I(dir);
D
David Teigland 已提交
107 108 109
	struct gfs2_holder d_gh;
	int error;

A
Al Viro 已提交
110 111
	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
	if (error)
D
David Teigland 已提交
112 113
		return error;

A
Al Viro 已提交
114
	error = gfs2_dir_read(dir, ctx, &file->f_ra);
D
David Teigland 已提交
115 116 117 118 119 120

	gfs2_glock_dq_uninit(&d_gh);

	return error;
}

121
/**
122
 * fsflag_gfs2flag
123
 *
124 125
 * The FS_JOURNAL_DATA_FL flag maps to GFS2_DIF_INHERIT_JDATA for directories,
 * and to GFS2_DIF_JDATA for non-directories.
126
 */
127 128 129 130 131 132 133 134 135 136 137
static struct {
	u32 fsflag;
	u32 gfsflag;
} fsflag_gfs2flag[] = {
	{FS_SYNC_FL, GFS2_DIF_SYNC},
	{FS_IMMUTABLE_FL, GFS2_DIF_IMMUTABLE},
	{FS_APPEND_FL, GFS2_DIF_APPENDONLY},
	{FS_NOATIME_FL, GFS2_DIF_NOATIME},
	{FS_INDEX_FL, GFS2_DIF_EXHASH},
	{FS_TOPDIR_FL, GFS2_DIF_TOPDIR},
	{FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA},
138
};
139

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
/*
 * Translate a set of GFS2_DIF_* disk flags into the matching FS_*_FL flags.
 *
 * FS_JOURNAL_DATA_FL is backed by two disk flags; the one that does not
 * apply to this kind of inode (GFS2_DIF_JDATA for directories,
 * GFS2_DIF_INHERIT_JDATA for everything else) is masked off first.
 */
static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags)
{
	u32 result = 0;
	int idx = 0;

	gfsflags &= S_ISDIR(inode->i_mode) ? ~GFS2_DIF_JDATA :
					     ~GFS2_DIF_INHERIT_JDATA;

	while (idx < ARRAY_SIZE(fsflag_gfs2flag)) {
		if (fsflag_gfs2flag[idx].gfsflag & gfsflags)
			result |= fsflag_gfs2flag[idx].fsflag;
		idx++;
	}

	return result;
}

156
/*
 * Report the inode's flags to user space as FS_*_FL bits.
 *
 * The disk flags are read with the inode glock held in shared mode.
 * Returns 0 on success, -EFAULT if @ptr cannot be written, or the error
 * from acquiring the glock.
 */
static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
{
	struct inode *inode = file_inode(filp);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder holder;
	int ret;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &holder);
	ret = gfs2_glock_nq(&holder);
	if (!ret) {
		u32 fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);

		if (put_user(fsflags, ptr))
			ret = -EFAULT;

		gfs2_glock_dq(&holder);
	}
	gfs2_holder_uninit(&holder);
	return ret;
}

180 181 182 183 184
void gfs2_set_inode_flags(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned int flags = inode->i_flags;

S
Steven Whitehouse 已提交
185 186
	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
	if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
187
		flags |= S_NOSEC;
188
	if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
189
		flags |= S_IMMUTABLE;
190
	if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
191
		flags |= S_APPEND;
192
	if (ip->i_diskflags & GFS2_DIF_NOATIME)
193
		flags |= S_NOATIME;
194
	if (ip->i_diskflags & GFS2_DIF_SYNC)
195 196 197 198
		flags |= S_SYNC;
	inode->i_flags = flags;
}

199 200 201 202 203 204
/*
 * Mask of the GFS2_DIF_* disk flags that user space is allowed to change
 * through the set-flags ioctl.
 */
#define GFS2_FLAGS_USER_SET			\
	(GFS2_DIF_JDATA |			\
	 GFS2_DIF_IMMUTABLE |			\
	 GFS2_DIF_APPENDONLY |			\
	 GFS2_DIF_NOATIME |			\
	 GFS2_DIF_SYNC |			\
	 GFS2_DIF_TOPDIR |			\
	 GFS2_DIF_INHERIT_JDATA)

/**
209 210 211
 * do_gfs2_set_flags - set flags on an inode
 * @filp: file pointer
 * @reqflags: The flags to set
212
 * @mask: Indicates which flags are valid
213
 * @fsflags: The FS_* inode flags passed in
214 215
 *
 */
216 217
static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
			     const u32 fsflags)
218
{
A
Al Viro 已提交
219
	struct inode *inode = file_inode(filp);
220 221
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
222 223 224
	struct buffer_head *bh;
	struct gfs2_holder gh;
	int error;
225
	u32 new_flags, flags, oldflags;
226

227
	error = mnt_want_write_file(filp);
228
	if (error)
229 230
		return error;

M
Miklos Szeredi 已提交
231 232 233 234
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	if (error)
		goto out_drop_write;

235 236 237 238 239
	oldflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);
	error = vfs_ioc_setflags_prepare(inode, oldflags, fsflags);
	if (error)
		goto out;

240
	error = -EACCES;
241
	if (!inode_owner_or_capable(inode))
242 243 244
		goto out;

	error = 0;
245
	flags = ip->i_diskflags;
246
	new_flags = (flags & ~mask) | (reqflags & mask);
247 248 249 250 251 252 253 254
	if ((new_flags ^ flags) == 0)
		goto out;

	error = -EPERM;
	if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
		goto out;
	if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
		goto out;
255
	if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
256
	    !capable(CAP_LINUX_IMMUTABLE))
257
		goto out;
258
	if (!IS_IMMUTABLE(inode)) {
259
		error = gfs2_permission(inode, MAY_WRITE);
260 261 262
		if (error)
			goto out;
	}
263
	if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
264
		if (new_flags & GFS2_DIF_JDATA)
265
			gfs2_log_flush(sdp, ip->i_gl,
266 267
				       GFS2_LOG_HEAD_FLUSH_NORMAL |
				       GFS2_LFC_SET_FLAGS);
268 269 270 271 272 273
		error = filemap_fdatawrite(inode->i_mapping);
		if (error)
			goto out;
		error = filemap_fdatawait(inode->i_mapping);
		if (error)
			goto out;
274 275
		if (new_flags & GFS2_DIF_JDATA)
			gfs2_ordered_del_inode(ip);
276
	}
277
	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
278 279
	if (error)
		goto out;
280 281 282
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		goto out_trans_end;
283
	inode->i_ctime = current_time(inode);
284
	gfs2_trans_add_meta(ip->i_gl, bh);
285
	ip->i_diskflags = new_flags;
286
	gfs2_dinode_out(ip, bh->b_data);
287
	brelse(bh);
288
	gfs2_set_inode_flags(inode);
289
	gfs2_set_aops(inode);
290 291
out_trans_end:
	gfs2_trans_end(sdp);
292 293
out:
	gfs2_glock_dq_uninit(&gh);
M
Miklos Szeredi 已提交
294
out_drop_write:
A
Al Viro 已提交
295
	mnt_drop_write_file(filp);
296 297 298
	return error;
}

299
/*
 * gfs2_set_flags - handle a request from user space to set inode flags
 * @filp: file pointer
 * @ptr: user pointer to the requested FS_*_FL flags
 *
 * The requested FS_*_FL flags are translated into GFS2_DIF_* disk flags.
 * The request is rejected with -EINVAL if it contains an FS_*_FL flag that
 * has no GFS2 equivalent, a disk flag outside GFS2_FLAGS_USER_SET, or
 * GFS2_DIF_TOPDIR on a non-directory.
 *
 * Recognised bits are stripped from a scratch copy only.  The flags exactly
 * as requested by user space are what gets passed on to
 * do_gfs2_set_flags(), which hands them to vfs_ioc_setflags_prepare() for
 * comparison against the current flags.  Stripping them from the original
 * would make that value always zero at the call.
 *
 * Returns: 0 on success, or errno
 */
static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
{
	struct inode *inode = file_inode(filp);
	u32 fsflags, unknown, gfsflags = 0;
	u32 mask;
	int i;

	if (get_user(fsflags, ptr))
		return -EFAULT;

	/* Whatever is left in 'unknown' after the loop has no GFS2 mapping. */
	unknown = fsflags;
	for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) {
		if (unknown & fsflag_gfs2flag[i].fsflag) {
			unknown &= ~fsflag_gfs2flag[i].fsflag;
			gfsflags |= fsflag_gfs2flag[i].gfsflag;
		}
	}
	if (unknown || gfsflags & ~GFS2_FLAGS_USER_SET)
		return -EINVAL;

	mask = GFS2_FLAGS_USER_SET;
	if (S_ISDIR(inode->i_mode)) {
		mask &= ~GFS2_DIF_JDATA;
	} else {
		/* The GFS2_DIF_TOPDIR flag is only valid for directories. */
		if (gfsflags & GFS2_DIF_TOPDIR)
			return -EINVAL;
		mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA);
	}

	return do_gfs2_set_flags(filp, gfsflags, mask, fsflags);
}

S
Steve Whitehouse 已提交
331 332 333 334 335 336 337 338 339 340 341
/*
 * Copy GFS2_LOCKNAME_LEN bytes of the superblock's lock table name
 * (sb_locktable) to the user buffer @label.
 *
 * Returns 0 on success or -EFAULT if the copy to user space fails.
 */
static int gfs2_getlabel(struct file *filp, char __user *label)
{
	struct gfs2_sbd *sdp = GFS2_SB(file_inode(filp));
	unsigned long uncopied;

	uncopied = copy_to_user(label, sdp->sd_sb.sb_locktable,
				GFS2_LOCKNAME_LEN);

	return uncopied ? -EFAULT : 0;
}

342
static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
343 344
{
	switch(cmd) {
345
	case FS_IOC_GETFLAGS:
346
		return gfs2_get_flags(filp, (u32 __user *)arg);
347
	case FS_IOC_SETFLAGS:
348
		return gfs2_set_flags(filp, (u32 __user *)arg);
S
Steven Whitehouse 已提交
349 350
	case FITRIM:
		return gfs2_fitrim(filp, (void __user *)arg);
S
Steve Whitehouse 已提交
351 352
	case FS_IOC_GETFSLABEL:
		return gfs2_getlabel(filp, (char __user *)arg);
353
	}
S
Steve Whitehouse 已提交
354

355 356 357
	return -ENOTTY;
}

A
Arnd Bergmann 已提交
358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point.  The 32-bit flag commands are rewritten to
 * their native counterparts and the argument is converted with
 * compat_ptr() before everything is forwarded to gfs2_ioctl().  Commands
 * not listed here yield -ENOIOCTLCMD.
 */
static long gfs2_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	unsigned int native_cmd;

	switch (cmd) {
	/* Misnamed: both of these actually transfer an int to/from user space */
	case FS_IOC32_GETFLAGS:
		native_cmd = FS_IOC_GETFLAGS;
		break;
	case FS_IOC32_SETFLAGS:
		native_cmd = FS_IOC_SETFLAGS;
		break;
	/* Passed through unchanged; keep this list in sync with gfs2_ioctl */
	case FITRIM:
	case FS_IOC_GETFSLABEL:
		native_cmd = cmd;
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return gfs2_ioctl(filp, native_cmd, (unsigned long)compat_ptr(arg));
}
#else
#define gfs2_compat_ioctl NULL
#endif

383 384
/**
 * gfs2_size_hint - record a hint about the size of an upcoming write
 * @filep: The struct file
 * @offset: The file offset of the write (not used by the calculation)
 * @size: The length of the write
 *
 * The length is rounded up to whole filesystem blocks and capped at
 * INT_MAX.  The inode's size hint is only ever raised here: it is updated
 * when the new value exceeds the one currently stored.
 */

static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
{
	struct inode *inode = file_inode(filep);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	size_t nblocks;
	int hint;

	nblocks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
	hint = nblocks > INT_MAX ? INT_MAX : (int)nblocks;

	if (atomic_read(&ip->i_sizehint) < hint)
		atomic_set(&ip->i_sizehint, hint);
}

407
/**
408
 * gfs2_allocate_page_backing - Allocate blocks for a write fault
409 410
 * @page: The (locked) page to allocate backing for
 *
411 412 413 414
 * We try to allocate all the blocks required for the page in one go.  This
 * might fail for various reasons, so we keep trying until all the blocks to
 * back this page are allocated.  If some of the blocks are already allocated,
 * that is ok too.
415 416 417
 */
static int gfs2_allocate_page_backing(struct page *page)
{
418 419
	u64 pos = page_offset(page);
	u64 size = PAGE_SIZE;
420 421

	do {
422 423 424
		struct iomap iomap = { };

		if (gfs2_iomap_get_alloc(page->mapping->host, pos, 1, &iomap))
425
			return -EIO;
426 427 428 429 430 431

		iomap.length = min(iomap.length, size);
		size -= iomap.length;
		pos += iomap.length;
	} while (size > 0);

432 433 434 435 436 437
	return 0;
}

/**
 * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
 * @vma: The virtual memory area
438
 * @vmf: The virtual memory fault containing the page to become writable
439 440 441 442 443
 *
 * When the page becomes writable, we need to ensure that we have
 * blocks allocated on disk to back that page.
 */

444
static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
445
{
446
	struct page *page = vmf->page;
447
	struct inode *inode = file_inode(vmf->vma->vm_file);
448 449
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
450
	struct gfs2_alloc_parms ap = { .aflags = 0, };
451
	unsigned long last_index;
452
	u64 pos = page_offset(page);
453 454
	unsigned int data_blocks, ind_blocks, rblocks;
	struct gfs2_holder gh;
S
Steven Whitehouse 已提交
455
	loff_t size;
456 457
	int ret;

458
	sb_start_pagefault(inode->i_sb);
S
Steven Whitehouse 已提交
459

460
	ret = gfs2_rsqa_alloc(ip);
461
	if (ret)
462
		goto out;
463

464
	gfs2_size_hint(vmf->vma->vm_file, pos, PAGE_SIZE);
B
Bob Peterson 已提交
465

466 467
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
468
	if (ret)
469
		goto out_uninit;
470

471
	/* Update file times before taking page lock */
472
	file_update_time(vmf->vma->vm_file);
473

474 475 476
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

477
	if (!gfs2_write_alloc_required(ip, pos, PAGE_SIZE)) {
S
Steven Whitehouse 已提交
478 479 480 481 482
		lock_page(page);
		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
			ret = -EAGAIN;
			unlock_page(page);
		}
483
		goto out_unlock;
S
Steven Whitehouse 已提交
484 485
	}

486 487
	ret = gfs2_rindex_update(sdp);
	if (ret)
488 489
		goto out_unlock;

490
	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
491
	ap.target = data_blocks + ind_blocks;
492 493 494
	ret = gfs2_quota_lock_check(ip, &ap);
	if (ret)
		goto out_unlock;
495
	ret = gfs2_inplace_reserve(ip, &ap);
496 497 498 499 500 501
	if (ret)
		goto out_quota_unlock;

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
502
	if (ind_blocks || data_blocks) {
503
		rblocks += RES_STATFS + RES_QUOTA;
504
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
505
	}
506 507 508 509 510 511
	ret = gfs2_trans_begin(sdp, rblocks, 0);
	if (ret)
		goto out_trans_fail;

	lock_page(page);
	ret = -EINVAL;
S
Steven Whitehouse 已提交
512
	size = i_size_read(inode);
513
	last_index = (size - 1) >> PAGE_SHIFT;
S
Steven Whitehouse 已提交
514 515 516 517 518 519 520 521 522 523 524 525
	/* Check page index against inode size */
	if (size == 0 || (page->index > last_index))
		goto out_trans_end;

	ret = -EAGAIN;
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
		goto out_trans_end;

	/* Unstuff, if required, and allocate backing blocks for page */
526
	ret = 0;
S
Steven Whitehouse 已提交
527
	if (gfs2_is_stuffed(ip))
528
		ret = gfs2_unstuff_dinode(ip, page);
S
Steven Whitehouse 已提交
529 530
	if (ret == 0)
		ret = gfs2_allocate_page_backing(page);
531

S
Steven Whitehouse 已提交
532 533 534
out_trans_end:
	if (ret)
		unlock_page(page);
535 536 537 538 539 540 541
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
542
out_uninit:
543
	gfs2_holder_uninit(&gh);
S
Steven Whitehouse 已提交
544 545
	if (ret == 0) {
		set_page_dirty(page);
546
		wait_for_stable_page(page);
S
Steven Whitehouse 已提交
547
	}
548
out:
549
	sb_end_pagefault(inode->i_sb);
S
Steven Whitehouse 已提交
550
	return block_page_mkwrite_return(ret);
551 552
}

553
static const struct vm_operations_struct gfs2_vm_ops = {
554
	.fault = filemap_fault,
555
	.map_pages = filemap_map_pages,
556 557 558
	.page_mkwrite = gfs2_page_mkwrite,
};

D
David Teigland 已提交
559 560 561 562 563
/**
 * gfs2_mmap -
 * @file: The file to map
 * @vma: The VMA which described the mapping
 *
564 565 566 567 568
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
 *
 * Returns: 0
D
David Teigland 已提交
569 570 571 572
 */

static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
573
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
D
David Teigland 已提交
574

575 576
	if (!(file->f_flags & O_NOATIME) &&
	    !IS_NOATIME(&ip->i_inode)) {
577 578
		struct gfs2_holder i_gh;
		int error;
D
David Teigland 已提交
579

580 581
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
582 583
		if (error)
			return error;
584 585 586
		/* grab lock to update inode */
		gfs2_glock_dq_uninit(&i_gh);
		file_accessed(file);
587
	}
588
	vma->vm_ops = &gfs2_vm_ops;
D
David Teigland 已提交
589

590
	return 0;
D
David Teigland 已提交
591 592 593
}

/**
594 595 596
 * gfs2_open_common - This is common to open and atomic_open
 * @inode: The inode being opened
 * @file: The file being opened
D
David Teigland 已提交
597
 *
598 599 600 601 602 603
 * This maybe called under a glock or not depending upon how it has
 * been called. We must always be called under a glock for regular
 * files, however. For other file types, it does not matter whether
 * we hold the glock or not.
 *
 * Returns: Error code or 0 for success
D
David Teigland 已提交
604 605
 */

606
int gfs2_open_common(struct inode *inode, struct file *file)
D
David Teigland 已提交
607 608
{
	struct gfs2_file *fp;
609 610 611 612 613 614 615
	int ret;

	if (S_ISREG(inode->i_mode)) {
		ret = generic_file_open(inode, file);
		if (ret)
			return ret;
	}
D
David Teigland 已提交
616

617
	fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
D
David Teigland 已提交
618 619 620
	if (!fp)
		return -ENOMEM;

621
	mutex_init(&fp->f_fl_mutex);
D
David Teigland 已提交
622

623
	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
624
	file->private_data = fp;
625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647
	return 0;
}

/**
 * gfs2_open - open a file
 * @inode: the inode to open
 * @file: the struct file for this opening
 *
 * After atomic_open, this function is only used for opening files
 * which are already cached. We must still get the glock for regular
 * files to ensure that we have the file size uptodate for the large
 * file check which is in the common code. That is only an issue for
 * regular files though.
 *
 * Returns: errno
 */

static int gfs2_open(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	int error;
	bool need_unlock = false;
D
David Teigland 已提交
648

649
	if (S_ISREG(ip->i_inode.i_mode)) {
D
David Teigland 已提交
650 651 652
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
653 654 655
			return error;
		need_unlock = true;
	}
D
David Teigland 已提交
656

657
	error = gfs2_open_common(inode, file);
D
David Teigland 已提交
658

659
	if (need_unlock)
D
David Teigland 已提交
660 661 662 663 664 665
		gfs2_glock_dq_uninit(&i_gh);

	return error;
}

/**
666
 * gfs2_release - called to close a struct file
D
David Teigland 已提交
667 668 669 670 671 672
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Returns: errno
 */

673
static int gfs2_release(struct inode *inode, struct file *file)
D
David Teigland 已提交
674
{
675
	struct gfs2_inode *ip = GFS2_I(inode);
D
David Teigland 已提交
676

B
Bob Peterson 已提交
677
	kfree(file->private_data);
678
	file->private_data = NULL;
D
David Teigland 已提交
679

680 681
	if (!(file->f_mode & FMODE_WRITE))
		return 0;
682

683
	gfs2_rsqa_delete(ip, &inode->i_writecount);
D
David Teigland 已提交
684 685 686 687 688
	return 0;
}

/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
689 690 691
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
S
Steven Whitehouse 已提交
692
 * @datasync: set if we can ignore timestamp changes
D
David Teigland 已提交
693
 *
694 695 696 697 698 699 700 701 702 703
 * We split the data flushing here so that we don't wait for the data
 * until after we've also sent the metadata to disk. Note that for
 * data=ordered, we will write & wait for the data at the log flush
 * stage anyway, so this is unlikely to make much of a difference
 * except in the data=writeback case.
 *
 * If the fdatawrite fails due to any reason except -EIO, we will
 * continue the remainder of the fsync, although we'll still report
 * the error at the end. This is to match filemap_write_and_wait_range()
 * behaviour.
704
 *
D
David Teigland 已提交
705 706 707
 * Returns: errno
 */

708 709
static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
D
David Teigland 已提交
710
{
711 712
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
713
	int sync_state = inode->i_state & I_DIRTY_ALL;
S
Steven Whitehouse 已提交
714
	struct gfs2_inode *ip = GFS2_I(inode);
715
	int ret = 0, ret1 = 0;
D
David Teigland 已提交
716

717 718 719 720 721
	if (mapping->nrpages) {
		ret1 = filemap_fdatawrite_range(mapping, start, end);
		if (ret1 == -EIO)
			return ret1;
	}
722

723 724
	if (!gfs2_is_jdata(ip))
		sync_state &= ~I_DIRTY_PAGES;
S
Steven Whitehouse 已提交
725
	if (datasync)
726
		sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
D
David Teigland 已提交
727

S
Steven Whitehouse 已提交
728 729
	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
730
		if (ret)
S
Steven Whitehouse 已提交
731
			return ret;
732
		if (gfs2_is_jdata(ip))
733 734 735
			ret = file_write_and_wait(file);
		if (ret)
			return ret;
736
		gfs2_ail_flush(ip->i_gl, 1);
737 738
	}

739
	if (mapping->nrpages)
740
		ret = file_fdatawait_range(file, start, end);
741 742

	return ret ? ret : ret1;
D
David Teigland 已提交
743 744
}

745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
/*
 * Direct I/O read: hold the inode glock in deferred mode around
 * iomap_dio_rw().
 *
 * Returns the result of iomap_dio_rw(), 0 for an empty request, or the
 * error from acquiring the glock.
 */
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_holder gh;
	ssize_t ret;

	/* Nothing to read; this also skips atime */
	if (iov_iter_count(to) == 0)
		return 0;

	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (!ret) {
		ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
		gfs2_glock_dq(&gh);
	}
	gfs2_holder_uninit(&gh);

	return ret;
}

/*
 * Direct I/O write: hold the inode glock in deferred mode around
 * iomap_dio_rw().
 *
 * A write that would extend past the current end of file is not attempted
 * here; 0 is returned in that case, so the caller is left to write the
 * data through the buffered path.
 */
static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	size_t len = iov_iter_count(from);
	loff_t offset = iocb->ki_pos;
	struct gfs2_holder gh;
	ssize_t ret;

	/*
	 * A deferred lock is used even though this is a write, because no
	 * allocation happens on this path.  This lock mode ensures that
	 * other nodes have flushed their buffered read caches (i.e. their
	 * page cache entries for this inode).  Unlike the VFS, we do not
	 * have the option of flushing only a range.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	/* Beyond EOF: leave ret at 0 to silently fall back to buffered I/O */
	if (offset + len <= i_size_read(&ip->i_inode))
		ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL);

	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return ret;
}

/*
 * Read entry point.  Direct I/O requests go to gfs2_file_direct_read().
 * If that reports -ENOTBLK, the IOCB_DIRECT flag is cleared and the read
 * is redone through generic_file_read_iter().  Everything else uses
 * generic_file_read_iter() straight away.
 */
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t ret;

	if (!(iocb->ki_flags & IOCB_DIRECT))
		return generic_file_read_iter(iocb, to);

	ret = gfs2_file_direct_read(iocb, to);
	if (unlikely(ret == -ENOTBLK)) {
		iocb->ki_flags &= ~IOCB_DIRECT;
		ret = generic_file_read_iter(iocb, to);
	}

	return ret;
}

818
/**
A
Al Viro 已提交
819
 * gfs2_file_write_iter - Perform a write to a file
820
 * @iocb: The io context
821
 * @from: The data to write
822 823 824 825 826 827 828 829
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can land up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 */

A
Al Viro 已提交
830
static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
831 832
{
	struct file *file = iocb->ki_filp;
833 834
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
835
	ssize_t written = 0, ret;
836

837
	ret = gfs2_rsqa_alloc(ip);
838 839
	if (ret)
		return ret;
840

A
Al Viro 已提交
841
	gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
842

843
	if (iocb->ki_flags & IOCB_APPEND) {
844 845 846 847 848 849 850 851
		struct gfs2_holder gh;

		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	ret = file_remove_privs(file);
	if (ret)
		goto out2;

	ret = file_update_time(file);
	if (ret)
		goto out2;

868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
	if (iocb->ki_flags & IOCB_DIRECT) {
		struct address_space *mapping = file->f_mapping;
		loff_t pos, endbyte;
		ssize_t buffered;

		written = gfs2_file_direct_write(iocb, from);
		if (written < 0 || !iov_iter_count(from))
			goto out2;

		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
		if (unlikely(ret < 0))
			goto out2;
		buffered = ret;

		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
		 * semantics.
		 */
		pos = iocb->ki_pos;
		endbyte = pos + buffered - 1;
		ret = filemap_write_and_wait_range(mapping, pos, endbyte);
		if (!ret) {
			iocb->ki_pos += buffered;
			written += buffered;
			invalidate_mapping_pages(mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		} else {
			/*
			 * We don't know how much we wrote, so just return
			 * the number of bytes which were direct-written
			 */
		}
	} else {
		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
		if (likely(ret > 0))
			iocb->ki_pos += ret;
	}
907 908 909 910 911 912 913 914 915

out2:
	current->backing_dev_info = NULL;
out:
	inode_unlock(inode);
	if (likely(ret > 0)) {
		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
916
	return written ? written : ret;
917 918
}

919 920 921
/*
 * Allocate the blocks backing the byte range [@offset, @offset + @len) of
 * @inode and zero out every extent that was newly allocated.  A stuffed
 * inode is unstuffed first.  @mode is not used here.
 *
 * Returns 0 on success, or errno.
 */
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
			   int mode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_inode *ip = GFS2_I(inode);
	const loff_t end = offset + len;
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(error))
		return error;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (unlikely(error))
			goto out;
	}

	while (offset < end) {
		struct iomap iomap = { };

		error = gfs2_iomap_get_alloc(inode, offset, end - offset,
					     &iomap);
		if (error)
			break;

		/* Continue right behind the extent that was just mapped. */
		offset = iomap.offset + iomap.length;

		if (iomap.flags & IOMAP_F_NEW) {
			error = sb_issue_zeroout(sb,
						 iomap.addr >> inode->i_blkbits,
						 iomap.length >> inode->i_blkbits,
						 GFP_NOFS);
			if (error) {
				fs_err(GFS2_SB(inode), "Failed to zero data buffers\n");
				break;
			}
		}
	}
out:
	brelse(dibh);
	return error;
}
962 963 964 965 966 967 968 969 970 971 972 973 974 975
/**
 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
 *                     blocks, determine how many bytes can be written.
 * @ip:          The inode in question.
 * @len:         Max cap of bytes. What we return in *len must be <= this.
 * @data_blocks: Compute and return the number of data blocks needed
 * @ind_blocks:  Compute and return the number of indirect blocks needed
 * @max_blocks:  The total blocks available to work with.
 *
 * If the byte count derived from @max_blocks exceeds the cap passed in
 * through @len, the cap is used instead and the block counts are
 * recomputed for it with gfs2_write_calc_reserv().
 *
 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
			    unsigned int *data_blocks, unsigned int *ind_blocks,
			    unsigned int max_blocks)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	const loff_t cap = *len;
	unsigned int data = max_blocks - 3 * (sdp->sd_max_height - 1);
	unsigned int level = data;
	loff_t bytes;

	/* Take off the pointer blocks needed, one tree level at a time. */
	while (level > sdp->sd_diptrs) {
		level = DIV_ROUND_UP(level, sdp->sd_inptrs);
		data -= level;
	}

	*data_blocks = data;
	*ind_blocks = max_blocks - data;

	bytes = ((loff_t)data - 3) << sdp->sd_sb.sb_bsize_shift;
	if (bytes > cap) {
		bytes = cap;
		gfs2_write_calc_reserv(ip, cap, data_blocks, ind_blocks);
	}
	*len = bytes;
}

995
/**
 * __gfs2_fallocate - preallocate blocks for a byte range of a file
 * @file: the file to allocate for
 * @mode: fallocate mode flags; only FALLOC_FL_KEEP_SIZE is examined here
 * @offset: start of the range, in bytes
 * @len: length of the range, in bytes
 *
 * The range is widened to filesystem block boundaries and then walked in
 * chunks.  Chunks that gfs2_write_alloc_required() reports as needing no
 * allocation are skipped; every other chunk is allocated under its own
 * quota lock, in-place reservation and transaction.  Afterwards the inode
 * size is raised to the end of the requested range unless
 * FALLOC_FL_KEEP_SIZE is set, and the range is synced if the file is
 * O_DSYNC or the inode is IS_SYNC.
 *
 * Returns: 0 on success, otherwise the error from the step that failed
 */
static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_alloc_parms ap = { .aflags = 0, };
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	loff_t bytes, max_bytes, max_blks;
	int error;
	/* The caller's unrounded range, used for i_size and fsync below. */
	const loff_t pos = offset;
	const loff_t count = len;
	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
	loff_t max_chunk_size = UINT_MAX & bsize_mask;

	/* Round the end up and the start down to block boundaries. */
	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

	offset &= bsize_mask;

	len = next - offset;
	/*
	 * Initial step size: half of sd_max_rg_data blocks, in bytes,
	 * block aligned and never zero.
	 */
	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
	if (!bytes)
		bytes = UINT_MAX;
	bytes &= bsize_mask;
	if (bytes == 0)
		bytes = sdp->sd_sb.sb_bsize;

	gfs2_size_hint(file, offset, len);

	/* The smallest reservation we accept is what one page needs. */
	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
	ap.min_target = data_blocks + ind_blocks;

	while (len > 0) {
		if (len < bytes)
			bytes = len;
		if (!gfs2_write_alloc_required(ip, offset, bytes)) {
			/* Nothing to allocate in this step; move past it. */
			len -= bytes;
			offset += bytes;
			continue;
		}

		/* We need to determine how many bytes we can actually
		 * fallocate without exceeding quota or going over the
		 * end of the fs. We start off optimistically by assuming
		 * we can write max_bytes */
		max_bytes = (len > max_chunk_size) ? max_chunk_size : len;

		/* Since max_bytes is most likely a theoretical max, we
		 * calculate a more realistic 'bytes' to serve as a good
		 * starting point for the number of bytes we may be able
		 * to write */
		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
		ap.target = data_blocks + ind_blocks;

		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;
		/* ap.allowed tells us how many blocks quota will allow
		 * us to write. Check if this reduces max_blks */
		max_blks = UINT_MAX;
		if (ap.allowed)
			max_blks = ap.allowed;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto out_qunlock;

		/* check if the selected rgrp limits our max_blks further */
		if (ap.allowed && ap.allowed < max_blks)
			max_blks = ap.allowed;

		/* Almost done. Calculate bytes that can be written using
		 * max_blks. We also recompute max_bytes, data_blocks and
		 * ind_blocks */
		calc_max_reserv(ip, &max_bytes, &data_blocks,
				&ind_blocks, max_blks);

		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
			  RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
		/* Journaled-data inodes also reserve for the data blocks. */
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks ? data_blocks : 1;

		error = gfs2_trans_begin(sdp, rblocks,
					 PAGE_SIZE >> inode->i_blkbits);
		if (error)
			goto out_trans_fail;

		/* End the transaction before looking at the chunk result. */
		error = fallocate_chunk(inode, offset, max_bytes, mode);
		gfs2_trans_end(sdp);

		if (error)
			goto out_trans_fail;

		/* Advance by what was actually allocated, not by 'bytes'. */
		len -= max_bytes;
		offset += max_bytes;
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size)
		i_size_write(inode, pos + count);
	file_update_time(file);
	mark_inode_dirty(inode);

	/* Last argument is 1 (datasync) unless __O_SYNC is set. */
	if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
		return vfs_fsync_range(file, pos, pos + count - 1,
			       (file->f_flags & __O_SYNC) ? 0 : 1);
	return 0;

out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return error;
}

/**
 * gfs2_fallocate - fallocate entry point for GFS2 files
 * @file: the file to operate on
 * @mode: fallocate mode; only FALLOC_FL_PUNCH_HOLE and
 *        FALLOC_FL_KEEP_SIZE are accepted
 * @offset: start of the range, in bytes
 * @len: length of the range, in bytes
 *
 * Takes the inode lock and an exclusive glock on the inode, validates the
 * new size when the file may grow, obtains write access and then either
 * punches a hole or preallocates the range.
 *
 * Returns: 0 on success, -EOPNOTSUPP for unsupported modes or jdata
 *          inodes other than the rindex, otherwise the error of the step
 *          that failed
 */
static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	if (mode & ~(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;
	/* fallocate is needed by gfs2_grow to reserve space in the rindex */
	if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex)
		return -EOPNOTSUPP;

	inode_lock(inode);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	/* Only a request that may extend the file needs the size check. */
	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
	    (offset + len) > inode->i_size) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out_unlock;
	}

	ret = get_write_access(inode);
	if (ret)
		goto out_unlock;

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		ret = __gfs2_punch_hole(file, offset, len);
	} else {
		ret = gfs2_rsqa_alloc(ip);
		if (ret)
			goto out_putw;

		ret = __gfs2_fallocate(file, mode, offset, len);

		/* On failure, discard the inode's block reservation. */
		if (ret)
			gfs2_rs_deltree(&ip->i_res);
	}

	/* Unwind in reverse order of acquisition. */
out_putw:
	put_write_access(inode);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	inode_unlock(inode);
	return ret;
}

1166 1167 1168 1169 1170 1171 1172
/**
 * gfs2_file_splice_write - splice data from a pipe into a GFS2 file
 * @pipe: the pipe to read from
 * @out: the file to write to
 * @ppos: position in @out to write at
 * @len: number of bytes to splice
 * @flags: splice flags, passed through unchanged
 *
 * Sets up the inode's allocation structures with gfs2_rsqa_alloc() and
 * records a size hint before handing the work to iter_file_splice_write().
 *
 * Returns: the result of iter_file_splice_write(), or the error from
 *          gfs2_rsqa_alloc()
 */
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
	int error;

	error = gfs2_rsqa_alloc(ip);
	if (error)
		return error;

	gfs2_size_hint(out, *ppos, len);

	return iter_file_splice_write(pipe, out, ppos, len, flags);
}

1182 1183
#ifdef CONFIG_GFS2_FS_LOCKING_DLM

D
David Teigland 已提交
1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
/**
 * gfs2_lock - acquire/release a posix lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
1195 1196
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
1197
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
D
David Teigland 已提交
1198 1199 1200

	if (!(fl->fl_flags & FL_POSIX))
		return -ENOLCK;
1201
	if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
D
David Teigland 已提交
1202 1203
		return -ENOLCK;

M
Marc Eshel 已提交
1204 1205 1206 1207 1208
	if (cmd == F_CANCELLK) {
		/* Hack: */
		cmd = F_SETLK;
		fl->fl_type = F_UNLCK;
	}
1209
	if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) {
1210
		if (fl->fl_type == F_UNLCK)
1211
			locks_lock_file_wait(file, fl);
1212
		return -EIO;
1213
	}
D
David Teigland 已提交
1214
	if (IS_GETLK(cmd))
1215
		return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
D
David Teigland 已提交
1216
	else if (fl->fl_type == F_UNLCK)
1217
		return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
D
David Teigland 已提交
1218
	else
1219
		return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
D
David Teigland 已提交
1220 1221 1222 1223
}

static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
1224
	struct gfs2_file *fp = file->private_data;
D
David Teigland 已提交
1225
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
A
Al Viro 已提交
1226
	struct gfs2_inode *ip = GFS2_I(file_inode(file));
D
David Teigland 已提交
1227 1228
	struct gfs2_glock *gl;
	unsigned int state;
B
Bob Peterson 已提交
1229
	u16 flags;
D
David Teigland 已提交
1230
	int error = 0;
1231
	int sleeptime;
D
David Teigland 已提交
1232 1233

	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
1234
	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT;
D
David Teigland 已提交
1235

1236
	mutex_lock(&fp->f_fl_mutex);
D
David Teigland 已提交
1237

1238
	if (gfs2_holder_initialized(fl_gh)) {
1239
		struct file_lock request;
D
David Teigland 已提交
1240 1241
		if (fl_gh->gh_state == state)
			goto out;
1242 1243 1244 1245
		locks_init_lock(&request);
		request.fl_type = F_UNLCK;
		request.fl_flags = FL_FLOCK;
		locks_lock_file_wait(file, &request);
1246
		gfs2_glock_dq(fl_gh);
1247
		gfs2_holder_reinit(state, flags, fl_gh);
D
David Teigland 已提交
1248
	} else {
1249 1250
		error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
				       &gfs2_flock_glops, CREATE, &gl);
D
David Teigland 已提交
1251 1252
		if (error)
			goto out;
1253 1254
		gfs2_holder_init(gl, state, flags, fl_gh);
		gfs2_glock_put(gl);
D
David Teigland 已提交
1255
	}
1256 1257 1258 1259 1260 1261 1262 1263
	for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) {
		error = gfs2_glock_nq(fl_gh);
		if (error != GLR_TRYFAILED)
			break;
		fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT;
		fl_gh->gh_error = 0;
		msleep(sleeptime);
	}
D
David Teigland 已提交
1264 1265 1266 1267 1268
	if (error) {
		gfs2_holder_uninit(fl_gh);
		if (error == GLR_TRYFAILED)
			error = -EAGAIN;
	} else {
1269
		error = locks_lock_file_wait(file, fl);
1270
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
D
David Teigland 已提交
1271 1272
	}

1273
out:
1274
	mutex_unlock(&fp->f_fl_mutex);
D
David Teigland 已提交
1275 1276 1277 1278 1279
	return error;
}

static void do_unflock(struct file *file, struct file_lock *fl)
{
1280
	struct gfs2_file *fp = file->private_data;
D
David Teigland 已提交
1281 1282
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;

1283
	mutex_lock(&fp->f_fl_mutex);
1284
	locks_lock_file_wait(file, fl);
A
Andreas Gruenbacher 已提交
1285
	if (gfs2_holder_initialized(fl_gh)) {
1286
		gfs2_glock_dq(fl_gh);
1287 1288
		gfs2_holder_uninit(fl_gh);
	}
1289
	mutex_unlock(&fp->f_fl_mutex);
D
David Teigland 已提交
1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304
}

/**
 * gfs2_flock - acquire/release a flock lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
{
	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;
1305 1306
	if (fl->fl_type & LOCK_MAND)
		return -EOPNOTSUPP;
D
David Teigland 已提交
1307 1308 1309 1310

	if (fl->fl_type == F_UNLCK) {
		do_unflock(file, fl);
		return 0;
1311
	} else {
D
David Teigland 已提交
1312
		return do_flock(file, cmd, fl);
1313
	}
D
David Teigland 已提交
1314 1315
}

1316
const struct file_operations gfs2_file_fops = {
1317
	.llseek		= gfs2_llseek,
1318
	.read_iter	= gfs2_file_read_iter,
A
Al Viro 已提交
1319
	.write_iter	= gfs2_file_write_iter,
1320
	.iopoll		= iomap_dio_iopoll,
1321
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1322
	.compat_ioctl	= gfs2_compat_ioctl,
1323 1324
	.mmap		= gfs2_mmap,
	.open		= gfs2_open,
1325
	.release	= gfs2_release,
1326 1327 1328
	.fsync		= gfs2_fsync,
	.lock		= gfs2_lock,
	.flock		= gfs2_flock,
1329
	.splice_read	= generic_file_splice_read,
1330
	.splice_write	= gfs2_file_splice_write,
1331
	.setlease	= simple_nosetlease,
1332
	.fallocate	= gfs2_fallocate,
D
David Teigland 已提交
1333 1334
};

1335
const struct file_operations gfs2_dir_fops = {
A
Al Viro 已提交
1336
	.iterate_shared	= gfs2_readdir,
1337
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1338
	.compat_ioctl	= gfs2_compat_ioctl,
1339
	.open		= gfs2_open,
1340
	.release	= gfs2_release,
1341 1342 1343
	.fsync		= gfs2_fsync,
	.lock		= gfs2_lock,
	.flock		= gfs2_flock,
1344
	.llseek		= default_llseek,
D
David Teigland 已提交
1345 1346
};

1347 1348
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */

1349
const struct file_operations gfs2_file_fops_nolock = {
1350
	.llseek		= gfs2_llseek,
1351
	.read_iter	= gfs2_file_read_iter,
A
Al Viro 已提交
1352
	.write_iter	= gfs2_file_write_iter,
1353
	.iopoll		= iomap_dio_iopoll,
1354
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1355
	.compat_ioctl	= gfs2_compat_ioctl,
1356 1357
	.mmap		= gfs2_mmap,
	.open		= gfs2_open,
1358
	.release	= gfs2_release,
1359
	.fsync		= gfs2_fsync,
1360
	.splice_read	= generic_file_splice_read,
1361
	.splice_write	= gfs2_file_splice_write,
1362
	.setlease	= generic_setlease,
1363
	.fallocate	= gfs2_fallocate,
1364 1365
};

1366
const struct file_operations gfs2_dir_fops_nolock = {
A
Al Viro 已提交
1367
	.iterate_shared	= gfs2_readdir,
1368
	.unlocked_ioctl	= gfs2_ioctl,
A
Arnd Bergmann 已提交
1369
	.compat_ioctl	= gfs2_compat_ioctl,
1370
	.open		= gfs2_open,
1371
	.release	= gfs2_release,
1372
	.fsync		= gfs2_fsync,
1373
	.llseek		= default_llseek,
1374 1375
};