/*
 *  linux/fs/locks.c
 *
 *  Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
 *  Doug Evans (dje@spiff.uucp), August 07, 1992
 *
 *  Deadlock detection added.
 *  FIXME: one thing isn't handled yet:
 *	- mandatory locks (requires lots of changes elsewhere)
 *  Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994.
 *
 *  Miscellaneous edits, and a total rewrite of posix_lock_file() code.
 *  Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
 *  
 *  Converted file_lock_table to a linked list from an array, which eliminates
 *  the limits on how many active file locks are open.
 *  Chad Page (pageone@netcom.com), November 27, 1994
 * 
 *  Removed dependency on file descriptors. dup()'ed file descriptors now
 *  get the same locks as the original file descriptors, and a close() on
 *  any file descriptor removes ALL the locks on the file for the current
 *  process. Since locks still depend on the process id, locks are inherited
 *  after an exec() but not after a fork(). This agrees with POSIX, and both
 *  BSD and SVR4 practice.
 *  Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995
 *
 *  Scrapped free list which is redundant now that we allocate locks
 *  dynamically with kmalloc()/kfree().
 *  Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995
 *
 *  Implemented two lock personalities - FL_FLOCK and FL_POSIX.
 *
 *  FL_POSIX locks are created with calls to fcntl() and lockf() through the
 *  fcntl() system call. They have the semantics described above.
 *
 *  FL_FLOCK locks are created with calls to flock(), through the flock()
 *  system call, which is new. Old C libraries implement flock() via fcntl()
 *  and will continue to use the old, broken implementation.
 *
 *  FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated
 *  with a file pointer (filp). As a result they can be shared by a parent
 *  process and its children after a fork(). They are removed when the last
 *  file descriptor referring to the file pointer is closed (unless explicitly
 *  unlocked). 
 *
 *  FL_FLOCK locks never deadlock, an existing lock is always removed before
 *  upgrading from shared to exclusive (or vice versa). When this happens
 *  any processes blocked by the current lock are woken up and allowed to
 *  run before the new lock is applied.
 *  Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
 *
 *  Removed some race conditions in flock_lock_file(), marked other possible
 *  races. Just grep for FIXME to see them. 
 *  Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
 *
 *  Addressed Dmitry's concerns. Deadlock checking no longer recursive.
 *  Lock allocation changed to GFP_ATOMIC as we can't afford to sleep
 *  once we've checked for blocking and deadlocking.
 *  Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996.
 *
 *  Initial implementation of mandatory locks. SunOS turned out to be
 *  a rotten model, so I implemented the "obvious" semantics.
 *  See 'Documentation/filesystems/mandatory-locking.txt' for details.
 *  Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
 *
 *  Don't allow mandatory locks on mmap()'ed files. Added simple functions to
 *  check if a file has mandatory locks, used by mmap(), open() and creat() to
 *  see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference
 *  Manual, Section 2.
 *  Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996.
 *
 *  Tidied up block list handling. Added '/proc/locks' interface.
 *  Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996.
 *
 *  Fixed deadlock condition for pathological code that mixes calls to
 *  flock() and fcntl().
 *  Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996.
 *
 *  Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use
 *  for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to
 *  guarantee sensible behaviour in the case where file system modules might
 *  be compiled with different options than the kernel itself.
 *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
 *
 *  Added a couple of missing wake_up() calls. Thanks to Thomas Meckel
 *  (Thomas.Meckel@mni.fh-giessen.de) for spotting this.
 *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
 *
 *  Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK
 *  locks. Changed process synchronisation to avoid dereferencing locks that
 *  have already been freed.
 *  Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996.
 *
 *  Made the block list a circular list to minimise searching in the list.
 *  Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996.
 *
 *  Made mandatory locking a mount option. Default is not to allow mandatory
 *  locking.
 *  Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996.
 *
 *  Some adaptations for NFS support.
 *  Olaf Kirch (okir@monad.swb.de), Dec 1996,
 *
 *  Fixed /proc/locks interface so that we can't overrun the buffer we are handed.
 *  Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997.
 *
 *  Use slab allocator instead of kmalloc/kfree.
 *  Use generic list implementation from <linux/list.h>.
 *  Sped up posix_locks_deadlock by only considering blocked locks.
 *  Matthew Wilcox <willy@debian.org>, March, 2000.
 *
 *  Leases and LOCK_MAND
 *  Matthew Wilcox <willy@debian.org>, June, 2000.
 *  Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000.
 */
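
/*
 * Illustrative userspace sketch (not kernel code): the two lock
 * personalities described above are driven by different system calls.
 * A hypothetical example, assuming a descriptor "fd" opened O_RDWR:
 *
 *	#include <fcntl.h>
 *	#include <sys/file.h>
 *
 *	// FL_FLOCK: whole-file lock, tied to the open file description
 *	flock(fd, LOCK_EX);		// exclusive lock
 *	flock(fd, LOCK_UN);		// release
 *
 *	// FL_POSIX: byte-range lock, owned by the process
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 100,	// lock bytes 0..99
 *	};
 *	fcntl(fd, F_SETLKW, &fl);	// blocks until granted
 *	fl.l_type = F_UNLCK;
 *	fcntl(fd, F_SETLK, &fl);	// release
 *
 * A close() on any descriptor for the file drops the process's POSIX
 * locks, while a flock() lock persists until the last reference to the
 * open file description goes away, as noted above.
 */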

#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/hashtable.h>
#include <linux/percpu.h>
#include <linux/lglock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>

#include <asm/uaccess.h>

#define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
#define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
#define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
#define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)

static bool lease_breaking(struct file_lock *fl)
{
	return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
}

static int target_leasetype(struct file_lock *fl)
{
	if (fl->fl_flags & FL_UNLOCK_PENDING)
		return F_UNLCK;
	if (fl->fl_flags & FL_DOWNGRADE_PENDING)
		return F_RDLCK;
	return fl->fl_type;
}

int leases_enable = 1;
int lease_break_time = 45;

/*
 * The global file_lock_list is only used for displaying /proc/locks, so we
 * keep a list on each CPU, with each list protected by its own spinlock via
 * the file_lock_lglock. Note that alterations to the list also require that
 * the relevant flc_lock is held.
 */
DEFINE_STATIC_LGLOCK(file_lock_lglock);
static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
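
/*
 * For reference (illustrative; exact formatting may differ by kernel
 * version): each lock on these per-CPU lists becomes one line of
 * /proc/locks, e.g.
 *
 *	1: POSIX  ADVISORY  WRITE 1234 08:02:130904 0 EOF
 *	2: FLOCK  ADVISORY  WRITE 1234 08:02:130905 0 EOF
 *
 * i.e. ordinal, lock class, advisory/mandatory, type, owner pid, the
 * maj:min:inode of the file, and the byte range.
 */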

/*
 * The blocked_hash is used to find POSIX lock loops for deadlock detection.
 * It is protected by blocked_lock_lock.
 *
 * We hash locks by lockowner in order to optimize searching for the lock a
 * particular lockowner is waiting on.
 *
 * FIXME: make this value scale via some heuristic? We generally will want more
 * buckets when we have more lockowners holding locks, but that's a little
 * difficult to determine without knowing what the workload will look like.
 */
#define BLOCKED_HASH_BITS	7
static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);

/*
 * This lock protects the blocked_hash. Generally, if you're accessing it, you
 * want to be holding this lock.
 *
 * In addition, it also protects the fl->fl_block list, and the fl->fl_next
 * pointer for file_lock structures that are acting as lock requests (in
 * contrast to those that are acting as records of acquired locks).
 *
 * Note that when we acquire this lock in order to change the above fields,
 * we often hold the flc_lock as well. In certain cases, when reading the fields
 * protected by this lock, we can skip acquiring it iff we already hold the
 * flc_lock.
 *
 * In particular, adding an entry to the fl_block list requires that you hold
 * both the flc_lock and the blocked_lock_lock (acquired in that order).
 * Deleting an entry from the list however only requires the blocked_lock_lock.
 */
static DEFINE_SPINLOCK(blocked_lock_lock);

static struct kmem_cache *flctx_cache __read_mostly;
static struct kmem_cache *filelock_cache __read_mostly;

static struct file_lock_context *
locks_get_lock_context(struct inode *inode, int type)
{
	struct file_lock_context *ctx;

	/* paired with cmpxchg() below */
	ctx = smp_load_acquire(&inode->i_flctx);
	if (likely(ctx) || type == F_UNLCK)
		goto out;

	ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
	if (!ctx)
		goto out;

	spin_lock_init(&ctx->flc_lock);
	INIT_LIST_HEAD(&ctx->flc_flock);
	INIT_LIST_HEAD(&ctx->flc_posix);
	INIT_LIST_HEAD(&ctx->flc_lease);

	/*
	 * Assign the pointer if it's not already assigned. If it is, then
	 * free the context we just allocated.
	 */
	if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
		kmem_cache_free(flctx_cache, ctx);
		ctx = smp_load_acquire(&inode->i_flctx);
	}
out:
	trace_locks_get_lock_context(inode, type, ctx);
	return ctx;
}

void
locks_free_lock_context(struct inode *inode)
{
	struct file_lock_context *ctx = inode->i_flctx;

	if (ctx) {
		WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
		WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
		WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
		kmem_cache_free(flctx_cache, ctx);
	}
}

static void locks_init_lock_heads(struct file_lock *fl)
{
	INIT_HLIST_NODE(&fl->fl_link);
	INIT_LIST_HEAD(&fl->fl_list);
	INIT_LIST_HEAD(&fl->fl_block);
	init_waitqueue_head(&fl->fl_wait);
}

/* Allocate an empty lock structure. */
struct file_lock *locks_alloc_lock(void)
{
	struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);

	if (fl)
		locks_init_lock_heads(fl);

	return fl;
}
EXPORT_SYMBOL_GPL(locks_alloc_lock);

void locks_release_private(struct file_lock *fl)
{
	if (fl->fl_ops) {
		if (fl->fl_ops->fl_release_private)
			fl->fl_ops->fl_release_private(fl);
		fl->fl_ops = NULL;
	}

	if (fl->fl_lmops) {
		if (fl->fl_lmops->lm_put_owner) {
			fl->fl_lmops->lm_put_owner(fl->fl_owner);
			fl->fl_owner = NULL;
		}
		fl->fl_lmops = NULL;
	}
}
EXPORT_SYMBOL_GPL(locks_release_private);

/* Free a lock which is not in use. */
void locks_free_lock(struct file_lock *fl)
{
	BUG_ON(waitqueue_active(&fl->fl_wait));
	BUG_ON(!list_empty(&fl->fl_list));
	BUG_ON(!list_empty(&fl->fl_block));
	BUG_ON(!hlist_unhashed(&fl->fl_link));

	locks_release_private(fl);
	kmem_cache_free(filelock_cache, fl);
}
EXPORT_SYMBOL(locks_free_lock);

static void
locks_dispose_list(struct list_head *dispose)
{
	struct file_lock *fl;

	while (!list_empty(dispose)) {
		fl = list_first_entry(dispose, struct file_lock, fl_list);
		list_del_init(&fl->fl_list);
		locks_free_lock(fl);
	}
}

void locks_init_lock(struct file_lock *fl)
{
	memset(fl, 0, sizeof(struct file_lock));
	locks_init_lock_heads(fl);
}

EXPORT_SYMBOL(locks_init_lock);

/*
 * Initialize a new lock from an existing file_lock structure.
 */
void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
{
	new->fl_owner = fl->fl_owner;
	new->fl_pid = fl->fl_pid;
	new->fl_file = NULL;
	new->fl_flags = fl->fl_flags;
	new->fl_type = fl->fl_type;
	new->fl_start = fl->fl_start;
	new->fl_end = fl->fl_end;
	new->fl_lmops = fl->fl_lmops;
	new->fl_ops = NULL;

	if (fl->fl_lmops) {
		if (fl->fl_lmops->lm_get_owner)
			fl->fl_lmops->lm_get_owner(fl->fl_owner);
	}
}
EXPORT_SYMBOL(locks_copy_conflock);

void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
	/* "new" must be a freshly-initialized lock */
	WARN_ON_ONCE(new->fl_ops);

	locks_copy_conflock(new, fl);

	new->fl_file = fl->fl_file;
	new->fl_ops = fl->fl_ops;

	if (fl->fl_ops) {
		if (fl->fl_ops->fl_copy_lock)
			fl->fl_ops->fl_copy_lock(new, fl);
	}
}

EXPORT_SYMBOL(locks_copy_lock);

static inline int flock_translate_cmd(int cmd) {
	if (cmd & LOCK_MAND)
		return cmd & (LOCK_MAND | LOCK_RW);
	switch (cmd) {
	case LOCK_SH:
		return F_RDLCK;
	case LOCK_EX:
		return F_WRLCK;
	case LOCK_UN:
		return F_UNLCK;
	}
	return -EINVAL;
}

/* Fill in a file_lock structure with an appropriate FLOCK lock. */
static struct file_lock *
flock_make_lock(struct file *filp, unsigned int cmd)
{
	struct file_lock *fl;
	int type = flock_translate_cmd(cmd);

	if (type < 0)
		return ERR_PTR(type);

	fl = locks_alloc_lock();
	if (fl == NULL)
		return ERR_PTR(-ENOMEM);

	fl->fl_file = filp;
	fl->fl_owner = filp;
	fl->fl_pid = current->tgid;
	fl->fl_flags = FL_FLOCK;
	fl->fl_type = type;
	fl->fl_end = OFFSET_MAX;

	return fl;
}

static int assign_type(struct file_lock *fl, long type)
{
	switch (type) {
	case F_RDLCK:
	case F_WRLCK:
	case F_UNLCK:
		fl->fl_type = type;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
				 struct flock64 *l)
{
	switch (l->l_whence) {
	case SEEK_SET:
		fl->fl_start = 0;
		break;
	case SEEK_CUR:
		fl->fl_start = filp->f_pos;
		break;
	case SEEK_END:
		fl->fl_start = i_size_read(file_inode(filp));
		break;
	default:
		return -EINVAL;
	}
	if (l->l_start > OFFSET_MAX - fl->fl_start)
		return -EOVERFLOW;
	fl->fl_start += l->l_start;
	if (fl->fl_start < 0)
		return -EINVAL;

	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
	   POSIX-2001 defines it. */
	if (l->l_len > 0) {
		if (l->l_len - 1 > OFFSET_MAX - fl->fl_start)
			return -EOVERFLOW;
		fl->fl_end = fl->fl_start + l->l_len - 1;

	} else if (l->l_len < 0) {
		if (fl->fl_start + l->l_len < 0)
			return -EINVAL;
		fl->fl_end = fl->fl_start - 1;
		fl->fl_start += l->l_len;
	} else
		fl->fl_end = OFFSET_MAX;

	fl->fl_owner = current->files;
	fl->fl_pid = current->tgid;
	fl->fl_file = filp;
	fl->fl_flags = FL_POSIX;
	fl->fl_ops = NULL;
	fl->fl_lmops = NULL;

	return assign_type(fl, l->l_type);
}
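
/*
 * Worked example (illustrative): with a file position of 200, l_whence =
 * SEEK_CUR, l_start = -50 and l_len = -50, the code above computes
 * fl_start = 200 - 50 - 50 = 100 and fl_end = 200 - 50 - 1 = 149, i.e.
 * the 50 bytes ending just before offset 150, matching the POSIX-2001
 * semantics for negative l_len. An l_len of 0 extends the lock to
 * OFFSET_MAX (end of file).
 */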

/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
 * style lock.
 */
static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
			       struct flock *l)
{
	struct flock64 ll = {
		.l_type = l->l_type,
		.l_whence = l->l_whence,
		.l_start = l->l_start,
		.l_len = l->l_len,
	};

	return flock64_to_posix_lock(filp, fl, &ll);
}

/* default lease lock manager operations */
J
Jeff Layton 已提交
476 477
static bool
lease_break_callback(struct file_lock *fl)
L
Linus Torvalds 已提交
478 479
{
	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
J
Jeff Layton 已提交
480
	return false;
L
Linus Torvalds 已提交
481 482
}

483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
static void
lease_setup(struct file_lock *fl, void **priv)
{
	struct file *filp = fl->fl_file;
	struct fasync_struct *fa = *priv;

	/*
	 * fasync_insert_entry() returns the old entry if any. If there was no
	 * old entry, then it used "priv" and inserted it into the fasync list.
	 * Clear the pointer to indicate that it shouldn't be freed.
	 */
	if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
		*priv = NULL;

	__f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
}

500
static const struct lock_manager_operations lease_manager_ops = {
J
J. Bruce Fields 已提交
501 502
	.lm_break = lease_break_callback,
	.lm_change = lease_modify,
503
	.lm_setup = lease_setup,
L
Linus Torvalds 已提交
504 505 506 507 508
};

/*
 * Initialize a lease, use the default lock manager operations
 */
509
static int lease_init(struct file *filp, long type, struct file_lock *fl)
L
Linus Torvalds 已提交
510
 {
511 512 513
	if (assign_type(fl, type) != 0)
		return -EINVAL;

514
	fl->fl_owner = filp;
L
Linus Torvalds 已提交
515 516 517 518 519 520 521 522 523 524 525 526
	fl->fl_pid = current->tgid;

	fl->fl_file = filp;
	fl->fl_flags = FL_LEASE;
	fl->fl_start = 0;
	fl->fl_end = OFFSET_MAX;
	fl->fl_ops = NULL;
	fl->fl_lmops = &lease_manager_ops;
	return 0;
}

/* Allocate a file_lock initialised to this type of lease */
527
static struct file_lock *lease_alloc(struct file *filp, long type)
L
Linus Torvalds 已提交
528 529
{
	struct file_lock *fl = locks_alloc_lock();
530
	int error = -ENOMEM;
L
Linus Torvalds 已提交
531 532

	if (fl == NULL)
J
J. Bruce Fields 已提交
533
		return ERR_PTR(error);
L
Linus Torvalds 已提交
534 535

	error = lease_init(filp, type, fl);
536 537
	if (error) {
		locks_free_lock(fl);
J
J. Bruce Fields 已提交
538
		return ERR_PTR(error);
539
	}
J
J. Bruce Fields 已提交
540
	return fl;
L
Linus Torvalds 已提交
541 542 543 544 545 546 547 548 549 550 551 552 553
}

/* Check if two locks overlap each other.
 */
static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
{
	return ((fl1->fl_end >= fl2->fl_start) &&
		(fl2->fl_end >= fl1->fl_start));
}

/*
 * Check whether two locks have the same owner.
 */
554
static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
L
Linus Torvalds 已提交
555
{
J
J. Bruce Fields 已提交
556
	if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)
L
Linus Torvalds 已提交
557
		return fl2->fl_lmops == fl1->fl_lmops &&
J
J. Bruce Fields 已提交
558
			fl1->fl_lmops->lm_compare_owner(fl1, fl2);
L
Linus Torvalds 已提交
559 560 561
	return fl1->fl_owner == fl2->fl_owner;
}

562
/* Must be called with the flc_lock held! */
563
static void locks_insert_global_locks(struct file_lock *fl)
564
{
565 566 567 568
	lg_local_lock(&file_lock_lglock);
	fl->fl_link_cpu = smp_processor_id();
	hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
	lg_local_unlock(&file_lock_lglock);
569 570
}

571
/* Must be called with the flc_lock held! */
572
static void locks_delete_global_locks(struct file_lock *fl)
573
{
574 575
	/*
	 * Avoid taking lock if already unhashed. This is safe since this check
576
	 * is done while holding the flc_lock, and new insertions into the list
577 578 579 580 581
	 * also require that it be held.
	 */
	if (hlist_unhashed(&fl->fl_link))
		return;
	lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
582
	hlist_del_init(&fl->fl_link);
583
	lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
584 585
}

586 587 588 589 590 591 592 593
static unsigned long
posix_owner_key(struct file_lock *fl)
{
	if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
		return fl->fl_lmops->lm_owner_key(fl);
	return (unsigned long)fl->fl_owner;
}

594
static void locks_insert_global_blocked(struct file_lock *waiter)
595
{
596 597
	lockdep_assert_held(&blocked_lock_lock);

598
	hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
599 600
}

601
static void locks_delete_global_blocked(struct file_lock *waiter)
602
{
603 604
	lockdep_assert_held(&blocked_lock_lock);

605
	hash_del(&waiter->fl_link);
606 607
}

L
Linus Torvalds 已提交
608 609
/* Remove waiter from blocker's block list.
 * When blocker ends up pointing to itself then the list is empty.
610
 *
611
 * Must be called with blocked_lock_lock held.
L
Linus Torvalds 已提交
612
 */
613
static void __locks_delete_block(struct file_lock *waiter)
L
Linus Torvalds 已提交
614
{
615
	locks_delete_global_blocked(waiter);
L
Linus Torvalds 已提交
616 617 618 619
	list_del_init(&waiter->fl_block);
	waiter->fl_next = NULL;
}

620
static void locks_delete_block(struct file_lock *waiter)
L
Linus Torvalds 已提交
621
{
622
	spin_lock(&blocked_lock_lock);
L
Linus Torvalds 已提交
623
	__locks_delete_block(waiter);
624
	spin_unlock(&blocked_lock_lock);
L
Linus Torvalds 已提交
625 626 627 628 629 630
}

/* Insert waiter into blocker's block list.
 * We use a circular list so that processes can be easily woken up in
 * the order they blocked. The documentation doesn't require this but
 * it seems like the reasonable thing to do.
631
 *
632 633 634 635
 * Must be called with both the flc_lock and blocked_lock_lock held. The
 * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
 * that the flc_lock is also held on insertions we can avoid taking the
 * blocked_lock_lock in some cases when we see that the fl_block list is empty.
L
Linus Torvalds 已提交
636
 */
637 638
static void __locks_insert_block(struct file_lock *blocker,
					struct file_lock *waiter)
L
Linus Torvalds 已提交
639
{
640
	BUG_ON(!list_empty(&waiter->fl_block));
L
Linus Torvalds 已提交
641
	waiter->fl_next = blocker;
642
	list_add_tail(&waiter->fl_block, &blocker->fl_block);
643
	if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
644 645 646
		locks_insert_global_blocked(waiter);
}

647
/* Must be called with flc_lock held. */
648 649 650
static void locks_insert_block(struct file_lock *blocker,
					struct file_lock *waiter)
{
651
	spin_lock(&blocked_lock_lock);
652
	__locks_insert_block(blocker, waiter);
653
	spin_unlock(&blocked_lock_lock);
L
Linus Torvalds 已提交
654 655
}

656 657 658
/*
 * Wake up processes blocked waiting for blocker.
 *
659
 * Must be called with the inode->flc_lock held!
L
Linus Torvalds 已提交
660 661 662
 */
static void locks_wake_up_blocks(struct file_lock *blocker)
{
663 664
	/*
	 * Avoid taking global lock if list is empty. This is safe since new
665 666 667
	 * blocked requests are only added to the list under the flc_lock, and
	 * the flc_lock is always held here. Note that removal from the fl_block
	 * list does not require the flc_lock, so we must recheck list_empty()
668
	 * after acquiring the blocked_lock_lock.
669 670 671 672
	 */
	if (list_empty(&blocker->fl_block))
		return;

673
	spin_lock(&blocked_lock_lock);
L
Linus Torvalds 已提交
674
	while (!list_empty(&blocker->fl_block)) {
675 676 677
		struct file_lock *waiter;

		waiter = list_first_entry(&blocker->fl_block,
L
Linus Torvalds 已提交
678 679
				struct file_lock, fl_block);
		__locks_delete_block(waiter);
J
J. Bruce Fields 已提交
680 681
		if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
			waiter->fl_lmops->lm_notify(waiter);
L
Linus Torvalds 已提交
682 683 684
		else
			wake_up(&waiter->fl_wait);
	}
685
	spin_unlock(&blocked_lock_lock);
L
Linus Torvalds 已提交
686 687
}

688
static void
689
locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
690 691 692 693 694 695
{
	fl->fl_nspid = get_pid(task_tgid(current));
	list_add_tail(&fl->fl_list, before);
	locks_insert_global_locks(fl);
}

696
static void
697
locks_unlink_lock_ctx(struct file_lock *fl)
L
Linus Torvalds 已提交
698
{
699
	locks_delete_global_locks(fl);
700
	list_del_init(&fl->fl_list);
701 702 703 704
	if (fl->fl_nspid) {
		put_pid(fl->fl_nspid);
		fl->fl_nspid = NULL;
	}
L
Linus Torvalds 已提交
705
	locks_wake_up_blocks(fl);
706 707
}

708
static void
709
locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose)
710
{
711
	locks_unlink_lock_ctx(fl);
712
	if (dispose)
713
		list_add(&fl->fl_list, dispose);
714 715
	else
		locks_free_lock(fl);
L
Linus Torvalds 已提交
716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
}

/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
 * checks for shared/exclusive status of overlapping locks.
 */
static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
{
	if (sys_fl->fl_type == F_WRLCK)
		return 1;
	if (caller_fl->fl_type == F_WRLCK)
		return 1;
	return 0;
}

/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
 * checking before calling the locks_conflict().
 */
static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
{
	/* POSIX locks owned by the same process do not conflict with
	 * each other.
	 */
738
	if (posix_same_owner(caller_fl, sys_fl))
L
Linus Torvalds 已提交
739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
		return (0);

	/* Check whether they overlap */
	if (!locks_overlap(caller_fl, sys_fl))
		return 0;

	return (locks_conflict(caller_fl, sys_fl));
}

/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
 * checking before calling the locks_conflict().
 */
static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
{
	/* FLOCK locks referring to the same filp do not conflict with
	 * each other.
	 */
756
	if (caller_fl->fl_file == sys_fl->fl_file)
L
Linus Torvalds 已提交
757 758 759 760 761 762 763
		return (0);
	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
		return 0;

	return (locks_conflict(caller_fl, sys_fl));
}
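
/*
 * Concrete example of posix_locks_conflict() above (illustrative): if
 * process A holds an F_RDLCK on bytes 0-99 and process B requests an
 * F_WRLCK on bytes 50-149, the ranges overlap and one side is a write
 * lock, so the request conflicts.  The same request issued by A itself is
 * not a conflict (posix_same_owner() matches); it is handled further on
 * as a conversion of A's own lock.
 */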

764
void
765
posix_test_lock(struct file *filp, struct file_lock *fl)
L
Linus Torvalds 已提交
766 767
{
	struct file_lock *cfl;
768
	struct file_lock_context *ctx;
769
	struct inode *inode = file_inode(filp);
L
Linus Torvalds 已提交
770

771
	ctx = smp_load_acquire(&inode->i_flctx);
772 773 774 775 776
	if (!ctx || list_empty_careful(&ctx->flc_posix)) {
		fl->fl_type = F_UNLCK;
		return;
	}

777
	spin_lock(&ctx->flc_lock);
778 779 780 781 782 783 784
	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
		if (posix_locks_conflict(fl, cfl)) {
			locks_copy_conflock(fl, cfl);
			if (cfl->fl_nspid)
				fl->fl_pid = pid_vnr(cfl->fl_nspid);
			goto out;
		}
L
Linus Torvalds 已提交
785
	}
786 787
	fl->fl_type = F_UNLCK;
out:
788
	spin_unlock(&ctx->flc_lock);
789
	return;
L
Linus Torvalds 已提交
790 791 792
}
EXPORT_SYMBOL(posix_test_lock);
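
/*
 * Userspace view (illustrative sketch): posix_test_lock() is what backs
 * fcntl(F_GETLK). A caller might probe for a conflicting lock like this:
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,		// whole file
 *	};
 *	fcntl(fd, F_GETLK, &fl);
 *	if (fl.l_type == F_UNLCK)
 *		;	// no conflicting lock was found
 *	else
 *		;	// fl now describes a conflicting lock, including
 *			// the holder's pid in fl.l_pid
 */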

793 794 795 796 797
/*
 * Deadlock detection:
 *
 * We attempt to detect deadlocks that are due purely to posix file
 * locks.
L
Linus Torvalds 已提交
798
 *
799 800 801 802 803 804 805
 * We assume that a task can be waiting for at most one lock at a time.
 * So for any acquired lock, the process holding that lock may be
 * waiting on at most one other lock.  That lock in turns may be held by
 * someone waiting for at most one other lock.  Given a requested lock
 * caller_fl which is about to wait for a conflicting lock block_fl, we
 * follow this chain of waiters to ensure we are not about to create a
 * cycle.
L
Linus Torvalds 已提交
806
 *
807 808 809
 * Since we do this before we ever put a process to sleep on a lock, we
 * are ensured that there is never a cycle; that is what guarantees that
 * the while() loop in posix_locks_deadlock() eventually completes.
810
 *
811 812 813 814
 * Note: the above assumption may not be true when handling lock
 * requests from a broken NFS client. It may also fail in the presence
 * of tasks (such as posix threads) sharing the same open file table.
 * To handle those cases, we just bail out after a few iterations.
815
 *
816
 * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
817 818 819 820
 * Because the owner is not even nominally tied to a thread of
 * execution, the deadlock detection below can't reasonably work well. Just
 * skip it for those.
 *
821
 * In principle, we could do a more limited deadlock detection on FL_OFDLCK
822 823
 * locks that just checks for the case where two tasks are attempting to
 * upgrade from read to write locks on the same inode.
L
Linus Torvalds 已提交
824
 */
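
/*
 * Concrete scenario (illustrative): task A holds a lock on range X and is
 * blocked waiting for range Y; task B holds Y and now requests X.  Walking
 * the blocked_hash from B's would-be blocker (A's lock on X) shows that A
 * is itself waiting on a lock owned by B, so posix_locks_deadlock() returns
 * nonzero and B's F_SETLKW request fails with -EDEADLK instead of sleeping
 * forever.
 */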
825 826 827

#define MAX_DEADLK_ITERATIONS 10

828 829 830 831 832
/* Find a lock that the owner of the given block_fl is blocking on. */
static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
{
	struct file_lock *fl;

833
	hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
834 835 836 837 838 839
		if (posix_same_owner(fl, block_fl))
			return fl->fl_next;
	}
	return NULL;
}

840
/* Must be called with the blocked_lock_lock held! */
841
static int posix_locks_deadlock(struct file_lock *caller_fl,
L
Linus Torvalds 已提交
842 843
				struct file_lock *block_fl)
{
844
	int i = 0;
L
Linus Torvalds 已提交
845

846 847
	lockdep_assert_held(&blocked_lock_lock);

848 849
	/*
	 * This deadlock detector can't reasonably detect deadlocks with
850
	 * FL_OFDLCK locks, since they aren't owned by a process, per-se.
851
	 */
852
	if (IS_OFDLCK(caller_fl))
853 854
		return 0;

855 856 857 858 859
	while ((block_fl = what_owner_is_waiting_for(block_fl))) {
		if (i++ > MAX_DEADLK_ITERATIONS)
			return 0;
		if (posix_same_owner(caller_fl, block_fl))
			return 1;
L
Linus Torvalds 已提交
860 861 862 863 864
	}
	return 0;
}

/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
865
 * after any leases, but before any posix locks.
866 867 868 869
 *
 * Note that if called with an FL_EXISTS argument, the caller may determine
 * whether or not a lock was successfully freed by testing the return
 * value for -ENOENT.
L
Linus Torvalds 已提交
870
 */
871
static int flock_lock_inode(struct inode *inode, struct file_lock *request)
L
Linus Torvalds 已提交
872
{
873
	struct file_lock *new_fl = NULL;
874 875
	struct file_lock *fl;
	struct file_lock_context *ctx;
L
Linus Torvalds 已提交
876
	int error = 0;
877
	bool found = false;
878
	LIST_HEAD(dispose);
L
Linus Torvalds 已提交
879

880 881 882 883 884 885
	ctx = locks_get_lock_context(inode, request->fl_type);
	if (!ctx) {
		if (request->fl_type != F_UNLCK)
			return -ENOMEM;
		return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
	}
886

887
	if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
888
		new_fl = locks_alloc_lock();
889 890
		if (!new_fl)
			return -ENOMEM;
891 892
	}

893
	spin_lock(&ctx->flc_lock);
894 895 896
	if (request->fl_flags & FL_ACCESS)
		goto find_conflict;

897
	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
898
		if (request->fl_file != fl->fl_file)
L
Linus Torvalds 已提交
899
			continue;
900
		if (request->fl_type == fl->fl_type)
L
Linus Torvalds 已提交
901
			goto out;
902
		found = true;
903
		locks_delete_lock_ctx(fl, &dispose);
L
Linus Torvalds 已提交
904 905 906
		break;
	}

907 908 909
	if (request->fl_type == F_UNLCK) {
		if ((request->fl_flags & FL_EXISTS) && !found)
			error = -ENOENT;
910
		goto out;
911
	}
L
Linus Torvalds 已提交
912

913
find_conflict:
914
	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
915
		if (!flock_locks_conflict(request, fl))
L
Linus Torvalds 已提交
916 917
			continue;
		error = -EAGAIN;
918 919 920 921
		if (!(request->fl_flags & FL_SLEEP))
			goto out;
		error = FILE_LOCK_DEFERRED;
		locks_insert_block(fl, request);
L
Linus Torvalds 已提交
922 923
		goto out;
	}
924 925
	if (request->fl_flags & FL_ACCESS)
		goto out;
926
	locks_copy_lock(new_fl, request);
927
	locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
928
	new_fl = NULL;
929
	error = 0;
L
Linus Torvalds 已提交
930 931

out:
932
	spin_unlock(&ctx->flc_lock);
933 934
	if (new_fl)
		locks_free_lock(new_fl);
935
	locks_dispose_list(&dispose);
L
Linus Torvalds 已提交
936 937 938
	return error;
}

939
static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
L
Linus Torvalds 已提交
940
{
941
	struct file_lock *fl, *tmp;
942 943
	struct file_lock *new_fl = NULL;
	struct file_lock *new_fl2 = NULL;
L
Linus Torvalds 已提交
944 945
	struct file_lock *left = NULL;
	struct file_lock *right = NULL;
946
	struct file_lock_context *ctx;
947 948
	int error;
	bool added = false;
949
	LIST_HEAD(dispose);
L
Linus Torvalds 已提交
950

951
	ctx = locks_get_lock_context(inode, request->fl_type);
952
	if (!ctx)
953
		return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
954

L
Linus Torvalds 已提交
955 956 957
	/*
	 * We may need two file_lock structures for this operation,
	 * so we get them in advance to avoid races.
958 959
	 *
	 * In some cases we can be sure, that no new locks will be needed
L
Linus Torvalds 已提交
960
	 */
961 962 963 964 965 966
	if (!(request->fl_flags & FL_ACCESS) &&
	    (request->fl_type != F_UNLCK ||
	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
		new_fl = locks_alloc_lock();
		new_fl2 = locks_alloc_lock();
	}
L
Linus Torvalds 已提交
967

968
	spin_lock(&ctx->flc_lock);
969 970 971
	/*
	 * New lock request. Walk all POSIX locks and look for conflicts. If
	 * there are any, either return error or put the request on the
972
	 * blocker's list of waiters and the global blocked_hash.
973
	 */
L
Linus Torvalds 已提交
974
	if (request->fl_type != F_UNLCK) {
975
		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
L
Linus Torvalds 已提交
976 977
			if (!posix_locks_conflict(request, fl))
				continue;
978
			if (conflock)
979
				locks_copy_conflock(conflock, fl);
L
Linus Torvalds 已提交
980 981 982
			error = -EAGAIN;
			if (!(request->fl_flags & FL_SLEEP))
				goto out;
983 984 985 986
			/*
			 * Deadlock detection and insertion into the blocked
			 * locks list must be done while holding the same lock!
			 */
L
Linus Torvalds 已提交
987
			error = -EDEADLK;
988
			spin_lock(&blocked_lock_lock);
989 990 991 992
			if (likely(!posix_locks_deadlock(request, fl))) {
				error = FILE_LOCK_DEFERRED;
				__locks_insert_block(fl, request);
			}
993
			spin_unlock(&blocked_lock_lock);
L
Linus Torvalds 已提交
994 995 996 997 998 999 1000 1001 1002
			goto out;
  		}
  	}

	/* If we're just looking for a conflict, we're done. */
	error = 0;
	if (request->fl_flags & FL_ACCESS)
		goto out;

1003 1004 1005 1006
	/* Find the first old lock with the same owner as the new lock */
	list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
		if (posix_same_owner(request, fl))
			break;
L
Linus Torvalds 已提交
1007 1008
	}

1009
	/* Process locks with this owner. */
1010 1011 1012 1013 1014
	list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
		if (!posix_same_owner(request, fl))
			break;

		/* Detect adjacent or overlapping regions (if same lock type) */
L
Linus Torvalds 已提交
1015
		if (request->fl_type == fl->fl_type) {
O
Olaf Kirch 已提交
1016 1017 1018 1019
			/* In all comparisons of start vs end, use
			 * "start - 1" rather than "end + 1". If end
			 * is OFFSET_MAX, end + 1 will become negative.
			 */
L
Linus Torvalds 已提交
1020
			if (fl->fl_end < request->fl_start - 1)
1021
				continue;
L
Linus Torvalds 已提交
1022 1023 1024
			/* If the next lock in the list has entirely bigger
			 * addresses than the new one, insert the lock here.
			 */
O
Olaf Kirch 已提交
1025
			if (fl->fl_start - 1 > request->fl_end)
L
Linus Torvalds 已提交
1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
				break;

			/* If we come here, the new and old lock are of the
			 * same type and adjacent or overlapping. Make one
			 * lock yielding from the lower start address of both
			 * locks to the higher end address.
			 */
			if (fl->fl_start > request->fl_start)
				fl->fl_start = request->fl_start;
			else
				request->fl_start = fl->fl_start;
			if (fl->fl_end < request->fl_end)
				fl->fl_end = request->fl_end;
			else
				request->fl_end = fl->fl_end;
			if (added) {
1042
				locks_delete_lock_ctx(fl, &dispose);
L
Linus Torvalds 已提交
1043 1044 1045
				continue;
			}
			request = fl;
1046
			added = true;
1047
		} else {
L
Linus Torvalds 已提交
1048 1049 1050 1051
			/* Processing for different lock types is a bit
			 * more complex.
			 */
			if (fl->fl_end < request->fl_start)
1052
				continue;
L
Linus Torvalds 已提交
1053 1054 1055
			if (fl->fl_start > request->fl_end)
				break;
			if (request->fl_type == F_UNLCK)
1056
				added = true;
L
Linus Torvalds 已提交
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
			if (fl->fl_start < request->fl_start)
				left = fl;
			/* If the next lock in the list has a higher end
			 * address than the new one, insert the new one here.
			 */
			if (fl->fl_end > request->fl_end) {
				right = fl;
				break;
			}
			if (fl->fl_start >= request->fl_start) {
				/* The new lock completely replaces an old
				 * one (This may happen several times).
				 */
				if (added) {
1071
					locks_delete_lock_ctx(fl, &dispose);
L
Linus Torvalds 已提交
1072 1073
					continue;
				}
1074 1075 1076 1077 1078 1079
				/*
				 * Replace the old lock with new_fl, and
				 * remove the old one. It's safe to do the
				 * insert here since we know that we won't be
				 * using new_fl later, and that the lock is
				 * just replacing an existing lock.
L
Linus Torvalds 已提交
1080
				 */
1081 1082 1083 1084 1085 1086
				error = -ENOLCK;
				if (!new_fl)
					goto out;
				locks_copy_lock(new_fl, request);
				request = new_fl;
				new_fl = NULL;
1087 1088
				locks_insert_lock_ctx(request, &fl->fl_list);
				locks_delete_lock_ctx(fl, &dispose);
1089
				added = true;
L
Linus Torvalds 已提交
1090 1091 1092 1093
			}
		}
	}

1094
	/*
1095 1096 1097
	 * The above code only modifies existing locks in case of merging or
	 * replacing. If new lock(s) need to be inserted all modifications are
	 * done below this, so it's safe yet to bail out.
1098 1099 1100 1101 1102
	 */
	error = -ENOLCK; /* "no luck" */
	if (right && left == right && !new_fl2)
		goto out;

L
Linus Torvalds 已提交
1103 1104
	error = 0;
	if (!added) {
1105 1106 1107
		if (request->fl_type == F_UNLCK) {
			if (request->fl_flags & FL_EXISTS)
				error = -ENOENT;
L
Linus Torvalds 已提交
1108
			goto out;
1109
		}
1110 1111 1112 1113 1114

		if (!new_fl) {
			error = -ENOLCK;
			goto out;
		}
L
Linus Torvalds 已提交
1115
		locks_copy_lock(new_fl, request);
1116
		locks_insert_lock_ctx(new_fl, &fl->fl_list);
1117
		fl = new_fl;
L
Linus Torvalds 已提交
1118 1119 1120 1121 1122 1123 1124 1125 1126 1127
		new_fl = NULL;
	}
	if (right) {
		if (left == right) {
			/* The new lock breaks the old one in two pieces,
			 * so we have to use the second new lock.
			 */
			left = new_fl2;
			new_fl2 = NULL;
			locks_copy_lock(left, right);
1128
			locks_insert_lock_ctx(left, &fl->fl_list);
L
Linus Torvalds 已提交
1129 1130 1131 1132 1133 1134 1135 1136 1137
		}
		right->fl_start = request->fl_end + 1;
		locks_wake_up_blocks(right);
	}
	if (left) {
		left->fl_end = request->fl_start - 1;
		locks_wake_up_blocks(left);
	}
 out:
1138
	spin_unlock(&ctx->flc_lock);
L
Linus Torvalds 已提交
1139 1140 1141 1142 1143 1144 1145
	/*
	 * Free any unused locks.
	 */
	if (new_fl)
		locks_free_lock(new_fl);
	if (new_fl2)
		locks_free_lock(new_fl2);
1146
	locks_dispose_list(&dispose);
1147 1148
	trace_posix_lock_inode(inode, request, error);

L
Linus Torvalds 已提交
1149 1150 1151 1152 1153 1154 1155
	return error;
}
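
/*
 * Worked example of the splitting logic above (illustrative): if the owner
 * holds a write lock on bytes 0-199 and requests F_UNLCK on bytes 50-99,
 * neither a merge nor a plain replacement applies.  The existing lock is
 * trimmed to cover 100-199 and the second preallocated lock (new_fl2) is
 * inserted to cover 0-49; blocked waiters on both pieces are then woken
 * via locks_wake_up_blocks().  This middle-split case is also why the code
 * fails with -ENOLCK when new_fl2 could not be allocated.
 */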

/**
 * posix_lock_file - Apply a POSIX-style lock to a file
 * @filp: The file to apply the lock to
 * @fl: The lock to be applied
1156
 * @conflock: Place to return a copy of the conflicting lock, if found.
L
Linus Torvalds 已提交
1157 1158 1159 1160
 *
 * Add a POSIX style lock to a file.
 * We merge adjacent & overlapping locks whenever possible.
 * POSIX locks are sorted by owner task, then by starting address
1161 1162 1163 1164
 *
 * Note that if called with an FL_EXISTS argument, the caller may determine
 * whether or not a lock was successfully freed by testing the return
 * value for -ENOENT.
L
Linus Torvalds 已提交
1165
 */
1166
int posix_lock_file(struct file *filp, struct file_lock *fl,
1167 1168
			struct file_lock *conflock)
{
A
Al Viro 已提交
1169
	return __posix_lock_file(file_inode(filp), fl, conflock);
L
Linus Torvalds 已提交
1170
}
1171
EXPORT_SYMBOL(posix_lock_file);
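
/*
 * Sketch of a typical caller (illustrative, not taken from a real
 * filesystem): a filesystem that only needs local POSIX locking could
 * forward its ->lock() method here, e.g.:
 *
 *	static int myfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 *	{
 *		if (IS_GETLK(cmd)) {
 *			posix_test_lock(filp, fl);
 *			return 0;
 *		}
 *		return posix_lock_file(filp, fl, NULL);
 *	}
 *
 * ("myfs_lock" is hypothetical.) Filesystems without a ->lock() method get
 * equivalent behaviour from the default VFS paths.
 */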
L
Linus Torvalds 已提交
1172 1173

/**
1174 1175
 * posix_lock_inode_wait - Apply a POSIX-style lock to a file
 * @inode: inode of file to which lock request should be applied
L
Linus Torvalds 已提交
1176 1177
 * @fl: The lock to be applied
 *
1178
 * Apply a POSIX style lock request to an inode.
L
Linus Torvalds 已提交
1179
 */
1180
static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
L
Linus Torvalds 已提交
1181 1182 1183 1184
{
	int error;
	might_sleep ();
	for (;;) {
1185
		error = __posix_lock_file(inode, fl, NULL);
1186
		if (error != FILE_LOCK_DEFERRED)
L
Linus Torvalds 已提交
1187 1188 1189 1190 1191 1192 1193 1194 1195 1196
			break;
		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
		if (!error)
			continue;

		locks_delete_block(fl);
		break;
	}
	return error;
}
1197

1198
#ifdef CONFIG_MANDATORY_FILE_LOCKING
L
Linus Torvalds 已提交
1199 1200
/**
 * locks_mandatory_locked - Check for an active lock
1201
 * @file: the file to check
L
Linus Torvalds 已提交
1202 1203 1204 1205
 *
 * Searches the inode's list of locks to find any POSIX locks which conflict.
 * This function is called from locks_verify_locked() only.
 */
1206
int locks_mandatory_locked(struct file *file)
L
Linus Torvalds 已提交
1207
{
1208
	int ret;
1209
	struct inode *inode = file_inode(file);
1210
	struct file_lock_context *ctx;
L
Linus Torvalds 已提交
1211 1212
	struct file_lock *fl;

1213
	ctx = smp_load_acquire(&inode->i_flctx);
1214 1215 1216
	if (!ctx || list_empty_careful(&ctx->flc_posix))
		return 0;

L
Linus Torvalds 已提交
1217 1218 1219
	/*
	 * Search the lock list for this inode for any POSIX locks.
	 */
1220
	spin_lock(&ctx->flc_lock);
1221 1222
	ret = 0;
	list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1223
		if (fl->fl_owner != current->files &&
1224 1225
		    fl->fl_owner != file) {
			ret = -EAGAIN;
L
Linus Torvalds 已提交
1226
			break;
1227
		}
L
Linus Torvalds 已提交
1228
	}
1229
	spin_unlock(&ctx->flc_lock);
1230
	return ret;
L
Linus Torvalds 已提交
1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251
}

/**
 * locks_mandatory_area - Check for a conflicting lock
 * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ
 *		for shared
 * @inode:      the file to check
 * @filp:       how the file was opened (if it was)
 * @offset:     start of area to check
 * @count:      length of area to check
 *
 * Searches the inode's list of locks to find any POSIX locks which conflict.
 * This function is called from rw_verify_area() and
 * locks_verify_truncate().
 */
int locks_mandatory_area(int read_write, struct inode *inode,
			 struct file *filp, loff_t offset,
			 size_t count)
{
	struct file_lock fl;
	int error;
1252
	bool sleep = false;
L
Linus Torvalds 已提交
1253 1254 1255 1256 1257 1258

	locks_init_lock(&fl);
	fl.fl_pid = current->tgid;
	fl.fl_file = filp;
	fl.fl_flags = FL_POSIX | FL_ACCESS;
	if (filp && !(filp->f_flags & O_NONBLOCK))
1259
		sleep = true;
L
Linus Torvalds 已提交
1260 1261 1262 1263 1264
	fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
	fl.fl_start = offset;
	fl.fl_end = offset + count - 1;

	for (;;) {
1265
		if (filp) {
1266
			fl.fl_owner = filp;
1267 1268 1269 1270 1271 1272 1273 1274 1275
			fl.fl_flags &= ~FL_SLEEP;
			error = __posix_lock_file(inode, &fl, NULL);
			if (!error)
				break;
		}

		if (sleep)
			fl.fl_flags |= FL_SLEEP;
		fl.fl_owner = current->files;
1276
		error = __posix_lock_file(inode, &fl, NULL);
1277
		if (error != FILE_LOCK_DEFERRED)
L
Linus Torvalds 已提交
1278 1279 1280 1281 1282 1283 1284
			break;
		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
		if (!error) {
			/*
			 * If we've been sleeping someone might have
			 * changed the permissions behind our back.
			 */
1285
			if (__mandatory_lock(inode))
L
Linus Torvalds 已提交
1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
				continue;
		}

		locks_delete_block(&fl);
		break;
	}

	return error;
}

EXPORT_SYMBOL(locks_mandatory_area);
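
/*
 * How this is reached in practice (illustrative): mandatory locking only
 * applies when the filesystem is mounted with "-o mand" and the file has
 * the setgid bit set with group execute cleared, e.g.:
 *
 *	# mount -o mand /dev/sda1 /mnt
 *	# chmod g+s,g-x /mnt/datafile
 *
 * After that, ordinary read()/write() calls that overlap a conflicting
 * POSIX lock are checked here instead of being advisory-only. See
 * Documentation/filesystems/mandatory-locking.txt for the full story.
 */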
1297
#endif /* CONFIG_MANDATORY_FILE_LOCKING */
L
Linus Torvalds 已提交
1298

1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309
static void lease_clear_pending(struct file_lock *fl, int arg)
{
	switch (arg) {
	case F_UNLCK:
		fl->fl_flags &= ~FL_UNLOCK_PENDING;
		/* fall through: */
	case F_RDLCK:
		fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
	}
}

L
Linus Torvalds 已提交
1310
/* We already had a lease on this file; just change its type */
1311
int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
L
Linus Torvalds 已提交
1312 1313 1314 1315 1316
{
	int error = assign_type(fl, arg);

	if (error)
		return error;
1317
	lease_clear_pending(fl, arg);
L
Linus Torvalds 已提交
1318
	locks_wake_up_blocks(fl);
1319 1320 1321 1322 1323
	if (arg == F_UNLCK) {
		struct file *filp = fl->fl_file;

		f_delown(filp);
		filp->f_owner.signum = 0;
1324 1325 1326 1327 1328
		fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
		if (fl->fl_fasync != NULL) {
			printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
			fl->fl_fasync = NULL;
		}
1329
		locks_delete_lock_ctx(fl, dispose);
1330
	}
L
Linus Torvalds 已提交
1331 1332 1333 1334
	return 0;
}
EXPORT_SYMBOL(lease_modify);

1335 1336 1337 1338 1339 1340 1341 1342
static bool past_time(unsigned long then)
{
	if (!then)
		/* 0 is a special value meaning "this never expires": */
		return false;
	return time_after(jiffies, then);
}

1343
static void time_out_leases(struct inode *inode, struct list_head *dispose)
L
Linus Torvalds 已提交
1344
{
1345 1346
	struct file_lock_context *ctx = inode->i_flctx;
	struct file_lock *fl, *tmp;
L
Linus Torvalds 已提交
1347

1348
	lockdep_assert_held(&ctx->flc_lock);
1349

1350
	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1351
		trace_time_out_leases(inode, fl);
1352
		if (past_time(fl->fl_downgrade_time))
1353
			lease_modify(fl, F_RDLCK, dispose);
1354
		if (past_time(fl->fl_break_time))
1355
			lease_modify(fl, F_UNLCK, dispose);
L
Linus Torvalds 已提交
1356 1357 1358
	}
}

J
J. Bruce Fields 已提交
1359 1360
static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
{
C
Christoph Hellwig 已提交
1361 1362
	if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
		return false;
J
J. Bruce Fields 已提交
1363 1364 1365 1366 1367
	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
		return false;
	return locks_conflict(breaker, lease);
}

1368 1369 1370
static bool
any_leases_conflict(struct inode *inode, struct file_lock *breaker)
{
1371
	struct file_lock_context *ctx = inode->i_flctx;
1372 1373
	struct file_lock *fl;

1374
	lockdep_assert_held(&ctx->flc_lock);
1375

1376
	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1377 1378 1379 1380 1381 1382
		if (leases_conflict(fl, breaker))
			return true;
	}
	return false;
}

L
Linus Torvalds 已提交
1383 1384 1385
/**
 *	__break_lease	-	revoke all outstanding leases on file
 *	@inode: the inode of the file to return
J
J. Bruce Fields 已提交
1386 1387 1388 1389
 *	@mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
 *	    break all leases
 *	@type: FL_LEASE: break leases and delegations; FL_DELEG: break
 *	    only delegations
L
Linus Torvalds 已提交
1390
 *
1391 1392 1393
 *	break_lease (inlined for speed) has checked there already is at least
 *	some kind of lock (maybe a lease) on this file.  Leases are broken on
 *	a call to open() or truncate().  This function can sleep unless you
L
Linus Torvalds 已提交
1394 1395
 *	specified %O_NONBLOCK to your open().
 */
J
J. Bruce Fields 已提交
1396
int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
L
Linus Torvalds 已提交
1397
{
1398
	int error = 0;
1399
	struct file_lock_context *ctx;
1400
	struct file_lock *new_fl, *fl, *tmp;
L
Linus Torvalds 已提交
1401
	unsigned long break_time;
1402
	int want_write = (mode & O_ACCMODE) != O_RDONLY;
1403
	LIST_HEAD(dispose);
L
Linus Torvalds 已提交
1404

1405
	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1406 1407
	if (IS_ERR(new_fl))
		return PTR_ERR(new_fl);
J
J. Bruce Fields 已提交
1408
	new_fl->fl_flags = type;
L
Linus Torvalds 已提交
1409

1410
	/* typically we will check that ctx is non-NULL before calling */
1411
	ctx = smp_load_acquire(&inode->i_flctx);
1412 1413 1414 1415 1416
	if (!ctx) {
		WARN_ON_ONCE(1);
		return error;
	}

1417
	spin_lock(&ctx->flc_lock);
L
Linus Torvalds 已提交
1418

1419
	time_out_leases(inode, &dispose);
L
Linus Torvalds 已提交
1420

1421
	if (!any_leases_conflict(inode, new_fl))
1422 1423
		goto out;

L
Linus Torvalds 已提交
1424 1425 1426 1427 1428 1429 1430
	break_time = 0;
	if (lease_break_time > 0) {
		break_time = jiffies + lease_break_time * HZ;
		if (break_time == 0)
			break_time++;	/* so that 0 means no break time */
	}

1431
	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
J
J. Bruce Fields 已提交
1432 1433
		if (!leases_conflict(fl, new_fl))
			continue;
1434 1435 1436 1437
		if (want_write) {
			if (fl->fl_flags & FL_UNLOCK_PENDING)
				continue;
			fl->fl_flags |= FL_UNLOCK_PENDING;
L
Linus Torvalds 已提交
1438
			fl->fl_break_time = break_time;
1439
		} else {
1440
			if (lease_breaking(fl))
1441 1442 1443
				continue;
			fl->fl_flags |= FL_DOWNGRADE_PENDING;
			fl->fl_downgrade_time = break_time;
L
Linus Torvalds 已提交
1444
		}
J
Jeff Layton 已提交
1445
		if (fl->fl_lmops->lm_break(fl))
1446
			locks_delete_lock_ctx(fl, &dispose);
L
Linus Torvalds 已提交
1447 1448
	}

1449
	if (list_empty(&ctx->flc_lease))
J
Jeff Layton 已提交
1450 1451
		goto out;

1452
	if (mode & O_NONBLOCK) {
1453
		trace_break_lease_noblock(inode, new_fl);
L
Linus Torvalds 已提交
1454 1455 1456 1457 1458
		error = -EWOULDBLOCK;
		goto out;
	}

restart:
1459 1460
	fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
	break_time = fl->fl_break_time;
1461
	if (break_time != 0)
L
Linus Torvalds 已提交
1462
		break_time -= jiffies;
1463 1464
	if (break_time == 0)
		break_time++;
1465
	locks_insert_block(fl, new_fl);
1466
	trace_break_lease_block(inode, new_fl);
1467
	spin_unlock(&ctx->flc_lock);
1468
	locks_dispose_list(&dispose);
1469 1470
	error = wait_event_interruptible_timeout(new_fl->fl_wait,
						!new_fl->fl_next, break_time);
1471
	spin_lock(&ctx->flc_lock);
1472
	trace_break_lease_unblock(inode, new_fl);
1473
	locks_delete_block(new_fl);
L
Linus Torvalds 已提交
1474
	if (error >= 0) {
1475 1476 1477 1478
		/*
		 * Wait for the next conflicting lease that has not been
		 * broken yet
		 */
1479 1480 1481 1482
		if (error == 0)
			time_out_leases(inode, &dispose);
		if (any_leases_conflict(inode, new_fl))
			goto restart;
L
Linus Torvalds 已提交
1483 1484 1485
		error = 0;
	}
out:
1486
	spin_unlock(&ctx->flc_lock);
1487
	locks_dispose_list(&dispose);
1488
	locks_free_lock(new_fl);
L
Linus Torvalds 已提交
1489 1490 1491 1492 1493 1494
	return error;
}

EXPORT_SYMBOL(__break_lease);

/**
1495
 *	lease_get_mtime - get the last modified time of an inode
L
Linus Torvalds 已提交
1496 1497 1498 1499 1500
 *	@inode: the inode
 *      @time:  pointer to a timespec which will contain the last modified time
 *
 * This is to force NFS clients to flush their caches for files with
 * exclusive leases.  The justification is that if someone has an
1501
 * exclusive lease, then they could be modifying it.
L
Linus Torvalds 已提交
1502 1503 1504
 */
void lease_get_mtime(struct inode *inode, struct timespec *time)
{
1505
	bool has_lease = false;
1506
	struct file_lock_context *ctx;
1507
	struct file_lock *fl;
1508

1509
	ctx = smp_load_acquire(&inode->i_flctx);
1510
	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1511
		spin_lock(&ctx->flc_lock);
1512 1513 1514 1515
		fl = list_first_entry_or_null(&ctx->flc_lease,
					      struct file_lock, fl_list);
		if (fl && (fl->fl_type == F_WRLCK))
			has_lease = true;
1516
		spin_unlock(&ctx->flc_lock);
1517 1518 1519
	}

	if (has_lease)
L
Linus Torvalds 已提交
1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552
		*time = current_fs_time(inode->i_sb);
	else
		*time = inode->i_mtime;
}

EXPORT_SYMBOL(lease_get_mtime);

/**
 *	fcntl_getlease - Enquire what lease is currently active
 *	@filp: the file
 *
 *	The value returned by this function will be one of
 *	(if no lease break is pending):
 *
 *	%F_RDLCK to indicate a shared lease is held.
 *
 *	%F_WRLCK to indicate an exclusive lease is held.
 *
 *	%F_UNLCK to indicate no lease is held.
 *
 *	(if a lease break is pending):
 *
 *	%F_RDLCK to indicate an exclusive lease needs to be
 *		changed to a shared lease (or removed).
 *
 *	%F_UNLCK to indicate the lease needs to be removed.
 *
 *	XXX: sfr & willy disagree over whether F_INPROGRESS
 *	should be returned to userspace.
 */
int fcntl_getlease(struct file *filp)
{
	struct file_lock *fl;
1553
	struct inode *inode = file_inode(filp);
1554
	struct file_lock_context *ctx;
L
Linus Torvalds 已提交
1555
	int type = F_UNLCK;
1556
	LIST_HEAD(dispose);
L
Linus Torvalds 已提交
1557

1558
	ctx = smp_load_acquire(&inode->i_flctx);
1559
	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1560
		spin_lock(&ctx->flc_lock);
1561 1562 1563 1564
		time_out_leases(file_inode(filp), &dispose);
		list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
			if (fl->fl_file != filp)
				continue;
1565
			type = target_leasetype(fl);
L
Linus Torvalds 已提交
1566 1567
			break;
		}
1568
		spin_unlock(&ctx->flc_lock);
1569
		locks_dispose_list(&dispose);
L
Linus Torvalds 已提交
1570 1571 1572 1573
	}
	return type;
}
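
/*
 * Userspace view (illustrative sketch): leases are taken and queried with
 * fcntl(). A process that wants to cache a file might do:
 *
 *	signal(SIGIO, lease_break_handler);	// SIGIO is the default
 *						// lease-break signal
 *	fcntl(fd, F_SETLEASE, F_RDLCK);		// take a read lease
 *	...
 *	cur = fcntl(fd, F_GETLEASE);		// F_RDLCK, F_WRLCK or F_UNLCK
 *
 * ("lease_break_handler" is a user-chosen handler.) When another process
 * opens the file in a conflicting mode, the lease holder gets the signal
 * and has up to lease_break_time seconds to release or downgrade the lease
 * with F_SETLEASE before the kernel does it for them (see __break_lease()
 * above).
 */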

1574 1575 1576 1577 1578 1579
/**
 * check_conflicting_open - see if the given dentry points to a file that has
 * 			    an existing open that would conflict with the
 * 			    desired lease.
 * @dentry:	dentry to check
 * @arg:	type of lease that we're trying to acquire
1580
 * @flags:	current lock flags
1581 1582 1583 1584 1585
 *
 * Check to see if there's an existing open fd on this file that would
 * conflict with the lease we're trying to set.
 */
static int
C
Christoph Hellwig 已提交
1586
check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
1587 1588 1589 1590
{
	int ret = 0;
	struct inode *inode = dentry->d_inode;

C
Christoph Hellwig 已提交
1591 1592 1593
	if (flags & FL_LAYOUT)
		return 0;

1594 1595 1596 1597 1598 1599 1600 1601 1602 1603
	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
		return -EAGAIN;

	if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
	    (atomic_read(&inode->i_count) > 1)))
		ret = -EAGAIN;

	return ret;
}

1604 1605
static int
generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
L
Linus Torvalds 已提交
1606
{
1607
	struct file_lock *fl, *my_fl = NULL, *lease;
1608
	struct dentry *dentry = filp->f_path.dentry;
L
Linus Torvalds 已提交
1609
	struct inode *inode = dentry->d_inode;
1610
	struct file_lock_context *ctx;
J
J. Bruce Fields 已提交
1611
	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
J
J. Bruce Fields 已提交
1612
	int error;
1613
	LIST_HEAD(dispose);
L
Linus Torvalds 已提交
1614

1615
	lease = *flp;
1616 1617
	trace_generic_add_lease(inode, lease);

1618 1619
	/* Note that arg is never F_UNLCK here */
	ctx = locks_get_lock_context(inode, arg);
1620 1621 1622
	if (!ctx)
		return -ENOMEM;

J
J. Bruce Fields 已提交
1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635
	/*
	 * In the delegation case we need mutual exclusion with
	 * a number of operations that take the i_mutex.  We trylock
	 * because delegations are an optional optimization, and if
	 * there's some chance of a conflict we'd rather not bother;
	 * that may be a sign this just isn't a good file to hand
	 * out a delegation on.
	 */
	if (is_deleg && !mutex_trylock(&inode->i_mutex))
		return -EAGAIN;

	if (is_deleg && arg == F_WRLCK) {
		/* Write delegations are not currently supported: */
		mutex_unlock(&inode->i_mutex);
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	spin_lock(&ctx->flc_lock);
	time_out_leases(inode, &dispose);
	error = check_conflicting_open(dentry, arg, lease->fl_flags);
	if (error)
		goto out;

	/*
	 * At this point, we know that if there is an exclusive
	 * lease on this file, then we hold it on this filp
	 * (otherwise our open of this file would have blocked).
	 * And if we are trying to acquire an exclusive lease,
	 * then the file is not open by anyone (including us)
	 * except for this filp.
	 */
	error = -EAGAIN;
	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
		if (fl->fl_file == filp &&
		    fl->fl_owner == lease->fl_owner) {
			my_fl = fl;
			continue;
		}

		/*
		 * No exclusive leases if someone else has a lease on
		 * this file:
		 */
		if (arg == F_WRLCK)
			goto out;
		/*
		 * Modifying our existing lease is OK, but no getting a
		 * new lease if someone else is opening for write:
		 */
		if (fl->fl_flags & FL_UNLOCK_PENDING)
			goto out;
	}

	if (my_fl != NULL) {
		lease = my_fl;
		error = lease->fl_lmops->lm_change(lease, arg, &dispose);
		if (error)
			goto out;
		goto out_setup;
	}

	error = -EINVAL;
	if (!leases_enable)
		goto out;

	locks_insert_lock_ctx(lease, &ctx->flc_lease);
	/*
	 * The check in break_lease() is lockless. It's possible for another
	 * open to race in after we did the earlier check for a conflicting
	 * open but before the lease was inserted. Check again for a
	 * conflicting open and cancel the lease if there is one.
	 *
	 * We also add a barrier here to ensure that the insertion of the lock
	 * precedes these checks.
	 */
	smp_mb();
	error = check_conflicting_open(dentry, arg, lease->fl_flags);
	if (error) {
		locks_unlink_lock_ctx(lease);
		goto out;
	}

out_setup:
	if (lease->fl_lmops->lm_setup)
		lease->fl_lmops->lm_setup(lease, priv);
out:
	spin_unlock(&ctx->flc_lock);
	locks_dispose_list(&dispose);
	if (is_deleg)
		mutex_unlock(&inode->i_mutex);
	if (!error && !my_fl)
		*flp = NULL;
	return error;
}

static int generic_delete_lease(struct file *filp, void *owner)
{
	int error = -EAGAIN;
	struct file_lock *fl, *victim = NULL;
	struct inode *inode = file_inode(filp);
	struct file_lock_context *ctx;
	LIST_HEAD(dispose);

	ctx = smp_load_acquire(&inode->i_flctx);
	if (!ctx) {
		trace_generic_delete_lease(inode, NULL);
		return error;
	}

	spin_lock(&ctx->flc_lock);
	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
		if (fl->fl_file == filp &&
		    fl->fl_owner == owner) {
			victim = fl;
			break;
		}
	}
	trace_generic_delete_lease(inode, victim);
	if (victim)
		error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
	spin_unlock(&ctx->flc_lock);
	locks_dispose_list(&dispose);
	return error;
}

/**
 *	generic_setlease	-	sets a lease on an open file
 *	@filp:	file pointer
 *	@arg:	type of lease to obtain
 *	@flp:	input - file_lock to use, output - file_lock inserted
 *	@priv:	private data for lm_setup (may be NULL if lm_setup
 *		doesn't require it)
 *
 *	The (input) flp->fl_lmops->lm_break function is required
 *	by break_lease().
 */
int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
			void **priv)
{
	struct inode *inode = file_inode(filp);
	int error;

	if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
		return -EACCES;
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	error = security_file_lock(filp, arg);
	if (error)
		return error;

	switch (arg) {
	case F_UNLCK:
		return generic_delete_lease(filp, *priv);
	case F_RDLCK:
	case F_WRLCK:
		if (!(*flp)->fl_lmops->lm_break) {
			WARN_ON_ONCE(1);
			return -ENOLCK;
		}

		return generic_add_lease(filp, arg, flp, priv);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(generic_setlease);

/**
 * vfs_setlease        -       sets a lease on an open file
 * @filp:	file pointer
 * @arg:	type of lease to obtain
 * @lease:	file_lock to use when adding a lease
 * @priv:	private info for lm_setup when adding a lease (may be
 * 		NULL if lm_setup doesn't require it)
 *
 * Call this to establish a lease on the file. The "lease" argument is not
 * used for F_UNLCK requests and may be NULL. For commands that set or alter
 * an existing lease, the (*lease)->fl_lmops->lm_break operation must be set;
 * if not, this function will return -ENOLCK (and generate a scary-looking
 * stack trace).
 *
 * The "priv" pointer is passed directly to the lm_setup function as-is. It
 * may be NULL if the lm_setup operation doesn't require it.
 */
int
vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
{
	if (filp->f_op->setlease)
		return filp->f_op->setlease(filp, arg, lease, priv);
	else
		return generic_setlease(filp, arg, lease, priv);
}
EXPORT_SYMBOL_GPL(vfs_setlease);
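
/*
 * Example (illustrative sketch, not taken from any real filesystem): a
 * filesystem that wants to intercept lease requests can provide its own
 * ->setlease method and fall back to generic_setlease() for the common
 * case.  The examplefs_* names are hypothetical.
 */
#if 0
static int examplefs_setlease(struct file *filp, long arg,
			      struct file_lock **flp, void **priv)
{
	/* A filesystem could veto or account for the lease here before
	 * handing the request to the generic VFS implementation. */
	return generic_setlease(filp, arg, flp, priv);
}

static const struct file_operations examplefs_file_ops = {
	/* ... other methods ... */
	.setlease	= examplefs_setlease,
};
#endif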

static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
{
	struct file_lock *fl;
	struct fasync_struct *new;
	int error;

	fl = lease_alloc(filp, arg);
	if (IS_ERR(fl))
		return PTR_ERR(fl);

	new = fasync_alloc();
	if (!new) {
		locks_free_lock(fl);
		return -ENOMEM;
	}
	new->fa_fd = fd;

	error = vfs_setlease(filp, arg, &fl, (void **)&new);
	if (fl)
		locks_free_lock(fl);
	if (new)
		fasync_free(new);
	return error;
}

/**
 *	fcntl_setlease	-	sets a lease on an open file
 *	@fd: open file descriptor
 *	@filp: file pointer
 *	@arg: type of lease to obtain
 *
 *	Call this fcntl to establish a lease on the file.
 *	Note that you also need to call %F_SETSIG to
 *	receive a signal when the lease is broken.
 */
int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{
	if (arg == F_UNLCK)
		return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
	return do_fcntl_add_lease(fd, filp, arg);
}
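
/*
 * Example (illustrative userspace sketch): taking a read lease on an open
 * file and asking for a real-time signal when the lease is broken.  The
 * file name and signal choice are arbitrary.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>

int main(void)
{
	int fd = open("/tmp/leased-file", O_RDONLY);	/* hypothetical path */

	if (fd < 0)
		return 1;
	/* Deliver SIGRTMIN instead of SIGIO when the lease is broken. */
	if (fcntl(fd, F_SETSIG, SIGRTMIN) < 0)
		perror("F_SETSIG");
	/* Request a shared (read) lease; this fails with EAGAIN if the
	 * file is currently open for writing elsewhere. */
	if (fcntl(fd, F_SETLEASE, F_RDLCK) < 0)
		perror("F_SETLEASE");
	return 0;
}
#endif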

/**
 * flock_lock_inode_wait - Apply a FLOCK-style lock to a file
 * @inode: inode of the file to apply to
 * @fl: The lock to be applied
 *
 * Apply a FLOCK style lock request to an inode.
 */
static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
	int error;
	might_sleep();
	for (;;) {
		error = flock_lock_inode(inode, fl);
		if (error != FILE_LOCK_DEFERRED)
			break;
		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
		if (!error)
			continue;

		locks_delete_block(fl);
		break;
	}
	return error;
}

/**
 * locks_lock_inode_wait - Apply a lock to an inode
 * @inode: inode of the file to apply to
 * @fl: The lock to be applied
 *
 * Apply a POSIX or FLOCK style lock request to an inode.
 */
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
	int res = 0;
	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
		case FL_POSIX:
			res = posix_lock_inode_wait(inode, fl);
			break;
		case FL_FLOCK:
			res = flock_lock_inode_wait(inode, fl);
			break;
		default:
			BUG();
	}
	return res;
}
EXPORT_SYMBOL(locks_lock_inode_wait);

/**
 *	sys_flock: - flock() system call.
 *	@fd: the file descriptor to lock.
 *	@cmd: the type of lock to apply.
 *
 *	Apply a %FL_FLOCK style lock to an open file descriptor.
 *	The @cmd can be one of
 *
 *	%LOCK_SH -- a shared lock.
 *
 *	%LOCK_EX -- an exclusive lock.
 *
 *	%LOCK_UN -- remove an existing lock.
 *
 *	%LOCK_MAND -- a `mandatory' flock.  This exists to emulate Windows Share Modes.
 *
 *	%LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
 *	processes read and write access respectively.
 */
SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
{
	struct fd f = fdget(fd);
	struct file_lock *lock;
	int can_sleep, unlock;
	int error;

	error = -EBADF;
	if (!f.file)
		goto out;

	can_sleep = !(cmd & LOCK_NB);
	cmd &= ~LOCK_NB;
	unlock = (cmd == LOCK_UN);

	if (!unlock && !(cmd & LOCK_MAND) &&
	    !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
		goto out_putf;

	lock = flock_make_lock(f.file, cmd);
	if (IS_ERR(lock)) {
		error = PTR_ERR(lock);
		goto out_putf;
	}

	if (can_sleep)
		lock->fl_flags |= FL_SLEEP;

	error = security_file_lock(f.file, lock->fl_type);
	if (error)
		goto out_free;

	if (f.file->f_op->flock)
		error = f.file->f_op->flock(f.file,
					  (can_sleep) ? F_SETLKW : F_SETLK,
					  lock);
	else
		error = locks_lock_file_wait(f.file, lock);

 out_free:
	locks_free_lock(lock);

 out_putf:
	fdput(f);
 out:
	return error;
}
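
/*
 * Example (illustrative userspace sketch): serializing cooperating
 * processes with flock().  The lock file path is arbitrary.
 */
#if 0
#include <sys/file.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/example.lock", O_CREAT | O_RDWR, 0644);

	if (fd < 0)
		return 1;
	if (flock(fd, LOCK_EX) < 0)	/* blocks until the lock is free */
		return 1;
	/* ... critical section ... */
	flock(fd, LOCK_UN);		/* also dropped automatically on last close */
	close(fd);
	return 0;
}
#endif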

/**
 * vfs_test_lock - test file byte range lock
 * @filp: The file to test lock for
 * @fl: The lock to test; also used to hold result
 *
 * Returns -ERRNO on failure.  Indicates presence of conflicting lock by
 * setting fl->fl_type to something other than F_UNLCK.
 */
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
	if (filp->f_op->lock)
		return filp->f_op->lock(filp, F_GETLK, fl);
	posix_test_lock(filp, fl);
	return 0;
}
EXPORT_SYMBOL_GPL(vfs_test_lock);

static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
{
	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
#if BITS_PER_LONG == 32
	/*
	 * Make sure we can represent the posix lock via
	 * legacy 32bit flock.
	 */
	if (fl->fl_start > OFFT_OFFSET_MAX)
		return -EOVERFLOW;
	if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
		return -EOVERFLOW;
#endif
	flock->l_start = fl->fl_start;
	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
		fl->fl_end - fl->fl_start + 1;
	flock->l_whence = 0;
	flock->l_type = fl->fl_type;
	return 0;
}

#if BITS_PER_LONG == 32
static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
{
	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
	flock->l_start = fl->fl_start;
	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
		fl->fl_end - fl->fl_start + 1;
	flock->l_whence = 0;
	flock->l_type = fl->fl_type;
}
#endif

/* Report the first existing lock that would conflict with l.
 * This implements the F_GETLK command of fcntl().
 */
int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
{
	struct file_lock file_lock;
	struct flock flock;
	int error;

	error = -EFAULT;
	if (copy_from_user(&flock, l, sizeof(flock)))
		goto out;
	error = -EINVAL;
	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
		goto out;

	error = flock_to_posix_lock(filp, &file_lock, &flock);
	if (error)
		goto out;

	if (cmd == F_OFD_GETLK) {
		error = -EINVAL;
		if (flock.l_pid != 0)
			goto out;

		cmd = F_GETLK;
		file_lock.fl_flags |= FL_OFDLCK;
		file_lock.fl_owner = filp;
	}

	error = vfs_test_lock(filp, &file_lock);
	if (error)
		goto out;

	flock.l_type = file_lock.fl_type;
	if (file_lock.fl_type != F_UNLCK) {
		error = posix_lock_to_flock(&flock, &file_lock);
		if (error)
			goto rel_priv;
	}
	error = -EFAULT;
	if (!copy_to_user(l, &flock, sizeof(flock)))
		error = 0;
rel_priv:
	locks_release_private(&file_lock);
out:
	return error;
}
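
/*
 * Example (illustrative userspace sketch): using F_GETLK to ask which
 * lock, if any, would block a write lock on the first 100 bytes of a
 * file.  The path is arbitrary.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct flock fl;
	int fd = open("/tmp/example.dat", O_RDWR);	/* hypothetical path */

	if (fd < 0)
		return 1;
	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 100;
	if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
		printf("conflicting lock held by pid %ld\n", (long)fl.l_pid);
	return 0;
}
#endif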

/**
 * vfs_lock_file - file byte range lock
 * @filp: The file to apply the lock to
 * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
 * @fl: The lock to be applied
 * @conf: Place to return a copy of the conflicting lock, if found.
 *
 * A caller that doesn't care about the conflicting lock may pass NULL
 * as the final argument.
 *
 * If the filesystem defines a private ->lock() method, then @conf will
 * be left unchanged; so a caller that cares should initialize it to
 * some acceptable default.
 *
 * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
 * locks, the ->lock() interface may return asynchronously, before the lock has
 * been granted or denied by the underlying filesystem, if (and only if)
 * lm_grant is set. Callers expecting ->lock() to return asynchronously
 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
 * the request is for a blocking lock. When ->lock() does return asynchronously,
 * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock
 * request completes.
 * If the request is for a non-blocking lock the filesystem should return
 * FILE_LOCK_DEFERRED, then try to get the lock and call the callback routine
 * with the result. If the request times out the callback routine will return a
 * nonzero return code and the filesystem should release the lock. The
 * filesystem is also responsible for keeping a corresponding posix lock when it
 * grants a lock, so the VFS can find out which locks are locally held and do
 * the correct lock cleanup when required.
 * The underlying filesystem must not drop the kernel lock or call
 * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
 * return code.
 */
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
	if (filp->f_op->lock)
		return filp->f_op->lock(filp, cmd, fl);
	else
		return posix_lock_file(filp, fl, conf);
}
EXPORT_SYMBOL_GPL(vfs_lock_file);
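
/*
 * Example (illustrative sketch, not taken from any real filesystem): a
 * hypothetical local filesystem can route its ->lock method through the
 * generic posix helpers used above.  A network filesystem would instead
 * talk to its lock server and use the asynchronous lm_grant protocol
 * described in the comment for vfs_lock_file().
 */
#if 0
static int examplefs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
	if (cmd == F_GETLK) {
		posix_test_lock(filp, fl);
		return 0;
	}
	/* F_SETLK / F_SETLKW: apply the lock to the local lock state */
	return posix_lock_file(filp, fl, NULL);
}
#endif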

static int do_lock_file_wait(struct file *filp, unsigned int cmd,
			     struct file_lock *fl)
{
	int error;

	error = security_file_lock(filp, fl->fl_type);
	if (error)
		return error;

	for (;;) {
		error = vfs_lock_file(filp, cmd, fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			break;
		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
		if (!error)
			continue;

		locks_delete_block(fl);
		break;
	}

	return error;
}

/* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */
static int
check_fmode_for_setlk(struct file_lock *fl)
{
	switch (fl->fl_type) {
	case F_RDLCK:
		if (!(fl->fl_file->f_mode & FMODE_READ))
			return -EBADF;
		break;
	case F_WRLCK:
		if (!(fl->fl_file->f_mode & FMODE_WRITE))
			return -EBADF;
	}
	return 0;
}
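
/*
 * For example, an F_RDLCK request on a descriptor opened O_WRONLY is
 * rejected here with -EBADF before the lock is ever attempted.
 */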

/* Apply the lock described by l to an open file descriptor.
 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
 */
int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
		struct flock __user *l)
{
	struct file_lock *file_lock = locks_alloc_lock();
	struct flock flock;
	struct inode *inode;
	struct file *f;
	int error;

	if (file_lock == NULL)
		return -ENOLCK;

	inode = file_inode(filp);

	/*
	 * This might block, so we do it before checking the inode.
	 */
	error = -EFAULT;
	if (copy_from_user(&flock, l, sizeof(flock)))
		goto out;

	/* Don't allow mandatory locks on files that may be memory mapped
	 * and shared.
	 */
	if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
		error = -EAGAIN;
		goto out;
	}

	error = flock_to_posix_lock(filp, file_lock, &flock);
	if (error)
		goto out;

	error = check_fmode_for_setlk(file_lock);
	if (error)
		goto out;

	/*
	 * If the cmd is requesting file-private locks, then set the
	 * FL_OFDLCK flag and override the owner.
	 */
	switch (cmd) {
	case F_OFD_SETLK:
		error = -EINVAL;
		if (flock.l_pid != 0)
			goto out;

		cmd = F_SETLK;
		file_lock->fl_flags |= FL_OFDLCK;
		file_lock->fl_owner = filp;
		break;
	case F_OFD_SETLKW:
		error = -EINVAL;
		if (flock.l_pid != 0)
			goto out;

		cmd = F_SETLKW;
		file_lock->fl_flags |= FL_OFDLCK;
		file_lock->fl_owner = filp;
		/* Fallthrough */
	case F_SETLKW:
		file_lock->fl_flags |= FL_SLEEP;
	}

	error = do_lock_file_wait(filp, cmd, file_lock);

	/*
	 * Attempt to detect a close/fcntl race and recover by releasing the
	 * lock that was just acquired. There is no need to do that when we're
	 * unlocking though, or for OFD locks.
	 */
	if (!error && file_lock->fl_type != F_UNLCK &&
	    !(file_lock->fl_flags & FL_OFDLCK)) {
		/*
		 * We need that spin_lock here - it prevents reordering between
		 * update of i_flctx->flc_posix and check for it done in
		 * close(). rcu_read_lock() wouldn't do.
		 */
		spin_lock(&current->files->file_lock);
		f = fcheck(fd);
		spin_unlock(&current->files->file_lock);
		if (f != filp) {
			file_lock->fl_type = F_UNLCK;
			error = do_lock_file_wait(filp, cmd, file_lock);
			WARN_ON_ONCE(error);
			error = -EBADF;
		}
	}
out:
	trace_fcntl_setlk(inode, file_lock, error);
	locks_free_lock(file_lock);
	return error;
}
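
/*
 * Example (illustrative userspace sketch): taking an open-file-description
 * (OFD) write lock on a byte range.  OFD locks are owned by the open file
 * description rather than the process, so l_pid must be zero.  The path
 * is arbitrary.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct flock fl;
	int fd = open("/tmp/example.dat", O_RDWR);	/* hypothetical path */

	if (fd < 0)
		return 1;
	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 4096;
	fl.l_pid = 0;				/* required for OFD locks */
	if (fcntl(fd, F_OFD_SETLKW, &fl) < 0)	/* blocks until granted */
		return 1;
	/* ... exclusive access to the first 4096 bytes ... */
	return 0;
}
#endif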

#if BITS_PER_LONG == 32
/* Report the first existing lock that would conflict with l.
 * This implements the F_GETLK command of fcntl().
 */
int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
{
	struct file_lock file_lock;
	struct flock64 flock;
	int error;

	error = -EFAULT;
	if (copy_from_user(&flock, l, sizeof(flock)))
		goto out;
	error = -EINVAL;
	if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
		goto out;

	error = flock64_to_posix_lock(filp, &file_lock, &flock);
	if (error)
		goto out;

	if (cmd == F_OFD_GETLK) {
		error = -EINVAL;
		if (flock.l_pid != 0)
			goto out;

		cmd = F_GETLK64;
		file_lock.fl_flags |= FL_OFDLCK;
		file_lock.fl_owner = filp;
	}

	error = vfs_test_lock(filp, &file_lock);
	if (error)
		goto out;

	flock.l_type = file_lock.fl_type;
	if (file_lock.fl_type != F_UNLCK)
		posix_lock_to_flock64(&flock, &file_lock);

	error = -EFAULT;
	if (!copy_to_user(l, &flock, sizeof(flock)))
		error = 0;

	locks_release_private(&file_lock);
out:
	return error;
}

/* Apply the lock described by l to an open file descriptor.
 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
 */
int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
		struct flock64 __user *l)
{
	struct file_lock *file_lock = locks_alloc_lock();
	struct flock64 flock;
	struct inode *inode;
	struct file *f;
	int error;

	if (file_lock == NULL)
		return -ENOLCK;

	/*
	 * This might block, so we do it before checking the inode.
	 */
	error = -EFAULT;
	if (copy_from_user(&flock, l, sizeof(flock)))
		goto out;

	inode = file_inode(filp);

	/* Don't allow mandatory locks on files that may be memory mapped
	 * and shared.
	 */
	if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
		error = -EAGAIN;
		goto out;
	}

	error = flock64_to_posix_lock(filp, file_lock, &flock);
	if (error)
		goto out;

	error = check_fmode_for_setlk(file_lock);
	if (error)
		goto out;

	/*
	 * If the cmd is requesting file-private locks, then set the
	 * FL_OFDLCK flag and override the owner.
	 */
	switch (cmd) {
	case F_OFD_SETLK:
		error = -EINVAL;
		if (flock.l_pid != 0)
			goto out;

		cmd = F_SETLK64;
		file_lock->fl_flags |= FL_OFDLCK;
		file_lock->fl_owner = filp;
		break;
	case F_OFD_SETLKW:
		error = -EINVAL;
		if (flock.l_pid != 0)
			goto out;

		cmd = F_SETLKW64;
		file_lock->fl_flags |= FL_OFDLCK;
		file_lock->fl_owner = filp;
		/* Fallthrough */
	case F_SETLKW64:
		file_lock->fl_flags |= FL_SLEEP;
	}

	error = do_lock_file_wait(filp, cmd, file_lock);

	/*
	 * Attempt to detect a close/fcntl race and recover by releasing the
	 * lock that was just acquired. There is no need to do that when we're
	 * unlocking though, or for OFD locks.
	 */
	if (!error && file_lock->fl_type != F_UNLCK &&
	    !(file_lock->fl_flags & FL_OFDLCK)) {
		/*
		 * We need that spin_lock here - it prevents reordering between
		 * update of i_flctx->flc_posix and check for it done in
		 * close(). rcu_read_lock() wouldn't do.
		 */
		spin_lock(&current->files->file_lock);
		f = fcheck(fd);
		spin_unlock(&current->files->file_lock);
		if (f != filp) {
			file_lock->fl_type = F_UNLCK;
			error = do_lock_file_wait(filp, cmd, file_lock);
			WARN_ON_ONCE(error);
			error = -EBADF;
		}
	}
out:
	locks_free_lock(file_lock);
	return error;
}
#endif /* BITS_PER_LONG == 32 */

/*
 * This function is called when the file is being removed
 * from the task's fd array.  POSIX locks belonging to this task
 * are deleted at this time.
 */
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
	int error;
	struct file_lock lock;
	struct file_lock_context *ctx;

	/*
	 * If there are no locks held on this file, we don't need to call
	 * posix_lock_file().  Another process could be setting a lock on this
	 * file at the same time, but we wouldn't remove that lock anyway.
	 */
	ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
	if (!ctx || list_empty(&ctx->flc_posix))
		return;

	lock.fl_type = F_UNLCK;
	lock.fl_flags = FL_POSIX | FL_CLOSE;
	lock.fl_start = 0;
	lock.fl_end = OFFSET_MAX;
	lock.fl_owner = owner;
	lock.fl_pid = current->tgid;
	lock.fl_file = filp;
	lock.fl_ops = NULL;
	lock.fl_lmops = NULL;

	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);

	if (lock.fl_ops && lock.fl_ops->fl_release_private)
		lock.fl_ops->fl_release_private(&lock);
	trace_locks_remove_posix(file_inode(filp), &lock, error);
}

EXPORT_SYMBOL(locks_remove_posix);

/* The i_flctx must be valid when calling into here */
static void
locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
{
	struct file_lock fl = {
		.fl_owner = filp,
		.fl_pid = current->tgid,
		.fl_file = filp,
		.fl_flags = FL_FLOCK,
		.fl_type = F_UNLCK,
		.fl_end = OFFSET_MAX,
	};
	struct inode *inode = file_inode(filp);

	if (list_empty(&flctx->flc_flock))
		return;

	if (filp->f_op->flock)
		filp->f_op->flock(filp, F_SETLKW, &fl);
	else
		flock_lock_inode(inode, &fl);

	if (fl.fl_ops && fl.fl_ops->fl_release_private)
		fl.fl_ops->fl_release_private(&fl);
}

/* The i_flctx must be valid when calling into here */
static void
locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
{
	struct file_lock *fl, *tmp;
	LIST_HEAD(dispose);

	if (list_empty(&ctx->flc_lease))
		return;

	spin_lock(&ctx->flc_lock);
	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
		if (filp == fl->fl_file)
			lease_modify(fl, F_UNLCK, &dispose);
	spin_unlock(&ctx->flc_lock);
	locks_dispose_list(&dispose);
}

/*
 * This function is called on the last close of an open file.
 */
void locks_remove_file(struct file *filp)
{
	struct file_lock_context *ctx;

	ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
	if (!ctx)
		return;

	/* remove any OFD locks */
	locks_remove_posix(filp, filp);

	/* remove flock locks */
	locks_remove_flock(filp, ctx);

	/* remove any leases */
	locks_remove_lease(filp, ctx);
}

/**
 *	posix_unblock_lock - stop waiting for a file lock
 *	@waiter: the lock which was waiting
 *
 *	lockd needs to block waiting for locks.
 */
int
posix_unblock_lock(struct file_lock *waiter)
{
	int status = 0;

	spin_lock(&blocked_lock_lock);
	if (waiter->fl_next)
		__locks_delete_block(waiter);
	else
		status = -ENOENT;
	spin_unlock(&blocked_lock_lock);
	return status;
}
EXPORT_SYMBOL(posix_unblock_lock);

/**
 * vfs_cancel_lock - file byte range unblock lock
 * @filp: The file to apply the unblock to
 * @fl: The lock to be unblocked
 *
 * Used by lock managers to cancel blocked requests
 */
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
	if (filp->f_op->lock)
		return filp->f_op->lock(filp, F_CANCELLK, fl);
	return 0;
}

EXPORT_SYMBOL_GPL(vfs_cancel_lock);

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct locks_iterator {
	int	li_cpu;
	loff_t	li_pos;
};

static void lock_get_status(struct seq_file *f, struct file_lock *fl,
			    loff_t id, char *pfx)
{
	struct inode *inode = NULL;
	unsigned int fl_pid;

	if (fl->fl_nspid)
		fl_pid = pid_vnr(fl->fl_nspid);
	else
		fl_pid = fl->fl_pid;

	if (fl->fl_file != NULL)
		inode = file_inode(fl->fl_file);

	seq_printf(f, "%lld:%s ", id, pfx);
	if (IS_POSIX(fl)) {
		if (fl->fl_flags & FL_ACCESS)
			seq_puts(f, "ACCESS");
		else if (IS_OFDLCK(fl))
			seq_puts(f, "OFDLCK");
		else
			seq_puts(f, "POSIX ");

		seq_printf(f, " %s ",
			     (inode == NULL) ? "*NOINODE*" :
			     mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
	} else if (IS_FLOCK(fl)) {
		if (fl->fl_type & LOCK_MAND) {
			seq_puts(f, "FLOCK  MSNFS     ");
		} else {
			seq_puts(f, "FLOCK  ADVISORY  ");
		}
	} else if (IS_LEASE(fl)) {
		if (fl->fl_flags & FL_DELEG)
			seq_puts(f, "DELEG  ");
		else
			seq_puts(f, "LEASE  ");

		if (lease_breaking(fl))
			seq_puts(f, "BREAKING  ");
		else if (fl->fl_file)
			seq_puts(f, "ACTIVE    ");
		else
			seq_puts(f, "BREAKER   ");
	} else {
		seq_puts(f, "UNKNOWN UNKNOWN  ");
	}
	if (fl->fl_type & LOCK_MAND) {
		seq_printf(f, "%s ",
			       (fl->fl_type & LOCK_READ)
			       ? (fl->fl_type & LOCK_WRITE) ? "RW   " : "READ "
			       : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
	} else {
		seq_printf(f, "%s ",
			       (lease_breaking(fl))
			       ? (fl->fl_type == F_UNLCK) ? "UNLCK" : "READ "
			       : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ ");
	}
	if (inode) {
		/* userspace relies on this representation of dev_t */
		seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
				MAJOR(inode->i_sb->s_dev),
				MINOR(inode->i_sb->s_dev), inode->i_ino);
	} else {
		seq_printf(f, "%d <none>:0 ", fl_pid);
	}
	if (IS_POSIX(fl)) {
		if (fl->fl_end == OFFSET_MAX)
			seq_printf(f, "%Ld EOF\n", fl->fl_start);
		else
			seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
	} else {
		seq_puts(f, "0 EOF\n");
	}
}
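
/*
 * A /proc/locks line produced by lock_get_status() looks roughly like the
 * following (hypothetical values):
 *
 *	1: POSIX  ADVISORY  WRITE 1234 08:02:131090 0 EOF
 *
 * i.e. ordinal, lock class, mandatory/advisory, access, pid, then
 * major:minor:inode of the locked file and the byte range.
 */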

static int locks_show(struct seq_file *f, void *v)
{
	struct locks_iterator *iter = f->private;
	struct file_lock *fl, *bfl;

	fl = hlist_entry(v, struct file_lock, fl_link);

	lock_get_status(f, fl, iter->li_pos, "");

	list_for_each_entry(bfl, &fl->fl_block, fl_block)
		lock_get_status(f, bfl, iter->li_pos, " ->");

	return 0;
}

static void __show_fd_locks(struct seq_file *f,
			struct list_head *head, int *id,
			struct file *filp, struct files_struct *files)
{
	struct file_lock *fl;

	list_for_each_entry(fl, head, fl_list) {

		if (filp != fl->fl_file)
			continue;
		if (fl->fl_owner != files &&
		    fl->fl_owner != filp)
			continue;

		(*id)++;
		seq_puts(f, "lock:\t");
		lock_get_status(f, fl, *id, "");
	}
}

void show_fd_locks(struct seq_file *f,
		  struct file *filp, struct files_struct *files)
{
	struct inode *inode = file_inode(filp);
	struct file_lock_context *ctx;
	int id = 0;

	ctx = smp_load_acquire(&inode->i_flctx);
	if (!ctx)
		return;

	spin_lock(&ctx->flc_lock);
	__show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
	__show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
	__show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
	spin_unlock(&ctx->flc_lock);
}
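
/*
 * show_fd_locks() feeds /proc/<pid>/fdinfo/<fd>; each lock held through
 * the file shows up as a "lock:" line, for example (hypothetical values):
 *
 *	lock:	1: POSIX  ADVISORY  WRITE 1234 08:02:131090 0 EOF
 */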

static void *locks_start(struct seq_file *f, loff_t *pos)
	__acquires(&blocked_lock_lock)
{
	struct locks_iterator *iter = f->private;

	iter->li_pos = *pos + 1;
	lg_global_lock(&file_lock_lglock);
	spin_lock(&blocked_lock_lock);
	return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
}

static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
{
	struct locks_iterator *iter = f->private;

	++iter->li_pos;
	return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
}

static void locks_stop(struct seq_file *f, void *v)
	__releases(&blocked_lock_lock)
{
	spin_unlock(&blocked_lock_lock);
	lg_global_unlock(&file_lock_lglock);
}

static const struct seq_operations locks_seq_operations = {
	.start	= locks_start,
	.next	= locks_next,
	.stop	= locks_stop,
	.show	= locks_show,
};

static int locks_open(struct inode *inode, struct file *filp)
{
	return seq_open_private(filp, &locks_seq_operations,
					sizeof(struct locks_iterator));
}

static const struct file_operations proc_locks_operations = {
	.open		= locks_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};

static int __init proc_locks_init(void)
{
	proc_create("locks", 0, NULL, &proc_locks_operations);
	return 0;
}
fs_initcall(proc_locks_init);
#endif

static int __init filelock_init(void)
{
	int i;

	flctx_cache = kmem_cache_create("file_lock_ctx",
			sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);

	filelock_cache = kmem_cache_create("file_lock_cache",
			sizeof(struct file_lock), 0, SLAB_PANIC, NULL);

	lg_lock_init(&file_lock_lglock, "file_lock_lglock");

	for_each_possible_cpu(i)
		INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));

	return 0;
}

core_initcall(filelock_init);