/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

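/*
 * Convert POSIX open flags (O_RDONLY/O_WRONLY/O_RDWR) to the desired-access
 * bits requested in an SMB open request.
 */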
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause an unnecessary access-denied error on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

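/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combination in the open flags to the
 * matching CIFS create disposition.
 */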
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

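/*
 * Open a file using the SMB Unix/POSIX extensions and, when the caller asks
 * for it, instantiate or refresh the matching inode from the returned
 * FILE_UNIX_BASIC_INFO.
 */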
int cifs_posix_open(const char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

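/*
 * Open a file with a regular (non-POSIX) SMB open/create and then refresh
 * the inode from the metadata the server returned.
 */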
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match for disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
 *	file rather than creating a new one as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}

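/*
 * Return true if any open instance of this inode still holds cached
 * byte-range locks; callers use this to decide whether a read oplock must
 * be degraded to None.
 */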
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

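/*
 * Take lock_sem for writing by polling with trylock plus a short sleep
 * rather than blocking on the semaphore's wait queue.
 */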
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

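/*
 * Allocate the per-open bookkeeping structure (cifsFileInfo), link it into
 * the inode and tcon open-file lists and apply the oplock that the server
 * granted for this handle.
 */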
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->oplock_break_received = false;
	cfile->deferred_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	spin_lock(&CIFS_I(inode)->deferred_lock);
	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto out;
		} else {
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		}
	} else {
		spin_unlock(&CIFS_I(inode)->deferred_lock);
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

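/*
 * Reopen a file handle whose server handle went stale, typically after a
 * reconnect; when can_flush is set, write back dirty pages and refresh the
 * inode once the handle is reestablished.
 */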
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return 0;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab the rename sem here, because various ops, including
	 * those that already hold it, can end up causing writepage to get
	 * called; and if the server was down, that means we end up here and
	 * can never tell whether the caller already holds the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}

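/* Work item that carries out the real close of a deferred-close handle. */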
void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	if (!cfile->deferred_scheduled) {
		spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
		return;
	}
	cifs_del_deferred_close(cfile);
	cfile->deferred_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
		    dclose) {
			if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
				inode->i_ctime = inode->i_mtime = current_time(inode);
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_scheduled) {
				mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax);
				cfile->deferred_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles  */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

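/* Allocate and initialize a byte-range lock record for the current task. */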
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
1036 1037 1038 1039 1040 1041 1042 1043 1044
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

1045 1046 1047 1048 1049
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
1050
static bool
1051
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1052 1053
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
1054
			    struct cifsLockInfo **conf_lock, int rw_check)
1055
{
1056
	struct cifsLockInfo *li;
1057
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1058
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1059

1060
	list_for_each_entry(li, &fdlocks->locks, llist) {
1061 1062 1063
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
1064 1065 1066 1067 1068 1069 1070
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

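/*
 * Resend every cached mandatory byte-range lock on this file to the server,
 * batching as many LOCKING_ANDX ranges per request as the negotiated buffer
 * size allows.
 */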
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

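/*
 * Push the POSIX-style locks cached in the VFS file_lock_context out to the
 * server once they can no longer be cached locally.
 */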
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

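/*
 * Decode a struct file_lock into the lock-type bits, lock/unlock requests
 * and wait semantics used by the SMB locking calls.
 */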
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

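/*
 * Test for a conflicting lock (an F_GETLK-style request) first against
 * locally cached locks and then, if necessary, against the server.
 */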
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

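/*
 * Remove cached locks that fall inside the unlock range and send the
 * matching unlock requests to the server in batches.
 */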
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeed -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

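/*
 * Dispatcher for lock and unlock requests: POSIX-capable mounts go through
 * CIFSSMBPosixLock(), everything else uses the dialect's mandatory
 * byte-range lock ops (mand_lock/mand_unlock_range).  For FL_POSIX and
 * FL_FLOCK requests the local VFS lock state is updated afterwards via
 * locks_lock_file_wait(), so the client view stays consistent with the
 * server even when a server-side unlock fails on close.
 */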
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

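/*
 * Entry point for flock(2) requests forwarded by the VFS.  A minimal
 * user-space illustration (path and fd are hypothetical, not from this
 * file):
 *
 *	int fd = open("/mnt/cifs/data.bin", O_RDWR);
 *	flock(fd, LOCK_EX);	// arrives here with FL_FLOCK set
 *	flock(fd, LOCK_UN);
 */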
int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK)) {
		/* don't leak the xid on the early-return path */
		free_xid(xid);
		return -ENOLCK;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

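/*
 * Entry point for fcntl(2) byte-range locks (F_GETLK/F_SETLK/F_SETLKW).
 * A sketch of the user-space side (values are hypothetical):
 *
 *	struct flock fl = {
 *		.l_type = F_WRLCK, .l_whence = SEEK_SET,
 *		.l_start = 0, .l_len = 4096,
 *	};
 *	fcntl(fd, F_SETLKW, &fl);	// ends up in cifs_lock()
 */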
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		      unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

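/*
 * Synchronous write helper: loops until write_size bytes have gone out,
 * reopening an invalidated handle and retrying the current chunk on
 * -EAGAIN.  Each chunk is capped by the server's wp_retry_size() and sent
 * through the dialect's sync_write op; the cached EOF and i_size are
 * updated under inode->i_lock as data lands on the server.
 */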
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size) {
			i_size_write(d_inode(dentry), *offset);
			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
		}
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

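/*
 * Scan the inode's open-file list for a handle opened with read access.
 * Write-only handles sort to the tail of openFileList, so the walk can
 * stop at the first one.  The returned cifsFileInfo carries an extra
 * reference; callers drop it with cifsFileInfo_put().
 */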
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if ((!open_file->invalidHandle) &&
				(!open_file->oplock_break_received)) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}

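/*
 * Two-pass search for a writable handle: first restricted to the calling
 * process (same tgid), then any available handle.  An invalidated handle
 * found along the way is remembered in inv_file and reopened as a last
 * resort, with up to MAX_REOPEN_ATT retries before giving up.
 */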
/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}

struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
	if (rc)
		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);

	return cfile;
}

int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
		       int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		return cifs_get_writable_file(cinode, flags, ret_file);
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}

int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		*ret_file = find_readable_file(cinode, 0);
		return *ret_file ? 0 : -ENOENT;
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}

static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}

static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	*found_pages = find_get_pages_range_tag(mapping, index, end,
				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
	return wdata;
}

static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}

static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_SIZE;
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
	wdata->pid = wdata->cfile->pid;

	rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
	if (rc)
		return rc;

	if (wdata->cfile->invalidHandle)
		rc = -EAGAIN;
	else
		rc = wdata->server->ops->async_writev(wdata,
						      cifs_writedata_release);

	return rc;
}

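/*
 * Writeback entry point.  Each loop iteration reserves send credits,
 * gathers up to wsize worth of dirty pages (wdata_alloc_and_fillpages +
 * wdata_prepare_pages) and hands them to wdata_send_pages() for an async
 * write; credits are rolled back with add_credits_and_wake_if() on every
 * failure path so the MTU credit accounting stays balanced.
 */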
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct cifsFileInfo *cfile = NULL;
	int rc = 0;
	int saved_rc = 0;
	unsigned int xid;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->ctx->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	xid = get_xid();
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);

retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize;
		pgoff_t next = 0, tofind, saved_index = index;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		int get_file_rc = 0;

		if (cfile)
			cifsFileInfo_put(cfile);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);

		/* in case of an error store it to return later */
		if (rc)
			get_file_rc = rc;

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc != 0) {
			done = true;
			break;
		}

		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			done = true;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		wdata->credits = credits_on_stack;
		wdata->cfile = cfile;
		wdata->server = server;
		cfile = NULL;

		if (!wdata->cfile) {
			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
				 get_file_rc);
			if (is_retryable_error(get_file_rc))
				rc = get_file_rc;
			else
				rc = -EBADF;
		} else
			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (is_retryable_error(rc))
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (!is_retryable_error(rc))
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (saved_rc != 0)
		rc = saved_rc;

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	if (cfile)
		cifsFileInfo_put(cfile);
	free_xid(xid);
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
	return rc;
}

static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}

static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}

static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size) {
			i_size_write(inode, pos);
			inode->i_blocks = (512 - 1 + pos) >> 9;
		}
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);

	return rc;
}

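/*
 * fsync for strict cache mode: after flushing dirty pages, the page cache
 * is zapped when the client holds no read (oplock/lease) caching, since
 * cached data can no longer be trusted; the dialect's flush op then asks
 * the server to commit, unless the mount set CIFS_MOUNT_NOSSYNC.
 */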
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(inode->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	return rc;
}

int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	return rc;
}

/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	int rc = 0;

	if (file->f_mode & FMODE_WRITE)
		rc = filemap_write_and_wait(inode->i_mapping);

	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
	if (rc)
		trace_cifs_flush_err(inode->i_ino, rc);
	return rc;
}

static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
	int rc = 0;
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!pages[i]) {
			/*
			 * save number of pages we have already allocated and
			 * return with ENOMEM error
			 */
			num_pages = i;
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		for (i = 0; i < num_pages; i++)
			put_page(pages[i]);
	}
	return rc;
}

static inline
size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
	size_t num_pages;
	size_t clen;

	clen = min_t(const size_t, len, wsize);
	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);

	if (cur_len)
		*cur_len = clen;

	return num_pages;
}

static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}

static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);

static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}

static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}

static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}

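/*
 * Core of the uncached write path.  The iov_iter is consumed in
 * wsize-sized chunks; in direct I/O mode the user pages are pinned with
 * iov_iter_get_pages_alloc(), otherwise the data is copied into freshly
 * allocated pages via wdata_fill_from_iovec().  Each chunk becomes an
 * async writev request appended to wdata_list for the caller to collect.
 */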
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(from),
					 from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}

static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}

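/*
 * Common implementation behind cifs_user_writev() and cifs_direct_writev().
 * The cifs_aio_ctx is refcounted: every in-flight wdata takes a reference,
 * so the ctx (and the pinned iterator) survives until the last completion.
 * For async kiocbs the function returns -EIOCBQUEUED and the result is
 * delivered later through ki_complete from collect_uncached_write_data().
 */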
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here: response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}

ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, true);
}

ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}

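/*
 * Cached write used when the client holds write caching but POSIX locks
 * are unavailable: lock_sem is read-held across the conflict check and the
 * generic write, so no mandatory brlock can be inserted in between.
 */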
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}

ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause an error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}

static struct cifs_readdata *
cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
{
	struct cifs_readdata *rdata;

	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
	if (rdata != NULL) {
		rdata->pages = pages;
		kref_init(&rdata->refcount);
		INIT_LIST_HEAD(&rdata->list);
		init_completion(&rdata->done);
		INIT_WORK(&rdata->work, complete);
	}

	return rdata;
}

static struct cifs_readdata *
cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
{
	struct page **pages =
		kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
	struct cifs_readdata *ret = NULL;

	if (pages) {
		ret = cifs_readdata_direct_alloc(pages, complete);
		if (!ret)
			kfree(pages);
	}

	return ret;
}

void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}

static int
cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
{
	int rc = 0;
	struct page *page;
	unsigned int i;

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!page) {
			rc = -ENOMEM;
			break;
		}
		rdata->pages[i] = page;
	}

	if (rc) {
		unsigned int nr_page_failed = i;

		for (i = 0; i < nr_page_failed; i++) {
			put_page(rdata->pages[i]);
			rdata->pages[i] = NULL;
		}
	}
	return rc;
}

static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
	}
	cifs_readdata_release(refcount);
}

/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:	the readdata response with list of pages holding data
3496
 * @iter:	destination for our data
3497 3498 3499 3500 3501
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
3502 3503
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3504
{
3505
	size_t remaining = rdata->got_bytes;
3506
	unsigned int i;
3507

3508 3509
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
3510
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3511 3512
		size_t written;

D
David Howells 已提交
3513
		if (unlikely(iov_iter_is_pipe(iter))) {
3514 3515 3516 3517 3518 3519
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
3520 3521 3522
		remaining -= written;
		if (written < copy && iov_iter_count(iter) > 0)
			break;
3523
	}
3524
	return remaining ? -EFAULT : 0;
3525 3526
}

static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);

static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}

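/*
 * Fill the pages of a readdata with the response payload, either from
 * the transport socket or, on the cached-copy path, from the supplied
 * iov_iter. With SMB-direct the data has already been placed by RDMA,
 * so the bytes are only accounted. Pages beyond "len" bytes are freed.
 */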
static int
uncached_fill_pages(struct TCP_Server_Info *server,
		    struct cifs_readdata *rdata, struct iov_iter *iter,
		    unsigned int len)
{
	int result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t n;
		unsigned int segment_size = rdata->pagesz;

		if (i == 0)
			segment_size -= page_offset;
		else
			page_offset = 0;

		if (len <= 0) {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		n = len;
		if (len >= segment_size)
			/* enough data to fill the page */
			n = segment_size;
		else
			rdata->tailsz = len;
		len -= n;

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}

static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}

static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}

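/*
 * Resend a whole rdata after a retryable failure on the direct I/O
 * path: reopen an invalidated handle, wait for enough credits to cover
 * the full request, then reissue the async read and queue the rdata.
 */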
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}

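/*
 * Split an uncached or direct read into rsize-bounded chunks and issue
 * an async read for each chunk, queueing the rdatas on rdata_list. For
 * direct I/O the user buffer's pages are pinned instead of allocated.
 */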
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}

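/*
 * Collect the results of the outstanding reads of an aio context: copy
 * finished data into the destination iter (non-direct case), resend any
 * chunks that failed with -EAGAIN, and finally complete the iocb or
 * wake up a synchronous waiter.
 */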
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}

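/*
 * Common back end for cifs_user_readv() and cifs_direct_readv(): set up
 * a cifs_aio_ctx, fan the read out via cifs_send_async_read(), then
 * either return -EIOCBQUEUED for async iocbs or wait for completion.
 */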
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}

ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}

ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}

ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
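
/*
 * Synchronous read helper used by the readpage path: issues sync_read
 * calls of at most rsize bytes until read_size bytes have been read or
 * an error (or EOF) is hit.
 */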
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = cifs_pick_channel(tcon->ses);

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			io_parms.server = server;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	lock_page(page);
	return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int xid, rc = 0;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_CACHE_READ(CIFS_I(inode)))
		rc = cifs_zap_mapping(inode);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();

	rc = cifs_revalidate_file(file);
	if (rc)
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}

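/*
 * Completion work for the readpages path: mark pages that received data
 * uptodate, push them to fscache, then drop the readdata reference.
 */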
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

		put_page(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}

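/*
 * Counterpart of uncached_fill_pages() for the pagecache readpages
 * path: pages beyond the server's EOF are zero-filled and marked
 * uptodate, and pages not covered by this response are released.
 */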
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}

static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}

static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}

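/*
 * Peel a run of index-contiguous pages (bounded by rsize) off the head
 * of page_list, add them locked to the page cache, and report the
 * resulting offset/byte count for the read request.
 */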
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}

static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));

	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}

/*
 * We do not want to update the file size from the server for inodes open
 * for write - to avoid races with writepage extending the file. In the
 * future we could consider allowing a refresh of the inode only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since there is no page cache to corrupt on directio
			   we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}

static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

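/*
 * Work item run when the server breaks our oplock: downgrade the cached
 * oplock state, flush (and possibly zap) cached data, push byte-range
 * locks to the server, then acknowledge the break unless cancelled.
 */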
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;
	bool is_deferred = false;
	struct cifs_deferred_close *dclose;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	/*
	 * When oplock break is received and there are no active
	 * file handles but cached, then set the flag oplock_break_received.
	 * So, new open will not use cached handle.
	 */
	spin_lock(&CIFS_I(inode)->deferred_lock);
	is_deferred = cifs_is_deferred_close(cfile, &dclose);
	if (is_deferred && cfile->deferred_scheduled) {
		cfile->oplock_break_received = true;
		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
	}
	spin_unlock(&CIFS_I(inode)->deferred_lock);
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}

S
			      struct file *swap_file, sector_t *span)
{
	struct cifsFileInfo *cfile = swap_file->private_data;
	struct inode *inode = swap_file->f_mapping->host;
	unsigned long blocks;
	long long isize;

	cifs_dbg(FYI, "swap activate\n");

	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	if (blocks*512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}
	*span = sis->pages;

	pr_warn_once("Swap support over SMB3 is experimental\n");

	/*
	 * TODO: consider adding ACL (or documenting how) to prevent other
	 * users (on this or other systems) from reading it
	 */


	/* TODO: add sk_set_memalloc(inet) or similar */

	if (cfile)
		cfile->swapfile = true;
	/*
	 * TODO: Since file already open, we can't open with DENY_ALL here
	 * but we could add call to grab a byte range lock to prevent others
	 * from reading or writing the file
	 */

	return 0;
}

static void cifs_swap_deactivate(struct file *file)
{
	struct cifsFileInfo *cfile = file->private_data;

	cifs_dbg(FYI, "swap deactivate\n");

	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */

	if (cfile)
		cfile->swapfile = false;

	/* do we need to unpin (or unlock) the file */
}

const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add a cifs_migratePage
	 * helper (under a CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};