/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
25
#include <linux/backing-dev.h>
L
Linus Torvalds 已提交
26 27 28 29
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
30
#include <linux/writeback.h>
31
#include <linux/task_io_accounting_ops.h>
32
#include <linux/delay.h>
33
#include <linux/mount.h>
34
#include <linux/slab.h>
35
#include <linux/swap.h>
36
#include <linux/mm.h>
L
Linus Torvalds 已提交
37 38 39 40 41 42 43 44
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
45
#include "fscache.h"
46
#include "smbdirect.h"
47

L
Linus Torvalds 已提交
48 49 50 51 52 53 54 55 56 57 58 59 60
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

61 62 63
	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
64
}
65

66
static u32 cifs_posix_convert_flags(unsigned int flags)
67
{
68
	u32 posix_flags = 0;
69

70
	if ((flags & O_ACCMODE) == O_RDONLY)
71
		posix_flags = SMB_O_RDONLY;
72
	else if ((flags & O_ACCMODE) == O_WRONLY)
73 74 75 76
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

77
	if (flags & O_CREAT) {
78
		posix_flags |= SMB_O_CREAT;
79 80 81
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
82 83
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);
84

85 86 87
	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
88
	if (flags & O_DSYNC)
89
		posix_flags |= SMB_O_SYNC;
90
	if (flags & O_DIRECTORY)
91
		posix_flags |= SMB_O_DIRECTORY;
92
	if (flags & O_NOFOLLOW)
93
		posix_flags |= SMB_O_NOFOLLOW;
94
	if (flags & O_DIRECT)
95
		posix_flags |= SMB_O_DIRECT;
96 97

	return posix_flags;
L
Linus Torvalds 已提交
98 99 100 101 102 103 104 105 106 107
}

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
108 109
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
L
Linus Torvalds 已提交
110 111 112 113
	else
		return FILE_OPEN;
}

114 115
/*
 * Open a file via the POSIX open call of the CIFS Unix Extensions.
 * On success *poplock and *pnetfid are filled in; if @pinode is
 * non-NULL the inode is created or refreshed from the returned
 * FILE_UNIX_BASIC_INFO.  Returns 0 or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the process umask before sending the mode to the server */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type of -1: server returned no usable metadata for the inode */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* existing inode: refresh its attributes in place */
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

175 176
/*
 * Open a file with a regular (NT-style) SMB open/create call and, on
 * success, refresh the inode metadata from the server.  Returns 0 or a
 * negative errno; *oplock and *fid are filled in by the server ops.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* refresh inode metadata with what the open call returned */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}

261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
/*
 * Return true if any open fid on this inode still holds byte-range
 * (mandatory-style) locks.  Walks every per-fid lock list under the
 * inode's lock_sem read lock.
 */
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *fdlocks;
	bool found = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(fdlocks, &cinode->llist, llist) {
		if (!list_empty(&fdlocks->locks)) {
			found = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return found;
}

278
/*
 * Allocate and initialize the cifsFileInfo for a freshly opened handle,
 * link it into the per-tcon and per-inode open-file lists and install
 * the server-assigned fid/oplock via server->ops->set_fid().
 * Returns NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* attach the per-fid lock list to the inode's list of lock lists */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&tcon->open_file_lock);
	/* honor an oplock level recorded against the pending open, if any */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&tcon->open_file_lock);

	/* set_fid may have asked us to drop cached data for this inode */
	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

354 355 356
/*
 * Take an extra reference on the file private data.  The per-file
 * spinlock serializes the count update against cifsFileInfo_put().
 * Returns the same pointer for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

363 364
/*
 * Release a reference on the file private data. This may involve closing
365
 * the filehandle out on the server. Must be called without holding
366
 * tcon->open_file_lock and cifs_file->file_info_lock.
367
 */
368 369
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
370
	struct inode *inode = d_inode(cifs_file->dentry);
371
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
372
	struct TCP_Server_Info *server = tcon->ses->server;
P
Pavel Shilovsky 已提交
373
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
374 375
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
376
	struct cifsLockInfo *li, *tmp;
377 378
	struct cifs_fid fid;
	struct cifs_pending_open open;
379
	bool oplock_break_cancelled;
380

381 382 383
	spin_lock(&tcon->open_file_lock);

	spin_lock(&cifs_file->file_info_lock);
384
	if (--cifs_file->count > 0) {
385 386
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&tcon->open_file_lock);
387 388
		return;
	}
389
	spin_unlock(&cifs_file->file_info_lock);
390

391 392 393 394 395 396
	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

397 398 399
	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
400
	atomic_dec(&tcon->num_local_opens);
401 402

	if (list_empty(&cifsi->openFileList)) {
403
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
404
			 d_inode(cifs_file->dentry));
405 406 407 408 409
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close  because it may cause a error when we open this file
		 * again and get at least level II oplock.
		 */
410
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
411
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
412
		cifs_set_oplock_level(cifsi, 0);
413
	}
414 415

	spin_unlock(&tcon->open_file_lock);
416

417
	oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
418

419
	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
420
		struct TCP_Server_Info *server = tcon->ses->server;
421
		unsigned int xid;
422

423
		xid = get_xid();
424
		if (server->ops->close)
425 426
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
427 428
	}

429 430 431
	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

432 433
	cifs_del_pending_open(&open);

434 435
	/*
	 * Delete any outstanding lock records. We'll lose them when the file
436 437
	 * is closed anyway.
	 */
438
	down_write(&cifsi->lock_sem);
439
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
440
		list_del(&li->llist);
441
		cifs_del_lock_waiters(li);
442
		kfree(li);
443
	}
444 445
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
446
	up_write(&cifsi->lock_sem);
447 448 449

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
450
	cifs_sb_deactive(sb);
451
	kfree(cifs_file);
452 453
}

L
Linus Torvalds 已提交
454
/*
 * VFS ->open() for regular files on a CIFS mount.  Tries the POSIX
 * open of the Unix Extensions first when the server advertises it,
 * falling back to a regular NT-style open otherwise, then builds the
 * cifsFileInfo for the new handle.
 */
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* O_DIRECT under strict cache mode needs the uncached file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server rejected the call outright: stop trying
			   posix opens against this tcon from now on */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* record the open so an incoming lease break is not missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server-side open since we cannot track it */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

P
Pavel Shilovsky 已提交
581 582
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/* push POSIX-style locks when the Unix Extensions support them,
	   otherwise fall back to mandatory-style locks */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

613 614
/*
 * Reopen a file whose handle was invalidated (e.g. after a reconnect).
 * If @can_flush is true, write-behind data is flushed and the inode
 * metadata is refreshed from the server; byte-range locks are
 * reacquired when the open indicates a true reconnect.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* handle already valid - someone else reopened it first */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
774 775 776 777
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}
J
[CIFS]  
Jeremy Allison 已提交
778

779 780
	/* return code from the ->release op is always ignored */
	return 0;
L
Linus Torvalds 已提交
781 782
}

783 784 785
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
786
	struct cifsFileInfo *open_file;
787 788
	struct list_head *tmp;
	struct list_head *tmp1;
789 790
	struct list_head tmp_list;

791 792 793 794 795
	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

796 797
	cifs_dbg(FYI, "Reopen persistent handles");
	INIT_LIST_HEAD(&tmp_list);
798 799 800

	/* list all files open on tree connection, reopen resilient handles  */
	spin_lock(&tcon->open_file_lock);
801
	list_for_each(tmp, &tcon->openFileList) {
802
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
803 804 805 806
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
807 808
	}
	spin_unlock(&tcon->open_file_lock);
809 810 811

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
812 813
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
814 815 816
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
817 818
}

L
Linus Torvalds 已提交
819 820 821
/*
 * VFS ->release() for directories: close any uncompleted server-side
 * search, free leftover search buffers and drop the private data.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* free any network buffer left over from an interrupted search */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

870
static struct cifsLockInfo *
871
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
J
[CIFS]  
Jeremy Allison 已提交
872
{
873
	struct cifsLockInfo *lock =
S
Steve French 已提交
874
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
875 876 877 878 879 880
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
881
	lock->flags = flags;
882 883 884
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
885 886
}

887
void
888 889 890 891 892 893 894 895 896
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

897 898 899 900 901
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's lock list for a lock conflicting with the given
 * range/type.  On conflict, returns true and (if @conf_lock is
 * non-NULL) stores the conflicting lock.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* non-overlapping ranges can never conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* a shared lock does not conflict with another shared lock,
		   nor with our own lock on the same fid/task */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks on the same fid never conflict with each other */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

938
bool
939
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
940 941
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
942
{
943
	bool rc = false;
944
	struct cifs_fid_locks *cur;
945
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
946

947 948
	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
949 950
						 flags, cfile, conf_lock,
						 rw_check);
951 952 953 954 955
		if (rc)
			break;
	}

	return rc;
956 957
}

958 959 960 961 962 963 964
/*
 * Check if there is another lock that prevents us to set the lock (mandatory
 * style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		/* report the conflicting lock back through @flock */
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

997
static void
998
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
999
{
1000
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1001
	down_write(&cinode->lock_sem);
1002
	list_add_tail(&lock->llist, &cfile->llist->locks);
1003
	up_write(&cinode->lock_sem);
J
[CIFS]  
Jeremy Allison 已提交
1004 1005
}

1006 1007 1008 1009
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
1010
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1011
 */
1012
static int
1013
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1014
		 bool wait)
1015
{
1016
	struct cifsLockInfo *conf_lock;
1017
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1018 1019 1020 1021 1022
	bool exist;
	int rc = 0;

try_again:
	exist = false;
1023
	down_write(&cinode->lock_sem);
1024

1025
	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1026 1027
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
1028
	if (!exist && cinode->can_cache_brlcks) {
1029
		list_add_tail(&lock->llist, &cfile->llist->locks);
1030
		up_write(&cinode->lock_sem);
1031 1032 1033 1034 1035 1036 1037 1038 1039
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
1040
		up_write(&cinode->lock_sem);
1041 1042 1043 1044 1045
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
1046
		down_write(&cinode->lock_sem);
1047
		list_del_init(&lock->blist);
1048 1049
	}

1050
	up_write(&cinode->lock_sem);
1051 1052 1053
	return rc;
}

1054 1055 1056 1057 1058 1059 1060
/*
 * Check if there is another lock that prevents us to set the lock (posix
 * style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
1061
static int
1062 1063 1064
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
A
Al Viro 已提交
1065
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1066 1067
	unsigned char saved_type = flock->fl_type;

1068 1069 1070
	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

1071
	down_read(&cinode->lock_sem);
1072 1073 1074 1075 1076 1077 1078
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

1079
	up_read(&cinode->lock_sem);
1080 1081 1082
	return rc;
}

1083 1084 1085 1086 1087 1088
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
1089 1090 1091
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
A
Al Viro 已提交
1092
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1093 1094 1095 1096
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;
1097

1098
try_again:
1099
	down_write(&cinode->lock_sem);
1100
	if (!cinode->can_cache_brlcks) {
1101
		up_write(&cinode->lock_sem);
1102
		return rc;
1103
	}
1104 1105

	rc = posix_lock_file(file, flock, NULL);
1106
	up_write(&cinode->lock_sem);
1107
	if (rc == FILE_LOCK_DEFERRED) {
1108
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
1109 1110
		if (!rc)
			goto try_again;
1111
		locks_delete_block(flock);
1112
	}
1113
	return rc;
1114 1115
}

1116
int
1117
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1118
{
1119 1120
	unsigned int xid;
	int rc = 0, stored_rc;
1121 1122
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
1123
	unsigned int num, max_num, max_buf;
1124
	LOCKING_ANDX_RANGE *buf, *cur;
1125 1126 1127 1128
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
1129
	int i;
1130

1131
	xid = get_xid();
1132 1133
	tcon = tlink_tcon(cfile->tlink);

1134 1135
	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1136
	 * and check it before using.
1137 1138
	 */
	max_buf = tcon->ses->server->maxBuf;
1139
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1140
		free_xid(xid);
1141 1142 1143
		return -EINVAL;
	}

1144 1145 1146 1147
	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
1148 1149
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
1150
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1151
	if (!buf) {
1152
		free_xid(xid);
1153
		return -ENOMEM;
1154 1155 1156 1157 1158
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
1159
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1160 1161 1162 1163 1164 1165 1166 1167
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
1168 1169
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
1170 1171
						       (__u8)li->type, 0, num,
						       buf);
1172 1173 1174 1175 1176 1177 1178 1179 1180
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
1181
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1182
					       (__u8)types[i], 0, num, buf);
1183 1184 1185
			if (stored_rc)
				rc = stored_rc;
		}
1186 1187
	}

1188
	kfree(buf);
1189
	free_xid(xid);
1190 1191 1192
	return rc;
}

1193 1194 1195 1196 1197 1198
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

1199 1200 1201 1202 1203 1204 1205 1206 1207
/* One POSIX lock queued locally for pushing to the server. */
struct lock_to_push {
	struct list_head llist;	/* link in the caller's locks_to_send list */
	__u64 offset;		/* start of the byte range */
	__u64 length;		/* length of the byte range */
	__u32 pid;		/* hashed lock owner, sent as the pid */
	__u16 netfid;		/* SMB file handle the lock applies to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};

1208
static int
1209
cifs_push_posix_locks(struct cifsFileInfo *cfile)
1210
{
1211
	struct inode *inode = d_inode(cfile->dentry);
1212
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1213 1214
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
1215
	unsigned int count = 0, i;
1216
	int rc = 0, xid, type;
1217 1218
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
1219 1220
	__u64 length;

1221
	xid = get_xid();
1222

1223 1224
	if (!flctx)
		goto out;
1225

1226 1227 1228 1229 1230 1231
	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

1232 1233
	INIT_LIST_HEAD(&locks_to_send);

1234
	/*
1235 1236
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
1237
	 * protects locking operations of this inode.
1238
	 */
1239
	for (i = 0; i < count; i++) {
1240 1241 1242 1243 1244 1245 1246 1247 1248
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
1249
	spin_lock(&flctx->flc_lock);
1250
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1251
		if (el == &locks_to_send) {
1252 1253 1254 1255
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
1256
			cifs_dbg(VFS, "Can't push all brlocks!\n");
1257 1258
			break;
		}
1259 1260 1261 1262 1263
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
1264
		lck = list_entry(el, struct lock_to_push, llist);
1265
		lck->pid = hash_lockowner(flock->fl_owner);
1266
		lck->netfid = cfile->fid.netfid;
1267 1268 1269
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
1270
	}
1271
	spin_unlock(&flctx->flc_lock);
1272 1273 1274 1275 1276

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1277
					     lck->offset, lck->length, NULL,
1278 1279 1280 1281 1282 1283 1284
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

1285
out:
1286
	free_xid(xid);
1287
	return rc;
1288 1289 1290 1291 1292 1293
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
1294 1295
}

1296
static int
1297
cifs_push_locks(struct cifsFileInfo *cfile)
1298
{
1299
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1300
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1301
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1302 1303 1304 1305 1306 1307 1308 1309
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}
1310

1311
	if (cap_unix(tcon->ses) &&
1312 1313
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1314 1315 1316
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1317

1318 1319 1320
	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
1321 1322
}

1323
static void
1324
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1325
		bool *wait_flag, struct TCP_Server_Info *server)
L
Linus Torvalds 已提交
1326
{
1327
	if (flock->fl_flags & FL_POSIX)
1328
		cifs_dbg(FYI, "Posix\n");
1329
	if (flock->fl_flags & FL_FLOCK)
1330
		cifs_dbg(FYI, "Flock\n");
1331
	if (flock->fl_flags & FL_SLEEP) {
1332
		cifs_dbg(FYI, "Blocking lock\n");
1333
		*wait_flag = true;
L
Linus Torvalds 已提交
1334
	}
1335
	if (flock->fl_flags & FL_ACCESS)
1336
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1337
	if (flock->fl_flags & FL_LEASE)
1338
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1339
	if (flock->fl_flags &
1340
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1341
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1342
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
L
Linus Torvalds 已提交
1343

1344
	*type = server->vals->large_lock_type;
1345
	if (flock->fl_type == F_WRLCK) {
1346
		cifs_dbg(FYI, "F_WRLCK\n");
1347
		*type |= server->vals->exclusive_lock_type;
1348 1349
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
1350
		cifs_dbg(FYI, "F_UNLCK\n");
1351
		*type |= server->vals->unlock_lock_type;
1352 1353 1354
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
1355
		cifs_dbg(FYI, "F_RDLCK\n");
1356
		*type |= server->vals->shared_lock_type;
1357 1358
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
1359
		cifs_dbg(FYI, "F_EXLCK\n");
1360
		*type |= server->vals->exclusive_lock_type;
1361 1362
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
1363
		cifs_dbg(FYI, "F_SHLCK\n");
1364
		*type |= server->vals->shared_lock_type;
1365
		*lock = 1;
L
Linus Torvalds 已提交
1366
	} else
1367
		cifs_dbg(FYI, "Unknown type of lock\n");
1368
}
L
Linus Torvalds 已提交
1369

1370
static int
1371
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1372
	   bool wait_flag, bool posix_lck, unsigned int xid)
1373 1374 1375
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
1376 1377
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1378
	struct TCP_Server_Info *server = tcon->ses->server;
1379
	__u16 netfid = cfile->fid.netfid;
1380

1381 1382
	if (posix_lck) {
		int posix_lock_type;
1383 1384 1385 1386 1387

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

1388
		if (type & server->vals->shared_lock_type)
1389 1390 1391
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
1392 1393
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
1394
				      flock->fl_start, length, flock,
1395
				      posix_lock_type, wait_flag);
1396 1397
		return rc;
	}
L
Linus Torvalds 已提交
1398

1399
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1400 1401 1402
	if (!rc)
		return rc;

1403
	/* BB we could chain these into one lock request BB */
1404 1405
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
1406
	if (rc == 0) {
1407 1408
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
1409 1410
		flock->fl_type = F_UNLCK;
		if (rc != 0)
1411 1412
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
1413
		return 0;
L
Linus Torvalds 已提交
1414
	}
J
[CIFS]  
Jeremy Allison 已提交
1415

1416
	if (type & server->vals->shared_lock_type) {
1417
		flock->fl_type = F_WRLCK;
1418
		return 0;
J
[CIFS]  
Jeremy Allison 已提交
1419 1420
	}

1421 1422 1423 1424 1425
	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
1426
	if (rc == 0) {
1427 1428
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
1429 1430
		flock->fl_type = F_RDLCK;
		if (rc != 0)
1431 1432
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
1433 1434 1435
	} else
		flock->fl_type = F_WRLCK;

1436
	return 0;
1437 1438
}

1439
void
1440 1441 1442 1443 1444 1445 1446
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

1447
void
1448 1449 1450 1451 1452 1453 1454 1455 1456 1457
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

1458
int
1459 1460
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
1461 1462
{
	int rc = 0, stored_rc;
1463 1464 1465 1466
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
1467
	unsigned int i;
1468
	unsigned int max_num, num, max_buf;
1469 1470
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1471
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1472 1473 1474 1475 1476 1477
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

1478 1479
	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1480
	 * and check it before using.
1481 1482
	 */
	max_buf = tcon->ses->server->maxBuf;
1483
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1484 1485
		return -EINVAL;

1486 1487 1488 1489
	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
1490 1491
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
1492
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1493 1494 1495
	if (!buf)
		return -ENOMEM;

1496
	down_write(&cinode->lock_sem);
1497 1498 1499
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
1500
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1501 1502 1503 1504 1505 1506 1507 1508
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
1509
			if (cinode->can_cache_brlcks) {
1510 1511
				/*
				 * We can cache brlock requests - simply remove
1512
				 * a lock from the file's list.
1513 1514 1515 1516
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
1517
				continue;
1518
			}
1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
1531 1532
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
1533 1534 1535 1536 1537 1538 1539 1540
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
1541
							&cfile->llist->locks);
1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeed -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
1553 1554
		}
		if (num) {
1555
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1556 1557
					       types[i], num, 0, buf);
			if (stored_rc) {
1558 1559
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
1560 1561 1562 1563 1564 1565
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

1566
	up_write(&cinode->lock_sem);
1567 1568 1569 1570
	kfree(buf);
	return rc;
}

1571
static int
1572
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1573 1574
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
1575 1576 1577 1578 1579
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1580
	struct TCP_Server_Info *server = tcon->ses->server;
1581
	struct inode *inode = d_inode(cfile->dentry);
1582 1583

	if (posix_lck) {
1584
		int posix_lock_type;
1585 1586 1587 1588 1589

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

1590
		if (type & server->vals->shared_lock_type)
1591 1592 1593
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
1594

1595
		if (unlock == 1)
1596
			posix_lock_type = CIFS_UNLCK;
J
[CIFS]  
Jeremy Allison 已提交
1597

1598
		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1599 1600
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
1601
				      NULL, posix_lock_type, wait_flag);
1602 1603
		goto out;
	}
J
[CIFS]  
Jeremy Allison 已提交
1604

1605
	if (lock) {
1606 1607
		struct cifsLockInfo *lock;

1608 1609
		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
1610 1611 1612
		if (!lock)
			return -ENOMEM;

1613
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1614
		if (rc < 0) {
1615
			kfree(lock);
1616 1617 1618
			return rc;
		}
		if (!rc)
1619 1620
			goto out;

1621 1622 1623 1624 1625 1626 1627
		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
1628 1629
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
1630
			cifs_zap_mapping(inode);
1631 1632
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
1633
			CIFS_I(inode)->oplock = 0;
1634 1635
		}

1636 1637
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
1638 1639
		if (rc) {
			kfree(lock);
1640
			return rc;
1641
		}
1642

1643
		cifs_lock_add(cfile, lock);
1644
	} else if (unlock)
1645
		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1646 1647

out:
1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659
	if (flock->fl_flags & FL_POSIX) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
1660
		rc = locks_lock_file_wait(file, flock);
1661
	}
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
1676
	__u32 type;
1677 1678

	rc = -EACCES;
1679
	xid = get_xid();
1680

1681 1682 1683
	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);
1684 1685 1686

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);
1687 1688 1689

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
1690
	cifs_sb = CIFS_FILE_SB(file);
1691
	netfid = cfile->fid.netfid;
A
Al Viro 已提交
1692
	cinode = CIFS_I(file_inode(file));
1693

1694
	if (cap_unix(tcon->ses) &&
1695 1696 1697 1698 1699 1700 1701 1702
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
1703
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1704
		free_xid(xid);
1705 1706 1707 1708 1709 1710 1711 1712
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
1713
		free_xid(xid);
1714
		return -EOPNOTSUPP;
J
[CIFS]  
Jeremy Allison 已提交
1715 1716
	}

1717 1718
	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
1719
	free_xid(xid);
L
Linus Torvalds 已提交
1720 1721 1722
	return rc;
}

1723 1724 1725 1726
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
1727
void
1728 1729 1730 1731 1732 1733 1734 1735 1736
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		      unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

1737 1738 1739
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
L
Linus Torvalds 已提交
1740 1741 1742 1743 1744
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
1745 1746
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
1747
	unsigned int xid;
1748
	struct dentry *dentry = open_file->dentry;
1749
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1750
	struct cifs_io_parms io_parms;
L
Linus Torvalds 已提交
1751

1752
	cifs_sb = CIFS_SB(dentry->d_sb);
L
Linus Torvalds 已提交
1753

A
Al Viro 已提交
1754 1755
	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);
L
Linus Torvalds 已提交
1756

1757 1758 1759 1760 1761
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;
1762

1763
	xid = get_xid();
L
Linus Torvalds 已提交
1764 1765 1766 1767 1768

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
1769 1770 1771
			struct kvec iov[2];
			unsigned int len;

L
Linus Torvalds 已提交
1772 1773 1774
			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
S
Steve French 已提交
1775
				   reopen_file not to flush data to
L
Linus Torvalds 已提交
1776
				   server now */
J
Jeff Layton 已提交
1777
				rc = cifs_reopen_file(open_file, false);
L
Linus Torvalds 已提交
1778 1779 1780
				if (rc != 0)
					break;
			}
1781

1782
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
1783
				  (unsigned int)write_size - total_written);
1784 1785 1786
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
1787
			io_parms.pid = pid;
1788 1789
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
1790
			io_parms.length = len;
1791 1792
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
L
Linus Torvalds 已提交
1793 1794 1795 1796 1797
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
1798
				free_xid(xid);
L
Linus Torvalds 已提交
1799 1800
				return rc;
			}
1801
		} else {
1802
			spin_lock(&d_inode(dentry)->i_lock);
1803
			cifs_update_eof(cifsi, *offset, bytes_written);
1804
			spin_unlock(&d_inode(dentry)->i_lock);
1805
			*offset += bytes_written;
1806
		}
L
Linus Torvalds 已提交
1807 1808
	}

1809
	cifs_stats_bytes_written(tcon, total_written);
L
Linus Torvalds 已提交
1810

1811
	if (total_written > 0) {
1812 1813 1814 1815
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
L
Linus Torvalds 已提交
1816
	}
1817
	mark_inode_dirty_sync(d_inode(dentry));
1818
	free_xid(xid);
L
Linus Torvalds 已提交
1819 1820 1821
	return total_written;
}

1822 1823
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
S
Steve French 已提交
1824 1825
{
	struct cifsFileInfo *open_file = NULL;
1826
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1827
	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1828 1829 1830 1831

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;
S
Steve French 已提交
1832

1833
	spin_lock(&tcon->open_file_lock);
S
Steve French 已提交
1834 1835 1836 1837
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1838
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1839
			continue;
1840
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
S
Steve French 已提交
1841 1842 1843
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
1844 1845
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
S
Steve French 已提交
1846 1847 1848 1849 1850 1851 1852
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
1853
	spin_unlock(&tcon->open_file_lock);
S
Steve French 已提交
1854 1855 1856
	return NULL;
}

1857 1858 1859 1860
/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
		       struct cifsFileInfo **ret_file)
1861
{
1862
	struct cifsFileInfo *open_file, *inv_file = NULL;
1863
	struct cifs_sb_info *cifs_sb;
1864
	struct cifs_tcon *tcon;
1865
	bool any_available = false;
1866
	int rc = -EBADF;
1867
	unsigned int refind = 0;
1868

1869 1870 1871 1872 1873 1874 1875
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of on oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */
1876

S
Steve French 已提交
1877
	if (cifs_inode == NULL) {
1878
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1879
		dump_stack();
1880
		return rc;
1881 1882
	}

1883
	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1884
	tcon = cifs_sb_master_tcon(cifs_sb);
1885

1886 1887 1888 1889
	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

1890
	spin_lock(&tcon->open_file_lock);
1891
refind_writable:
1892
	if (refind > MAX_REOPEN_ATT) {
1893
		spin_unlock(&tcon->open_file_lock);
1894
		return rc;
1895
	}
1896
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1897 1898
		if (!any_available && open_file->pid != current->tgid)
			continue;
1899
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1900
			continue;
1901
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1902 1903
			if (!open_file->invalidHandle) {
				/* found a good writable file */
1904 1905
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
1906 1907
				*ret_file = open_file;
				return 0;
1908 1909 1910
			} else {
				if (!inv_file)
					inv_file = open_file;
1911
			}
1912 1913
		}
	}
1914 1915 1916 1917 1918
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}
1919 1920 1921

	if (inv_file) {
		any_available = false;
1922
		cifsFileInfo_get(inv_file);
1923 1924
	}

1925
	spin_unlock(&tcon->open_file_lock);
1926 1927 1928

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
1929 1930 1931
		if (!rc) {
			*ret_file = inv_file;
			return 0;
1932
		}
1933 1934 1935 1936 1937 1938 1939 1940 1941

		spin_lock(&tcon->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&tcon->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&tcon->open_file_lock);
		goto refind_writable;
1942 1943
	}

1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957
	return rc;
}

/*
 * Convenience wrapper around cifs_get_writable_file that returns the handle
 * directly (NULL on failure).  The returned handle carries a reference.
 */
struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
	if (rc)
		/* kernel log messages should be newline-terminated */
		cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);

	return cfile;
}

L
Linus Torvalds 已提交
1960 1961 1962
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
1963
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
L
Linus Torvalds 已提交
1964 1965 1966 1967
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
1968
	struct cifsFileInfo *open_file;
L
Linus Torvalds 已提交
1969 1970 1971 1972 1973 1974 1975 1976 1977 1978

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

1979
	if ((to > PAGE_SIZE) || (from > to)) {
L
Linus Torvalds 已提交
1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
S
Steve French 已提交
1992
		to = (unsigned)(mapping->host->i_size - offset);
L
Linus Torvalds 已提交
1993

1994 1995
	rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
	if (!rc) {
1996 1997
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
1998
		cifsFileInfo_put(open_file);
L
Linus Torvalds 已提交
1999
		/* Does mm or vfs already set times? */
2000
		inode->i_atime = inode->i_mtime = current_time(inode);
2001
		if ((bytes_written > 0) && (offset))
2002
			rc = 0;
2003 2004
		else if (bytes_written < 0)
			rc = bytes_written;
2005 2006
		else
			rc = -EFAULT;
2007
	} else {
2008 2009 2010
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		if (!is_retryable_error(rc))
			rc = -EIO;
L
Linus Torvalds 已提交
2011 2012 2013 2014 2015 2016
	}

	kunmap(page);
	return rc;
}

2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

J
Jan Kara 已提交
2029 2030
	*found_pages = find_get_pages_range_tag(mapping, index, end,
				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2031 2032 2033
	return wdata;
}

2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
M
Matthew Wilcox 已提交
2046 2047 2048 2049
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105
		 */

		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
2106
		put_page(wdata->pages[i]);
2107 2108 2109 2110 2111 2112
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}

2113
static int
2114 2115
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
2116
{
2117
	int rc;
2118 2119
	struct TCP_Server_Info *server =
				tlink_tcon(wdata->cfile->tlink)->ses->server;
2120 2121 2122 2123

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
2124
	wdata->pagesz = PAGE_SIZE;
2125 2126
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
2127 2128
			(loff_t)PAGE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2129
	wdata->pid = wdata->cfile->pid;
2130

2131 2132
	rc = adjust_credits(server, &wdata->credits, wdata->bytes);
	if (rc)
2133
		return rc;
2134

2135 2136 2137 2138
	if (wdata->cfile->invalidHandle)
		rc = -EAGAIN;
	else
		rc = server->ops->async_writev(wdata, cifs_writedata_release);
2139 2140 2141 2142

	return rc;
}

L
Linus Torvalds 已提交
2143
static int cifs_writepages(struct address_space *mapping,
2144
			   struct writeback_control *wbc)
L
Linus Torvalds 已提交
2145
{
2146 2147
	struct inode *inode = mapping->host;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2148
	struct TCP_Server_Info *server;
2149 2150 2151
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
2152
	struct cifsFileInfo *cfile = NULL;
2153
	int rc = 0;
2154
	int saved_rc = 0;
2155
	unsigned int xid;
2156

2157
	/*
2158
	 * If wsize is smaller than the page cache size, default to writing
2159 2160
	 * one page at a time via cifs_writepage
	 */
2161
	if (cifs_sb->wsize < PAGE_SIZE)
2162 2163
		return generic_writepages(mapping, wbc);

2164
	xid = get_xid();
2165
	if (wbc->range_cyclic) {
2166
		index = mapping->writeback_index; /* Start from prev offset */
2167 2168
		end = -1;
	} else {
2169 2170
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
2171
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2172 2173
			range_whole = true;
		scanned = true;
2174
	}
2175
	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2176
retry:
2177
	while (!done && index <= end) {
2178
		unsigned int i, nr_pages, found_pages, wsize;
2179
		pgoff_t next = 0, tofind, saved_index = index;
2180 2181
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
2182
		int get_file_rc = 0;
2183

2184 2185 2186
		if (cfile)
			cifsFileInfo_put(cfile);

2187 2188 2189 2190 2191
		rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);

		/* in case of an error store it to return later */
		if (rc)
			get_file_rc = rc;
2192

2193
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2194
						   &wsize, credits);
2195 2196
		if (rc != 0) {
			done = true;
2197
			break;
2198
		}
2199

2200
		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2201

2202 2203
		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
2204 2205
		if (!wdata) {
			rc = -ENOMEM;
2206
			done = true;
2207
			add_credits_and_wake_if(server, credits, 0);
2208 2209 2210 2211 2212
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
2213
			add_credits_and_wake_if(server, credits, 0);
2214 2215 2216
			break;
		}

2217 2218
		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);
2219

2220 2221 2222
		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
2223
			add_credits_and_wake_if(server, credits, 0);
2224
			continue;
2225
		}
2226

2227
		wdata->credits = credits_on_stack;
2228 2229
		wdata->cfile = cfile;
		cfile = NULL;
2230

2231
		if (!wdata->cfile) {
2232 2233 2234 2235 2236 2237
			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
				 get_file_rc);
			if (is_retryable_error(get_file_rc))
				rc = get_file_rc;
			else
				rc = -EBADF;
2238 2239
		} else
			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2240

2241 2242 2243
		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

2244 2245
		/* send failure -- clean up the mess */
		if (rc != 0) {
2246
			add_credits_and_wake_if(server, &wdata->credits, 0);
2247
			for (i = 0; i < nr_pages; ++i) {
2248
				if (is_retryable_error(rc))
2249 2250 2251 2252 2253
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
2254
				put_page(wdata->pages[i]);
2255
			}
2256
			if (!is_retryable_error(rc))
2257
				mapping_set_error(mapping, rc);
2258 2259
		}
		kref_put(&wdata->refcount, cifs_writedata_release);
2260

2261 2262 2263 2264 2265
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

2266 2267 2268 2269 2270 2271 2272 2273 2274
		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

2275 2276 2277
		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;
2278

2279
		index = next;
2280
	}
2281

2282 2283 2284 2285 2286
	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
2287
		scanned = true;
2288 2289 2290
		index = 0;
		goto retry;
	}
2291

2292 2293 2294
	if (saved_rc != 0)
		rc = saved_rc;

2295
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2296 2297
		mapping->writeback_index = index;

2298 2299
	if (cfile)
		cifsFileInfo_put(cfile);
2300
	free_xid(xid);
L
Linus Torvalds 已提交
2301 2302 2303
	return rc;
}

2304 2305
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
L
Linus Torvalds 已提交
2306
{
2307
	int rc;
2308
	unsigned int xid;
L
Linus Torvalds 已提交
2309

2310
	xid = get_xid();
L
Linus Torvalds 已提交
2311
/* BB add check for wbc flags */
2312
	get_page(page);
S
Steve French 已提交
2313
	if (!PageUptodate(page))
2314
		cifs_dbg(FYI, "ppw - page not up to date\n");
2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
S
Steve French 已提交
2326
	set_page_writeback(page);
2327
retry_write:
2328
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2329 2330
	if (is_retryable_error(rc)) {
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2331
			goto retry_write;
2332
		redirty_page_for_writepage(wbc, page);
2333
	} else if (rc != 0) {
2334
		SetPageError(page);
2335 2336
		mapping_set_error(page->mapping, rc);
	} else {
2337
		SetPageUptodate(page);
2338
	}
2339
	end_page_writeback(page);
2340
	put_page(page);
2341
	free_xid(xid);
L
Linus Torvalds 已提交
2342 2343 2344
	return rc;
}

/* ->writepage entry point: write the page, then drop the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}

N
Nick Piggin 已提交
2352 2353 2354
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
L
Linus Torvalds 已提交
2355
{
N
Nick Piggin 已提交
2356 2357
	int rc;
	struct inode *inode = mapping->host;
2358 2359 2360 2361 2362 2363 2364 2365
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;
L
Linus Torvalds 已提交
2366

2367
	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2368
		 page, pos, copied);
N
Nick Piggin 已提交
2369

2370 2371 2372 2373
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
2374
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
N
Nick Piggin 已提交
2375
		SetPageUptodate(page);
S
Steve French 已提交
2376

L
Linus Torvalds 已提交
2377
	if (!PageUptodate(page)) {
N
Nick Piggin 已提交
2378
		char *page_data;
2379
		unsigned offset = pos & (PAGE_SIZE - 1);
2380
		unsigned int xid;
N
Nick Piggin 已提交
2381

2382
		xid = get_xid();
L
Linus Torvalds 已提交
2383 2384 2385 2386 2387 2388
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well	leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
2389
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
N
Nick Piggin 已提交
2390
		/* if (rc < 0) should we set writebehind rc? */
L
Linus Torvalds 已提交
2391
		kunmap(page);
N
Nick Piggin 已提交
2392

2393
		free_xid(xid);
S
Steve French 已提交
2394
	} else {
N
Nick Piggin 已提交
2395 2396
		rc = copied;
		pos += copied;
2397
		set_page_dirty(page);
L
Linus Torvalds 已提交
2398 2399
	}

N
Nick Piggin 已提交
2400 2401 2402 2403 2404 2405 2406 2407
	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
2408
	put_page(page);
N
Nick Piggin 已提交
2409

L
Linus Torvalds 已提交
2410 2411 2412
	return rc;
}

2413 2414
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
L
Linus Torvalds 已提交
2415
{
2416
	unsigned int xid;
L
Linus Torvalds 已提交
2417
	int rc = 0;
2418
	struct cifs_tcon *tcon;
2419
	struct TCP_Server_Info *server;
2420
	struct cifsFileInfo *smbfile = file->private_data;
A
Al Viro 已提交
2421
	struct inode *inode = file_inode(file);
2422
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
L
Linus Torvalds 已提交
2423

2424
	rc = file_write_and_wait_range(file, start, end);
2425 2426
	if (rc)
		return rc;
A
Al Viro 已提交
2427
	inode_lock(inode);
2428

2429
	xid = get_xid();
L
Linus Torvalds 已提交
2430

A
Al Viro 已提交
2431 2432
	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);
2433

2434
	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2435
		rc = cifs_zap_mapping(inode);
2436
		if (rc) {
2437
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2438 2439 2440
			rc = 0; /* don't care about it in fsync */
		}
	}
2441

2442
	tcon = tlink_tcon(smbfile->tlink);
2443 2444 2445 2446 2447 2448 2449
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}
2450

2451
	free_xid(xid);
A
Al Viro 已提交
2452
	inode_unlock(inode);
2453 2454 2455
	return rc;
}

2456
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2457
{
2458
	unsigned int xid;
2459
	int rc = 0;
2460
	struct cifs_tcon *tcon;
2461
	struct TCP_Server_Info *server;
2462
	struct cifsFileInfo *smbfile = file->private_data;
2463
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2464 2465
	struct inode *inode = file->f_mapping->host;

2466
	rc = file_write_and_wait_range(file, start, end);
2467 2468
	if (rc)
		return rc;
A
Al Viro 已提交
2469
	inode_lock(inode);
2470

2471
	xid = get_xid();
2472

A
Al Viro 已提交
2473 2474
	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);
2475 2476

	tcon = tlink_tcon(smbfile->tlink);
2477 2478 2479 2480 2481 2482 2483
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}
2484

2485
	free_xid(xid);
A
Al Viro 已提交
2486
	inode_unlock(inode);
L
Linus Torvalds 已提交
2487 2488 2489 2490 2491 2492 2493
	return rc;
}

/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
2494
int cifs_flush(struct file *file, fl_owner_t id)
L
Linus Torvalds 已提交
2495
{
A
Al Viro 已提交
2496
	struct inode *inode = file_inode(file);
L
Linus Torvalds 已提交
2497 2498
	int rc = 0;

2499
	if (file->f_mode & FMODE_WRITE)
2500
		rc = filemap_write_and_wait(inode->i_mapping);
2501

2502
	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
L
Linus Torvalds 已提交
2503 2504 2505 2506

	return rc;
}

2507 2508 2509 2510 2511 2512 2513
static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
	int rc = 0;
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
2514
		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2515 2516 2517 2518 2519 2520 2521
		if (!pages[i]) {
			/*
			 * save number of pages we have already allocated and
			 * return with ENOMEM error
			 */
			num_pages = i;
			rc = -ENOMEM;
2522
			break;
2523 2524 2525
		}
	}

2526 2527 2528 2529
	if (rc) {
		for (i = 0; i < num_pages; i++)
			put_page(pages[i]);
	}
2530 2531 2532 2533 2534 2535 2536 2537 2538 2539
	return rc;
}

/*
 * Compute how many pages are needed for a write of at most @wsize bytes out
 * of @len remaining.  The clamped byte count is returned through *cur_len
 * when the caller asks for it.
 */
static inline
size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
	size_t clen = min_t(const size_t, len, wsize);

	if (cur_len)
		*cur_len = clen;

	return DIV_ROUND_UP(clen, PAGE_SIZE);
}

2548
static void
2549
cifs_uncached_writedata_release(struct kref *refcount)
2550 2551
{
	int i;
2552 2553 2554
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

2555
	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2556 2557 2558 2559 2560
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}

2561 2562
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);

2563 2564 2565
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
2566 2567
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
2568
	struct inode *inode = d_inode(wdata->cfile->dentry);
2569 2570 2571 2572 2573 2574 2575 2576 2577
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
2578 2579
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
2580
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2581 2582 2583
}

static int
2584 2585
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
2586
{
2587 2588
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;
2589

2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606
	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;
2607

2608 2609 2610 2611 2612 2613 2614 2615
	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;
2616

2617 2618 2619 2620 2621 2622
	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
2623 2624
}

L
Long Li 已提交
2625 2626 2627 2628
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
2629 2630
	unsigned int wsize;
	struct cifs_credits credits;
L
Long Li 已提交
2631 2632 2633 2634 2635
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(wdata->cfile->tlink)->ses->server;

	do {
2636 2637 2638 2639 2640 2641 2642
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}
L
Long Li 已提交
2643 2644


2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661
		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;
L
Long Li 已提交
2662

2663 2664 2665 2666 2667 2668 2669
		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
L
Long Li 已提交
2670
					cifs_uncached_writedata_release);
2671
		}
L
Long Li 已提交
2672

2673 2674 2675 2676 2677
		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}
L
Long Li 已提交
2678

2679 2680 2681
		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);
L
Long Li 已提交
2682

2683 2684
fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
L
Long Li 已提交
2685 2686 2687
	return rc;
}

2688 2689 2690
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
2691 2692
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
2693
{
2694 2695
	int rc = 0;
	size_t cur_len;
2696
	unsigned long nr_pages, num_pages, i;
2697
	struct cifs_writedata *wdata;
2698
	struct iov_iter saved_from = *from;
2699
	loff_t saved_offset = offset;
2700
	pid_t pid;
2701
	struct TCP_Server_Info *server;
L
Long Li 已提交
2702 2703
	struct page **pagevec;
	size_t start;
2704
	unsigned int xid;
2705 2706 2707 2708 2709 2710

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

2711
	server = tlink_tcon(open_file->tlink)->ses->server;
2712
	xid = get_xid();
2713

2714
	do {
2715 2716 2717
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
2718

2719 2720 2721 2722 2723 2724 2725 2726
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

2727
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2728
						   &wsize, credits);
2729 2730
		if (rc)
			break;
2731

2732 2733
		cur_len = min_t(const size_t, len, wsize);

L
Long Li 已提交
2734
		if (ctx->direct_io) {
2735 2736 2737
			ssize_t result;

			result = iov_iter_get_pages_alloc(
2738
				from, &pagevec, cur_len, &start);
2739
			if (result < 0) {
L
Long Li 已提交
2740 2741 2742 2743
				cifs_dbg(VFS,
					"direct_writev couldn't get user pages "
					"(rc=%zd) iter type %d iov_offset %zd "
					"count %zd\n",
2744
					result, from->type,
L
Long Li 已提交
2745 2746
					from->iov_offset, from->count);
				dump_stack();
2747 2748 2749

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
L
Long Li 已提交
2750 2751
				break;
			}
2752
			cur_len = (size_t)result;
L
Long Li 已提交
2753 2754 2755 2756 2757 2758
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
2759
					     cifs_uncached_writev_complete);
L
Long Li 已提交
2760 2761 2762 2763 2764
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
2765 2766


L
Long Li 已提交
2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781
			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
2782

L
Long Li 已提交
2783 2784
			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
2785
				kvfree(wdata->pages);
L
Long Li 已提交
2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
2797
				kvfree(wdata->pages);
L
Long Li 已提交
2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}
2812

2813 2814 2815 2816 2817 2818
		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
2819
		wdata->pagesz = PAGE_SIZE;
2820
		wdata->credits = credits_on_stack;
2821 2822
		wdata->ctx = ctx;
		kref_get(&ctx->refcount);
2823

2824 2825 2826 2827
		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
2828 2829
				rc = -EAGAIN;
			else
2830
				rc = server->ops->async_writev(wdata,
2831
					cifs_uncached_writedata_release);
2832 2833
		}

2834
		if (rc) {
2835
			add_credits_and_wake_if(server, &wdata->credits, 0);
2836 2837
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
2838
			if (rc == -EAGAIN) {
2839
				*from = saved_from;
2840 2841 2842
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
2843 2844 2845
			break;
		}

2846
		list_add_tail(&wdata->list, wdata_list);
2847 2848
		offset += cur_len;
		len -= cur_len;
2849 2850
	} while (len > 0);

2851
	free_xid(xid);
2852 2853 2854
	return rc;
}

2855
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2856
{
2857
	struct cifs_writedata *wdata, *tmp;
2858 2859
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
2860 2861
	struct dentry *dentry = ctx->cfile->dentry;
	unsigned int i;
2862 2863
	int rc;

2864 2865
	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);
2866

2867
	mutex_lock(&ctx->aio_mutex);
2868

2869 2870 2871 2872
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}
2873

2874
	rc = ctx->rc;
2875 2876
	/*
	 * Wait for and collect replies for any successful sends in order of
2877 2878
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
2879 2880
	 */
restart_loop:
2881
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2882
		if (!rc) {
2883 2884 2885 2886 2887 2888
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
2889 2890
				rc = wdata->result;
			else
2891
				ctx->total_len += wdata->bytes;
2892 2893 2894

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
2895
				struct list_head tmp_list;
2896
				struct iov_iter tmp_from = ctx->iter;
2897 2898 2899 2900

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

L
Long Li 已提交
2901 2902 2903 2904 2905
				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					iov_iter_advance(&tmp_from,
2906
						 wdata->offset - ctx->pos);
2907

L
Long Li 已提交
2908
					rc = cifs_write_from_iter(wdata->offset,
2909
						wdata->bytes, &tmp_from,
2910 2911
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);
2912 2913 2914

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
L
Long Li 已提交
2915
				}
2916

2917
				list_splice(&tmp_list, &ctx->list);
2918 2919 2920 2921
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
2922
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2923 2924
	}

L
Long Li 已提交
2925 2926 2927
	if (!ctx->direct_io)
		for (i = 0; i < ctx->npages; i++)
			put_page(ctx->bv[i].bv_page);
2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941

	cifs_stats_bytes_written(tcon, ctx->total_len);
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}

L
Long Li 已提交
2942 2943
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
2944 2945 2946 2947 2948 2949 2950 2951
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
L
Long Li 已提交
2952
	size_t len = iov_iter_count(from);
2953 2954 2955
	int rc;

	/*
L
Long Li 已提交
2956 2957 2958
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
2959
	 */
L
Long Li 已提交
2960 2961 2962 2963
	if (direct && from->type & ITER_KVEC) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}
2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

L
Long Li 已提交
2987 2988 2989 2990 2991 2992 2993 2994 2995 2996
	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

3039 3040
	if (unlikely(!total_written))
		return rc;
3041

3042 3043
	iocb->ki_pos += total_written;
	return total_written;
3044 3045
}

L
Long Li 已提交
3046 3047 3048 3049 3050 3051 3052 3053 3054 3055
ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, true);
}

/* Uncached (non-direct) write entry point: data is copied into kernel pages. */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}

3056
static ssize_t
A
Al Viro 已提交
3057
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3058
{
3059 3060 3061 3062 3063
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3064
	ssize_t rc;
3065

3066
	inode_lock(inode);
3067 3068 3069 3070 3071
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);
3072

3073 3074
	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
3075 3076 3077
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3078 3079
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
A
Al Viro 已提交
3080
		rc = __generic_file_write_iter(iocb, from);
3081 3082 3083
	else
		rc = -EACCES;
out:
3084
	up_read(&cinode->lock_sem);
A
Al Viro 已提交
3085
	inode_unlock(inode);
A
Al Viro 已提交
3086

3087 3088
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
3089 3090 3091 3092
	return rc;
}

ssize_t
A
Al Viro 已提交
3093
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3094
{
A
Al Viro 已提交
3095
	struct inode *inode = file_inode(iocb->ki_filp);
3096 3097 3098 3099 3100
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3101
	ssize_t written;
3102

3103 3104 3105 3106
	written = cifs_get_writer(cinode);
	if (written)
		return written;

3107
	if (CIFS_CACHE_WRITE(cinode)) {
3108 3109
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3110
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
A
Al Viro 已提交
3111
			written = generic_file_write_iter(iocb, from);
3112 3113
			goto out;
		}
A
Al Viro 已提交
3114
		written = cifs_writev(iocb, from);
3115
		goto out;
3116 3117
	}
	/*
3118 3119 3120 3121
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
3122
	 */
A
Al Viro 已提交
3123
	written = cifs_user_writev(iocb, from);
3124
	if (CIFS_CACHE_READ(cinode)) {
3125
		/*
3126 3127 3128 3129 3130
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
3131
		 */
3132
		cifs_zap_mapping(inode);
3133
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3134
			 inode);
3135
		cinode->oplock = 0;
3136
	}
3137 3138
out:
	cifs_put_writer(cinode);
3139
	return written;
3140 3141
}

3142
static struct cifs_readdata *
3143
cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3144 3145
{
	struct cifs_readdata *rdata;
3146

3147
	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3148
	if (rdata != NULL) {
3149
		rdata->pages = pages;
3150
		kref_init(&rdata->refcount);
3151 3152
		INIT_LIST_HEAD(&rdata->list);
		init_completion(&rdata->done);
3153 3154
		INIT_WORK(&rdata->work, complete);
	}
3155

3156 3157 3158
	return rdata;
}

3159 3160 3161 3162
/*
 * Allocate a cifs_readdata plus a zeroed array of @nr_pages page pointers.
 * The array is freed here if the readdata allocation itself fails.
 */
static struct cifs_readdata *
cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
{
	struct page **pages =
		kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
	struct cifs_readdata *ret = NULL;

	if (pages) {
		ret = cifs_readdata_direct_alloc(pages, complete);
		if (!ret)
			kfree(pages);
	}

	return ret;
}

3175 3176
void
cifs_readdata_release(struct kref *refcount)
3177
{
3178 3179
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
3180 3181 3182 3183 3184 3185
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
3186 3187 3188
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

3189
	kvfree(rdata->pages);
3190 3191 3192
	kfree(rdata);
}

3193
static int
3194
cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3195 3196
{
	int rc = 0;
3197
	struct page *page;
3198 3199
	unsigned int i;

3200
	for (i = 0; i < nr_pages; i++) {
3201 3202 3203 3204 3205
		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!page) {
			rc = -ENOMEM;
			break;
		}
3206
		rdata->pages[i] = page;
3207 3208 3209
	}

	if (rc) {
3210 3211 3212
		for (i = 0; i < nr_pages; i++) {
			put_page(rdata->pages[i]);
			rdata->pages[i] = NULL;
3213 3214 3215 3216 3217 3218 3219 3220 3221 3222
		}
	}
	return rc;
}

static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
3223
	unsigned int i;
3224

3225
	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3226 3227
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
3228 3229 3230 3231 3232 3233 3234
	}
	cifs_readdata_release(refcount);
}

/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:	the readdata response with list of pages holding data
3235
 * @iter:	destination for our data
3236 3237 3238 3239 3240
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
3241 3242
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3243
{
3244
	size_t remaining = rdata->got_bytes;
3245
	unsigned int i;
3246

3247 3248
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
3249
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3250 3251
		size_t written;

D
David Howells 已提交
3252
		if (unlikely(iov_iter_is_pipe(iter))) {
3253 3254 3255 3256 3257 3258
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
3259 3260 3261
		remaining -= written;
		if (written < copy && iov_iter_count(iter) > 0)
			break;
3262
	}
3263
	return remaining ? -EFAULT : 0;
3264 3265
}

3266 3267
static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);

3268 3269 3270 3271 3272 3273 3274
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
3275 3276
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
3277 3278 3279 3280
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}

static int
3281 3282 3283
uncached_fill_pages(struct TCP_Server_Info *server,
		    struct cifs_readdata *rdata, struct iov_iter *iter,
		    unsigned int len)
3284
{
3285
	int result = 0;
3286 3287
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
L
Long Li 已提交
3288
	unsigned int page_offset = rdata->page_offset;
3289

3290
	rdata->got_bytes = 0;
3291
	rdata->tailsz = PAGE_SIZE;
3292 3293
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
3294
		size_t n;
L
Long Li 已提交
3295 3296 3297 3298 3299 3300 3301
		unsigned int segment_size = rdata->pagesz;

		if (i == 0)
			segment_size -= page_offset;
		else
			page_offset = 0;

3302

3303
		if (len <= 0) {
3304
			/* no need to hold page hostage */
3305 3306
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
3307
			put_page(page);
3308
			continue;
3309
		}
L
Long Li 已提交
3310

3311
		n = len;
L
Long Li 已提交
3312
		if (len >= segment_size)
3313
			/* enough data to fill the page */
L
Long Li 已提交
3314 3315
			n = segment_size;
		else
3316
			rdata->tailsz = len;
L
Long Li 已提交
3317 3318
		len -= n;

3319
		if (iter)
L
Long Li 已提交
3320 3321
			result = copy_page_from_iter(
					page, page_offset, n, iter);
3322 3323 3324 3325
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
3326
		else
L
Long Li 已提交
3327 3328
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
3329 3330 3331
		if (result < 0)
			break;

3332
		rdata->got_bytes += result;
3333 3334
	}

3335 3336
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
3337 3338
}

3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353
/* Socket-receive callback for uncached reads: fill pages from the wire. */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}

/* Copy callback for uncached reads: fill pages from an already-received iter. */
static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}

L
Long Li 已提交
3354 3355 3356 3357
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
3358 3359
	unsigned int rsize;
	struct cifs_credits credits;
L
Long Li 已提交
3360 3361 3362 3363 3364
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(rdata->cfile->tlink)->ses->server;

	do {
3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
L
Long Li 已提交
3380 3381
						&rsize, &credits);

3382 3383
			if (rc)
				goto fail;
L
Long Li 已提交
3384

3385 3386 3387 3388 3389 3390
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;
L
Long Li 已提交
3391

3392 3393 3394 3395 3396 3397 3398
		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}
L
Long Li 已提交
3399

3400 3401 3402 3403 3404 3405
		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}
L
Long Li 已提交
3406

3407 3408 3409
		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);
L
Long Li 已提交
3410

3411 3412
fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
L
Long Li 已提交
3413 3414 3415
	return rc;
}

3416 3417
/*
 * Split an uncached read of [offset, offset+len) into rsize-bounded async
 * requests and queue them on @rdata_list. For direct I/O the user pages are
 * pinned via iov_iter_get_pages_alloc(); otherwise kernel pages are
 * allocated and filled from the socket. Returns 0 or the first fatal error.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = tlink_tcon(open_file->tlink)->ses->server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* we may be resuming mid-request; skip what was already consumed */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					"couldn't get user pages (rc=%zd)"
					" iter type %d"
					" iov_offset %zd count %zd\n",
					result, direct_iov.type,
					direct_iov.iov_offset,
					direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* rewind the iter so the retry re-pins the
				 * same user pages */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}

3559 3560
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3561
{
3562 3563
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
3564 3565
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
3566 3567
	unsigned int i;
	int rc;
3568

3569 3570
	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3571

3572
	mutex_lock(&ctx->aio_mutex);
3573

3574 3575 3576 3577
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}
3578

3579
	rc = ctx->rc;
3580
	/* the loop below should proceed in the order of increasing offsets */
3581
again:
3582
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3583
		if (!rc) {
3584 3585 3586 3587 3588 3589
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
3590
				/* resend call if it's a retryable error */
3591
				struct list_head tmp_list;
3592
				unsigned int got_bytes = rdata->got_bytes;
3593

3594 3595
				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);
3596

3597 3598 3599 3600 3601 3602
				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
L
Long Li 已提交
3603 3604 3605
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
3606 3607
					if (rc) {
						kref_put(&rdata->refcount,
L
Long Li 已提交
3608
							cifs_uncached_readdata_release);
3609 3610
						continue;
					}
3611
				}
3612

L
Long Li 已提交
3613 3614 3615 3616 3617 3618 3619 3620 3621 3622
				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
3623 3624 3625
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
3626
						&tmp_list, ctx);
3627

L
Long Li 已提交
3628 3629 3630 3631
					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

3632
				list_splice(&tmp_list, &ctx->list);
3633

3634 3635 3636
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
L
Long Li 已提交
3637
			else if (!ctx->direct_io)
A
Al Viro 已提交
3638
				rc = cifs_readdata_to_iov(rdata, to);
3639

3640 3641 3642
			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;
L
Long Li 已提交
3643 3644

			ctx->total_len += rdata->got_bytes;
L
Linus Torvalds 已提交
3645
		}
3646 3647
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
L
Linus Torvalds 已提交
3648
	}
3649

L
Long Li 已提交
3650 3651 3652 3653 3654 3655
	if (!ctx->direct_io) {
		for (i = 0; i < ctx->npages; i++) {
			if (ctx->should_dirty)
				set_page_dirty(ctx->bv[i].bv_page);
			put_page(ctx->bv[i].bv_page);
		}
3656

L
Long Li 已提交
3657 3658
		ctx->total_len = ctx->len - iov_iter_count(to);
	}
3659

3660 3661 3662 3663
	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

3664 3665 3666 3667 3668 3669 3670 3671 3672 3673
	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}

L
Long Li 已提交
3674 3675
/*
 * Common implementation behind cifs_user_readv()/cifs_direct_readv():
 * set up an aio context, fire the async read requests, and either return
 * -EIOCBQUEUED (async iocb) or wait for completion and return the byte
 * count or error.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && to->type & ITER_KVEC) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be marked dirty after we fill them */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}

L
Long Li 已提交
3777 3778 3779 3780 3781 3782 3783 3784 3785 3786
/* O_DIRECT read entry point: delegate to __cifs_readv() in direct-I/O mode. */
ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}

/* Uncached (copy) read entry point: delegate to __cifs_readv() with direct=false. */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}

3787
ssize_t
A
Al Viro 已提交
3788
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3789
{
A
Al Viro 已提交
3790
	struct inode *inode = file_inode(iocb->ki_filp);
3791 3792 3793 3794 3795 3796
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;
3797 3798 3799 3800 3801 3802 3803 3804 3805

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
3806
	if (!CIFS_CACHE_READ(cinode))
A
Al Viro 已提交
3807
		return cifs_user_readv(iocb, to);
3808

3809 3810 3811
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
A
Al Viro 已提交
3812
		return generic_file_read_iter(iocb, to);
3813 3814 3815 3816 3817 3818

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
A
Al Viro 已提交
3819
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3820
				     tcon->ses->server->vals->shared_lock_type,
3821
				     0, NULL, CIFS_READ_OP))
A
Al Viro 已提交
3822
		rc = generic_file_read_iter(iocb, to);
3823 3824
	up_read(&cinode->lock_sem);
	return rc;
3825
}
L
Linus Torvalds 已提交
3826

3827 3828
/*
 * Synchronous read helper: read @read_size bytes at *@offset into
 * @read_data via the server's sync_read op, looping in rsize-bounded
 * chunks and retrying -EAGAIN (e.g. after handle reopen). Returns bytes
 * read, or a negative error if nothing was read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}

3918 3919 3920 3921
/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
3922
static vm_fault_t
3923
cifs_page_mkwrite(struct vm_fault *vmf)
3924 3925 3926 3927 3928 3929 3930
{
	struct page *page = vmf->page;

	lock_page(page);
	return VM_FAULT_LOCKED;
}

3931
static const struct vm_operations_struct cifs_file_vm_ops = {
3932
	.fault = filemap_fault,
3933
	.map_pages = filemap_map_pages,
3934 3935 3936
	.page_mkwrite = cifs_page_mkwrite,
};

3937 3938
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
3939
	int xid, rc = 0;
A
Al Viro 已提交
3940
	struct inode *inode = file_inode(file);
3941

3942
	xid = get_xid();
3943

3944
	if (!CIFS_CACHE_READ(CIFS_I(inode)))
3945
		rc = cifs_zap_mapping(inode);
3946 3947 3948
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
3949
		vma->vm_ops = &cifs_file_vm_ops;
3950

3951
	free_xid(xid);
3952 3953 3954
	return rc;
}

L
Linus Torvalds 已提交
3955 3956 3957 3958
int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

3959
	xid = get_xid();
3960

J
Jeff Layton 已提交
3961
	rc = cifs_revalidate_file(file);
3962
	if (rc)
3963 3964
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
3965 3966 3967
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
3968
		vma->vm_ops = &cifs_file_vm_ops;
3969

3970
	free_xid(xid);
L
Linus Torvalds 已提交
3971 3972 3973
	return rc;
}

3974 3975 3976
static void
cifs_readv_complete(struct work_struct *work)
{
3977
	unsigned int i, got_bytes;
3978 3979 3980
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

3981
	got_bytes = rdata->got_bytes;
3982 3983 3984
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

3985 3986
		lru_cache_add_file(page);

3987 3988
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
3989 3990 3991 3992 3993 3994
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

3995 3996
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
3997 3998
			cifs_readpage_to_fscache(rdata->mapping->host, page);

3999
		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4000

4001
		put_page(page);
4002
		rdata->pages[i] = NULL;
4003
	}
4004
	kref_put(&rdata->refcount, cifs_readdata_release);
4005 4006
}

4007
static int
4008 4009 4010
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
4011
{
4012
	int result = 0;
4013
	unsigned int i;
4014 4015
	u64 eof;
	pgoff_t eof_index;
4016
	unsigned int nr_pages = rdata->nr_pages;
L
Long Li 已提交
4017
	unsigned int page_offset = rdata->page_offset;
4018 4019 4020

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
4021
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4022
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4023

4024
	rdata->got_bytes = 0;
4025
	rdata->tailsz = PAGE_SIZE;
4026 4027
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
L
Long Li 已提交
4028 4029 4030 4031 4032 4033 4034 4035 4036
		unsigned int to_read = rdata->pagesz;
		size_t n;

		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;
4037

L
Long Li 已提交
4038 4039
		if (len >= to_read) {
			len -= to_read;
4040
		} else if (len > 0) {
4041
			/* enough for partial page, fill and zero the rest */
L
Long Li 已提交
4042
			zero_user(page, len + page_offset, to_read - len);
4043
			n = rdata->tailsz = len;
4044
			len = 0;
4045 4046 4047 4048 4049 4050 4051 4052 4053
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
4054
			zero_user(page, 0, PAGE_SIZE);
4055 4056 4057 4058
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
4059
			put_page(page);
4060 4061
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
4062
			continue;
4063 4064 4065 4066
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
4067
			put_page(page);
4068 4069
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
4070
			continue;
4071
		}
4072

4073
		if (iter)
L
Long Li 已提交
4074 4075
			result = copy_page_from_iter(
					page, page_offset, n, iter);
4076 4077 4078 4079
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
4080
		else
L
Long Li 已提交
4081 4082
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
4083 4084 4085
		if (result < 0)
			break;

4086
		rdata->got_bytes += result;
4087 4088
	}

4089 4090
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
4091 4092
}

4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107
/* Socket-receive callback for readpages: fill pages from the wire. */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}

/*
 * Copy already-received data out of @iter into rdata->pages; used as
 * the ->copy_into_pages callback.
 */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	/* consume everything the iterator currently holds */
	return readpages_fill_pages(server, rdata, iter, iter->count);
}

4108 4109 4110 4111 4112 4113 4114 4115
/*
 * Peel a run of index-contiguous pages off the tail of @page_list and
 * insert them into the page cache, locked.  The run is capped at
 * @rsize bytes.  On success the pages sit on @tmplist and the read is
 * described by *offset (first byte), *bytes (length) and *nr_pages.
 *
 * Returns 0 on success, or the add_to_page_cache_locked() error if
 * even the first page could not be inserted.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *cur, *nxt;
	unsigned int next_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	cur = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(cur);
	rc = add_to_page_cache_locked(cur, mapping, cur->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(cur);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)cur->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&cur->lru, tmplist);

	/* now try and add more pages onto the request */
	next_index = cur->index + 1;
	list_for_each_entry_safe_reverse(cur, nxt, page_list, lru) {
		/* discontinuity ? */
		if (cur->index != next_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(cur);
		if (add_to_page_cache_locked(cur, mapping, cur->index, gfp)) {
			__ClearPageLocked(cur);
			break;
		}
		list_move_tail(&cur->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		next_index++;
		(*nr_pages)++;
	}
	return rc;
}

L
Linus Torvalds 已提交
4167 4168 4169
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
4170 4171 4172
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
4173
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4174
	struct TCP_Server_Info *server;
4175
	pid_t pid;
4176
	unsigned int xid;
L
Linus Torvalds 已提交
4177

4178
	xid = get_xid();
4179 4180 4181
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
4182 4183 4184
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
4185 4186 4187
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
4188 4189
	if (rc == 0) {
		free_xid(xid);
4190
		return rc;
4191
	}
4192

4193 4194 4195 4196 4197
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

4198
	rc = 0;
4199
	server = tlink_tcon(open_file->tlink)->ses->server;
L
Linus Torvalds 已提交
4200

4201 4202
	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);
4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
4216
		unsigned int i, nr_pages, bytes, rsize;
4217 4218 4219
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
4220 4221
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
L
Linus Torvalds 已提交
4222

4223 4224 4225 4226 4227 4228 4229 4230
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

4231
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4232
						   &rsize, credits);
4233 4234
		if (rc)
			break;
4235 4236

		/*
4237 4238 4239 4240
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
4241
		 */
4242
		if (unlikely(rsize < PAGE_SIZE)) {
4243
			add_credits_and_wake_if(server, credits, 0);
4244
			free_xid(xid);
4245
			return 0;
4246
		}
4247

4248 4249
		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
4250
		if (rc) {
4251
			add_credits_and_wake_if(server, credits, 0);
4252 4253 4254
			break;
		}

4255
		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4256 4257 4258 4259 4260 4261
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
4262
				put_page(page);
4263 4264
			}
			rc = -ENOMEM;
4265
			add_credits_and_wake_if(server, credits, 0);
4266 4267 4268
			break;
		}

4269
		rdata->cfile = cifsFileInfo_get(open_file);
4270 4271 4272 4273
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
4274
		rdata->pagesz = PAGE_SIZE;
L
Long Li 已提交
4275
		rdata->tailsz = PAGE_SIZE;
4276
		rdata->read_into_pages = cifs_readpages_read_into_pages;
4277
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4278
		rdata->credits = credits_on_stack;
4279 4280 4281 4282 4283

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}
4284

4285 4286 4287 4288
		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
4289 4290
				rc = -EAGAIN;
			else
4291 4292 4293
				rc = server->ops->async_readv(rdata);
		}

4294
		if (rc) {
4295
			add_credits_and_wake_if(server, &rdata->credits, 0);
4296 4297
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
4298 4299
				lru_cache_add_file(page);
				unlock_page(page);
4300
				put_page(page);
L
Linus Torvalds 已提交
4301
			}
4302
			/* Fallback to the readpage in error/reconnect cases */
4303
			kref_put(&rdata->refcount, cifs_readdata_release);
L
Linus Torvalds 已提交
4304 4305
			break;
		}
4306 4307

		kref_put(&rdata->refcount, cifs_readdata_release);
L
Linus Torvalds 已提交
4308 4309
	}

4310 4311 4312 4313 4314
	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
4315
	free_xid(xid);
L
Linus Torvalds 已提交
4316 4317 4318
	return rc;
}

4319 4320 4321
/*
 * cifs_readpage_worker must be called with the page pinned
 */
L
Linus Torvalds 已提交
4322 4323 4324 4325 4326 4327
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

4328
	/* Is the page cached? */
A
Al Viro 已提交
4329
	rc = cifs_readpage_from_fscache(file_inode(file), page);
4330 4331 4332
	if (rc == 0)
		goto read_complete;

L
Linus Torvalds 已提交
4333 4334
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */
S
Steve French 已提交
4335

4336
	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
S
Steve French 已提交
4337

L
Linus Torvalds 已提交
4338 4339 4340
	if (rc < 0)
		goto io_error;
	else
4341
		cifs_dbg(FYI, "Bytes read %d\n", rc);
S
Steve French 已提交
4342

4343 4344 4345 4346 4347 4348
	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));
S
Steve French 已提交
4349

4350 4351
	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);
L
Linus Torvalds 已提交
4352 4353 4354

	flush_dcache_page(page);
	SetPageUptodate(page);
4355 4356

	/* send this page to the cache */
A
Al Viro 已提交
4357
	cifs_readpage_to_fscache(file_inode(file), page);
4358

L
Linus Torvalds 已提交
4359
	rc = 0;
S
Steve French 已提交
4360

L
Linus Torvalds 已提交
4361
io_error:
S
Steve French 已提交
4362
	kunmap(page);
4363
	unlock_page(page);
4364 4365

read_complete:
L
Linus Torvalds 已提交
4366 4367 4368 4369 4370
	return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
4371
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
L
Linus Torvalds 已提交
4372
	int rc = -EACCES;
4373
	unsigned int xid;
L
Linus Torvalds 已提交
4374

4375
	xid = get_xid();
L
Linus Torvalds 已提交
4376 4377

	if (file->private_data == NULL) {
4378
		rc = -EBADF;
4379
		free_xid(xid);
4380
		return rc;
L
Linus Torvalds 已提交
4381 4382
	}

4383
	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4384
		 page, (int)offset, (int)offset);
L
Linus Torvalds 已提交
4385 4386 4387

	rc = cifs_readpage_worker(file, page, &offset);

4388
	free_xid(xid);
L
Linus Torvalds 已提交
4389 4390 4391
	return rc;
}

4392 4393 4394
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;
4395 4396
	struct cifs_tcon *tcon =
		cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4397

4398
	spin_lock(&tcon->open_file_lock);
4399
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4400
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4401
			spin_unlock(&tcon->open_file_lock);
4402 4403 4404
			return 1;
		}
	}
4405
	spin_unlock(&tcon->open_file_lock);
4406 4407 4408
	return 0;
}

L
Linus Torvalds 已提交
4409 4410 4411
/* We do not want to update the file size from server for inodes
   open for write - to avoid races with writepage extending
   the file - in the future we could consider allowing
S
Steve French 已提交
4412
   refreshing the inode only on increases in the file size
L
Linus Torvalds 已提交
4413 4414
   but this is tricky to do without racing with writebehind
   page caching in the current Linux kernel design */
4415
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
L
Linus Torvalds 已提交
4416
{
4417
	if (!cifsInode)
4418
		return true;
4419

4420 4421
	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
4422 4423 4424
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
S
Steve French 已提交
4425
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
S
Steve French 已提交
4426
			/* since no page cache to corrupt on directio
4427
			we can change size safely */
4428
			return true;
4429 4430
		}

S
Steve French 已提交
4431
		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4432
			return true;
4433

4434
		return false;
4435
	} else
4436
		return true;
L
Linus Torvalds 已提交
4437 4438
}

N
Nick Piggin 已提交
4439 4440 4441
/*
 * ->write_begin(): pin the target page for a buffered write.  We avoid
 * a read-modify-write round trip when the write covers the whole page,
 * when we hold a read oplock and the page is at/past EOF, or (after one
 * read attempt) when the read simply failed — cifs_write_end does a
 * sync write if PG_uptodate never got set.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int read_tried = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !read_tried) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		read_tried = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}

4516 4517 4518 4519 4520 4521 4522 4523
/*
 * ->releasepage(): refuse to free pages that still carry private data;
 * otherwise let fscache decide whether the page may go.
 */
static int cifs_release_page(struct page *page, gfp_t gfp)
{
	return PagePrivate(page) ? 0 : cifs_fscache_release_page(page, gfp);
}

4524 4525
static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
4526 4527 4528
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

4529
	if (offset == 0 && length == PAGE_SIZE)
4530 4531 4532
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

4533 4534 4535 4536
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
4537
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4538 4539 4540 4541 4542 4543 4544
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

4545
	cifs_dbg(FYI, "Launder page: %p\n", page);
4546 4547 4548 4549 4550 4551 4552 4553

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

4554
void cifs_oplock_break(struct work_struct *work)
4555 4556 4557
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
4558
	struct inode *inode = d_inode(cfile->dentry);
4559
	struct cifsInodeInfo *cinode = CIFS_I(inode);
4560
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4561
	struct TCP_Server_Info *server = tcon->ses->server;
4562
	int rc = 0;
4563

4564
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4565
			TASK_UNINTERRUPTIBLE);
4566 4567 4568 4569

	server->ops->downgrade_oplock(server, cinode,
		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

4570
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4571
						cifs_has_mand_locks(cinode)) {
4572 4573
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
4574
		cinode->oplock = 0;
4575 4576
	}

4577
	if (inode && S_ISREG(inode->i_mode)) {
4578
		if (CIFS_CACHE_READ(cinode))
4579
			break_lease(inode, O_RDONLY);
S
Steve French 已提交
4580
		else
4581
			break_lease(inode, O_WRONLY);
4582
		rc = filemap_fdatawrite(inode->i_mapping);
4583
		if (!CIFS_CACHE_READ(cinode)) {
4584 4585
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
4586
			cifs_zap_mapping(inode);
4587
		}
4588
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4589 4590
	}

4591 4592
	rc = cifs_push_locks(cfile);
	if (rc)
4593
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4594

4595 4596 4597 4598 4599 4600
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
4601
	if (!cfile->oplock_break_cancelled) {
4602 4603
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
4604
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4605
	}
4606
	cifs_done_oplock_break(cinode);
4607 4608
}

4609 4610 4611 4612 4613 4614 4615 4616 4617 4618
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allowes open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
 * so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode. 
 */
static ssize_t
4619
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4620 4621 4622 4623 4624 4625 4626 4627 4628
{
        /*
         * FIXME
         * Eventually need to support direct IO for non forcedirectio mounts
         */
        return -EINVAL;
}


4629
const struct address_space_operations cifs_addr_ops = {
L
Linus Torvalds 已提交
4630 4631 4632
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
4633
	.writepages = cifs_writepages,
N
Nick Piggin 已提交
4634 4635
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
L
Linus Torvalds 已提交
4636
	.set_page_dirty = __set_page_dirty_nobuffers,
4637
	.releasepage = cifs_release_page,
4638
	.direct_IO = cifs_direct_io,
4639
	.invalidatepage = cifs_invalidate_page,
4640
	.launder_page = cifs_launder_page,
L
Linus Torvalds 已提交
4641
};
D
Dave Kleikamp 已提交
4642 4643 4644 4645 4646 4647

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
4648
const struct address_space_operations cifs_addr_ops_smallbuf = {
D
Dave Kleikamp 已提交
4649 4650 4651
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
N
Nick Piggin 已提交
4652 4653
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
D
Dave Kleikamp 已提交
4654
	.set_page_dirty = __set_page_dirty_nobuffers,
4655 4656
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
4657
	.launder_page = cifs_launder_page,
D
Dave Kleikamp 已提交
4658
};