/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
#include "compat.h"
/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
C
Chris Mason 已提交
46
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
47 48
					 int write_bytes,
					 struct page **prepared_pages,
C
Chris Mason 已提交
49
					 const char __user *buf)
C
Chris Mason 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
{
	long page_fault = 0;
	int i;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[i];
		fault_in_pages_readable(buf, count);

		/* Copy data from userspace to the current page */
		kmap(page);
		page_fault = __copy_from_user(page_address(page) + offset,
					      buf, count);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);
		buf += count;
		write_bytes -= count;

		if (page_fault)
			break;
	}
	return page_fault ? -EFAULT : 0;
}

/*
 * unlocks pages after btrfs_file_write is done with them
 */
C
Chris Mason 已提交
80
static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
C
Chris Mason 已提交
81 82 83 84 85
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
C
Chris Mason 已提交
86 87 88 89
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty
		 * clear it here
		 */
C
Chris Mason 已提交
90
		ClearPageChecked(pages[i]);
C
Chris Mason 已提交
91 92 93 94 95 96
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}

/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
C
Chris Mason 已提交
105
static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
C
Chris Mason 已提交
106 107 108 109 110 111 112 113
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
114
	int i;
115
	struct inode *inode = fdentry(file)->d_inode;
116
	u64 num_bytes;
117 118 119 120
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);
C
Chris Mason 已提交
121

122
	start_pos = pos & ~((u64)root->sectorsize - 1);
123 124
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
C
Chris Mason 已提交
125

126
	end_of_last_block = start_pos + num_bytes - 1;
127 128
	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
					NULL);
129
	BUG_ON(err);
J
Josef Bacik 已提交
130

C
Chris Mason 已提交
131 132 133 134 135
	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
136 137 138
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
139 140 141 142
		/* we've only changed i_size in ram, and we haven't updated
		 * the disk i_size.  There is no need to log the inode
		 * at this time.
		 */
C
Chris Mason 已提交
143
	}
144
	return 0;
C
Chris Mason 已提交
145 146
}

/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
151 152
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			    int skip_pinned)
153 154
{
	struct extent_map *em;
155 156
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
157
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
158
	u64 len = end - start + 1;
159 160
	int ret;
	int testend = 1;
161
	unsigned long flags;
C
Chris Mason 已提交
162
	int compressed = 0;
163

164
	WARN_ON(end < start);
165
	if (end == (u64)-1) {
166
		len = (u64)-1;
167 168
		testend = 0;
	}
C
Chris Mason 已提交
169
	while (1) {
170 171 172 173 174
		if (!split)
			split = alloc_extent_map(GFP_NOFS);
		if (!split2)
			split2 = alloc_extent_map(GFP_NOFS);

175
		write_lock(&em_tree->lock);
176
		em = lookup_extent_mapping(em_tree, start, len);
177
		if (!em) {
178
			write_unlock(&em_tree->lock);
179
			break;
180
		}
181 182
		flags = em->flags;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
183
			if (testend && em->start + em->len >= start + len) {
184
				free_extent_map(em);
C
Chris Mason 已提交
185
				write_unlock(&em_tree->lock);
186 187
				break;
			}
188 189
			start = em->start + em->len;
			if (testend)
190 191
				len = start + len - (em->start + em->len);
			free_extent_map(em);
C
Chris Mason 已提交
192
			write_unlock(&em_tree->lock);
193 194
			continue;
		}
C
Chris Mason 已提交
195
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
196
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
197
		remove_extent_mapping(em_tree, em);
198 199 200 201 202

		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    em->start < start) {
			split->start = em->start;
			split->len = start - em->start;
203
			split->orig_start = em->orig_start;
204
			split->block_start = em->block_start;
C
Chris Mason 已提交
205 206 207 208 209 210

			if (compressed)
				split->block_len = em->block_len;
			else
				split->block_len = split->len;

211
			split->bdev = em->bdev;
212
			split->flags = flags;
213 214 215 216 217 218 219 220 221 222 223 224 225
			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
226
			split->flags = flags;
227

C
Chris Mason 已提交
228 229 230
			if (compressed) {
				split->block_len = em->block_len;
				split->block_start = em->block_start;
231
				split->orig_start = em->orig_start;
C
Chris Mason 已提交
232 233 234
			} else {
				split->block_len = split->len;
				split->block_start = em->block_start + diff;
235
				split->orig_start = split->start;
C
Chris Mason 已提交
236
			}
237 238 239 240 241 242

			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = NULL;
		}
243
		write_unlock(&em_tree->lock);
244

245 246 247 248 249
		/* once for us */
		free_extent_map(em);
		/* once for the tree*/
		free_extent_map(em);
	}
250 251 252 253
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
254 255 256
	return 0;
}

/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
Y
Yan, Zheng 已提交
266 267
int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
		       u64 start, u64 end, u64 *hint_byte, int drop_cache)
C
Chris Mason 已提交
268
{
Y
Yan, Zheng 已提交
269
	struct btrfs_root *root = BTRFS_I(inode)->root;
270
	struct extent_buffer *leaf;
Y
Yan, Zheng 已提交
271
	struct btrfs_file_extent_item *fi;
C
Chris Mason 已提交
272
	struct btrfs_path *path;
273
	struct btrfs_key key;
Y
Yan, Zheng 已提交
274 275 276 277 278 279 280 281 282
	struct btrfs_key new_key;
	u64 search_start = start;
	u64 disk_bytenr = 0;
	u64 num_bytes = 0;
	u64 extent_offset = 0;
	u64 extent_end = 0;
	int del_nr = 0;
	int del_slot = 0;
	int extent_type;
C
Chris Mason 已提交
283
	int recow;
284
	int ret;
C
Chris Mason 已提交
285

C
Chris Mason 已提交
286 287
	if (drop_cache)
		btrfs_drop_extent_cache(inode, start, end - 1, 0);
288

C
Chris Mason 已提交
289 290 291
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
Y
Yan, Zheng 已提交
292

C
Chris Mason 已提交
293
	while (1) {
C
Chris Mason 已提交
294
		recow = 0;
C
Chris Mason 已提交
295 296 297
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
Y
Yan, Zheng 已提交
298 299 300 301 302 303 304
			break;
		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
			if (key.objectid == inode->i_ino &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
C
Chris Mason 已提交
305
		}
Y
Yan, Zheng 已提交
306
		ret = 0;
307
next_slot:
308
		leaf = path->nodes[0];
Y
Yan, Zheng 已提交
309 310 311 312 313 314 315 316
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			BUG_ON(del_nr > 0);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret > 0) {
				ret = 0;
				break;
317
			}
Y
Yan, Zheng 已提交
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
			leaf = path->nodes[0];
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid > inode->i_ino ||
		    key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf, fi);
341
		} else {
Y
Yan, Zheng 已提交
342
			WARN_ON(1);
343
			extent_end = search_start;
C
Chris Mason 已提交
344 345
		}

Y
Yan, Zheng 已提交
346 347
		if (extent_end <= search_start) {
			path->slots[0]++;
348
			goto next_slot;
C
Chris Mason 已提交
349 350
		}

Y
Yan, Zheng 已提交
351 352 353 354
		search_start = max(key.offset, start);
		if (recow) {
			btrfs_release_path(root, path);
			continue;
C
Chris Mason 已提交
355
		}
Y
Yan Zheng 已提交
356

Y
Yan, Zheng 已提交
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end < extent_end) {
			BUG_ON(del_nr > 0);
			BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = start;
			ret = btrfs_duplicate_item(trans, root, path,
						   &new_key);
			if (ret == -EAGAIN) {
				btrfs_release_path(root, path);
				continue;
Y
Yan Zheng 已提交
372
			}
Y
Yan, Zheng 已提交
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
			if (ret < 0)
				break;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			extent_offset += start - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - start);
			btrfs_mark_buffer_dirty(leaf);

			if (disk_bytenr > 0) {
392
				ret = btrfs_inc_extent_ref(trans, root,
Y
Yan, Zheng 已提交
393 394 395 396
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
						start - extent_offset);
397
				BUG_ON(ret);
Y
Yan, Zheng 已提交
398
				*hint_byte = disk_bytenr;
399
			}
Y
Yan, Zheng 已提交
400
			key.offset = start;
Y
Yan Zheng 已提交
401
		}
Y
Yan, Zheng 已提交
402 403 404 405 406 407
		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 */
		if (start <= key.offset && end < extent_end) {
			BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
Y
Yan Zheng 已提交
408

Y
Yan, Zheng 已提交
409 410 411
			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = end;
			btrfs_set_item_key_safe(trans, root, path, &new_key);
Y
Yan Zheng 已提交
412

Y
Yan, Zheng 已提交
413 414 415 416 417 418 419 420
			extent_offset += end - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_mark_buffer_dirty(leaf);
			if (disk_bytenr > 0) {
				inode_sub_bytes(inode, end - key.offset);
				*hint_byte = disk_bytenr;
C
Chris Mason 已提交
421
			}
Y
Yan, Zheng 已提交
422
			break;
C
Chris Mason 已提交
423
		}
424

Y
Yan, Zheng 已提交
425 426 427 428 429 430 431 432
		search_start = extent_end;
		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end >= extent_end) {
			BUG_ON(del_nr > 0);
			BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
433

Y
Yan, Zheng 已提交
434 435 436 437 438 439 440 441 442
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_mark_buffer_dirty(leaf);
			if (disk_bytenr > 0) {
				inode_sub_bytes(inode, extent_end - start);
				*hint_byte = disk_bytenr;
			}
			if (end == extent_end)
				break;
C
Chris Mason 已提交
443

Y
Yan, Zheng 已提交
444 445
			path->slots[0]++;
			goto next_slot;
Z
Zheng Yan 已提交
446 447
		}

Y
Yan, Zheng 已提交
448 449 450 451 452 453 454 455 456 457 458 459
		/*
		 *  | ---- range to drop ----- |
		 *    | ------ extent ------ |
		 */
		if (start <= key.offset && end >= extent_end) {
			if (del_nr == 0) {
				del_slot = path->slots[0];
				del_nr = 1;
			} else {
				BUG_ON(del_slot + del_nr != path->slots[0]);
				del_nr++;
			}
Z
Zheng Yan 已提交
460

Y
Yan, Zheng 已提交
461
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
462
				inode_sub_bytes(inode,
Y
Yan, Zheng 已提交
463 464 465 466
						extent_end - key.offset);
				extent_end = ALIGN(extent_end,
						   root->sectorsize);
			} else if (disk_bytenr > 0) {
Z
Zheng Yan 已提交
467
				ret = btrfs_free_extent(trans, root,
Y
Yan, Zheng 已提交
468 469
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
470
						key.objectid, key.offset -
Y
Yan, Zheng 已提交
471
						extent_offset);
Z
Zheng Yan 已提交
472
				BUG_ON(ret);
Y
Yan, Zheng 已提交
473 474 475
				inode_sub_bytes(inode,
						extent_end - key.offset);
				*hint_byte = disk_bytenr;
Z
Zheng Yan 已提交
476 477
			}

Y
Yan, Zheng 已提交
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
			if (end == extent_end)
				break;

			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
				path->slots[0]++;
				goto next_slot;
			}

			ret = btrfs_del_items(trans, root, path, del_slot,
					      del_nr);
			BUG_ON(ret);

			del_nr = 0;
			del_slot = 0;

			btrfs_release_path(root, path);
			continue;
C
Chris Mason 已提交
495
		}
Y
Yan, Zheng 已提交
496 497

		BUG_ON(1);
C
Chris Mason 已提交
498
	}
Y
Yan, Zheng 已提交
499 500 501 502

	if (del_nr > 0) {
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		BUG_ON(ret);
Y
Yan Zheng 已提交
503
	}
Y
Yan, Zheng 已提交
504 505

	btrfs_free_path(path);
C
Chris Mason 已提交
506 507 508
	return ret;
}

/*
 * Check whether the item at slot in leaf is a file extent that can be
 * merged with a neighbouring extent.
 *
 * The item qualifies when it is an EXTENT_DATA item of objectid, is a
 * regular (BTRFS_FILE_EXTENT_REG) extent on disk bytenr, its extent
 * offset equals key.offset - orig_offset, and it uses no compression,
 * encryption or other encoding.
 *
 * *start and *end act as filters on input: a non-zero *start must equal
 * the item's file offset and a non-zero *end must equal the item's end
 * offset.  A value of 0 means "don't care".
 *
 * Returns 1 and stores the item's file offset and end offset in *start
 * and *end when it is mergeable; returns 0 (leaving both untouched)
 * otherwise, including when slot is outside the leaf.
 */
static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 orig_offset,
			    u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 extent_end;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
	    btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
	    btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		return 0;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if ((*start && *start != key.offset) || (*end && *end != extent_end))
		return 0;

	*start = key.offset;
	*end = extent_end;
	return 1;
}

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.
 *
 * The range must lie inside a single pre-allocated file extent item of
 * the inode; anything else trips a BUG_ON().  Where possible the
 * written part is merged with a neighbouring regular extent that
 * points at the same disk extent (see extent_mergeable()).
 *
 * Always returns 0.
 *
 * NOTE(review): a failed btrfs_alloc_path() hits BUG_ON(), and a
 * negative return from btrfs_search_slot() is not checked before the
 * path is used; confirm whether callers could handle an error return
 * instead.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct inode *inode, u64 start, u64 end)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 orig_offset;
	u64 other_start;
	u64 other_end;
	u64 split;
	int del_nr = 0;
	int del_slot = 0;
	int recow;
	int ret;

	btrfs_drop_extent_cache(inode, start, end - 1, 0);

	path = btrfs_alloc_path();
	BUG_ON(!path);
again:
	recow = 0;
	split = start;
	key.objectid = inode->i_ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = split;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	/* no exact match: the extent covering start is in the previous slot */
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	BUG_ON(key.objectid != inode->i_ino ||
	       key.type != BTRFS_EXTENT_DATA_KEY);
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	BUG_ON(btrfs_file_extent_type(leaf, fi) !=
	       BTRFS_FILE_EXTENT_PREALLOC);
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	BUG_ON(key.offset > start || extent_end < end);

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	/* file offset that corresponds to byte 0 of the disk extent */
	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
	memcpy(&new_key, &key, sizeof(new_key));

	/*
	 * Written range is the head of the extent: if the previous item is
	 * mergeable, grow it over [start, end) and shrink this one.
	 */
	if (start == key.offset && end < extent_end) {
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1,
				     inode->i_ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			new_key.offset = end;
			btrfs_set_item_key_safe(trans, root, path, &new_key);
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_set_file_extent_offset(leaf, fi,
						     end - orig_offset);
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							end - other_start);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	/*
	 * Written range is the tail of the extent: if the next item is
	 * mergeable, shrink this one and grow the next one backwards.
	 */
	if (start > key.offset && end == extent_end) {
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1,
				     inode->i_ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			path->slots[0]++;
			new_key.offset = start;
			btrfs_set_item_key_safe(trans, root, path, &new_key);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - start);
			btrfs_set_file_extent_offset(leaf, fi,
						     start - orig_offset);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	/*
	 * Split the item until one item covers exactly [start, end): first
	 * at start (if needed), then at end (if needed).
	 */
	while (start > key.offset || end < extent_end) {
		if (key.offset == start)
			split = end;

		new_key.offset = split;
		ret = btrfs_duplicate_item(trans, root, path, &new_key);
		if (ret == -EAGAIN) {
			btrfs_release_path(root, path);
			goto again;
		}
		BUG_ON(ret < 0);

		leaf = path->nodes[0];
		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						split - key.offset);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - split);
		btrfs_mark_buffer_dirty(leaf);

		/* both halves reference the same disk extent */
		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
					   root->root_key.objectid,
					   inode->i_ino, orig_offset);
		BUG_ON(ret);

		if (split == start) {
			key.offset = start;
		} else {
			BUG_ON(start != key.offset);
			path->slots[0]--;
			extent_end = end;
		}
		recow = 1;
	}

	/* try to merge with the item behind the written range */
	other_start = end;
	other_end = 0;
	if (extent_mergeable(leaf, path->slots[0] + 1,
			     inode->i_ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(root, path);
			goto again;
		}
		extent_end = other_end;
		del_slot = path->slots[0] + 1;
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					inode->i_ino, orig_offset);
		BUG_ON(ret);
	}
	/* try to merge with the item in front of the written range */
	other_start = 0;
	other_end = start;
	if (extent_mergeable(leaf, path->slots[0] - 1,
			     inode->i_ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(root, path);
			goto again;
		}
		key.offset = other_start;
		del_slot = path->slots[0];
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					inode->i_ino, orig_offset);
		BUG_ON(ret);
	}
	if (del_nr == 0) {
		/* nothing merged: just flip the type of the written item */
		fi = btrfs_item_ptr(leaf, path->slots[0],
			   struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_mark_buffer_dirty(leaf);
	} else {
		/*
		 * The item in front of the deleted slot(s) becomes the
		 * merged regular extent covering [key.offset, extent_end).
		 */
		fi = btrfs_item_ptr(leaf, del_slot - 1,
			   struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		BUG_ON(ret);
	}
out:
	btrfs_free_path(path);
	return 0;
}

/*
C
Chris Mason 已提交
748 749 750
 * this gets pages into the page cache and locks them down, it also properly
 * waits for data=ordered extents to finish before allowing the pages to be
 * modified.
C
Chris Mason 已提交
751
 */
C
Chris Mason 已提交
752
static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
753 754 755
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
C
Chris Mason 已提交
756
{
757
	struct extent_state *cached_state = NULL;
C
Chris Mason 已提交
758 759
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
760
	struct inode *inode = fdentry(file)->d_inode;
C
Chris Mason 已提交
761
	int err = 0;
762
	u64 start_pos;
763
	u64 last_pos;
764

765
	start_pos = pos & ~((u64)root->sectorsize - 1);
766
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
C
Chris Mason 已提交
767

Y
Yan Zheng 已提交
768 769 770 771 772 773
	if (start_pos > inode->i_size) {
		err = btrfs_cont_expand(inode, start_pos);
		if (err)
			return err;
	}

C
Chris Mason 已提交
774
	memset(pages, 0, num_pages * sizeof(struct page *));
775
again:
C
Chris Mason 已提交
776 777 778 779
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
780
			BUG_ON(1);
C
Chris Mason 已提交
781
		}
C
Chris Mason 已提交
782
		wait_on_page_writeback(pages[i]);
C
Chris Mason 已提交
783
	}
784
	if (start_pos < inode->i_size) {
785
		struct btrfs_ordered_extent *ordered;
786 787 788
		lock_extent_bits(&BTRFS_I(inode)->io_tree,
				 start_pos, last_pos - 1, 0, &cached_state,
				 GFP_NOFS);
C
Chris Mason 已提交
789 790
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    last_pos - 1);
791 792 793 794
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			btrfs_put_ordered_extent(ordered);
795 796 797
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     start_pos, last_pos - 1,
					     &cached_state, GFP_NOFS);
798 799 800 801 802 803 804 805 806 807 808
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

809
		clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
810
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
811
				  EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
812
				  GFP_NOFS);
813 814 815
		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
				     start_pos, last_pos - 1, &cached_state,
				     GFP_NOFS);
816
	}
817
	for (i = 0; i < num_pages; i++) {
818
		clear_page_dirty_for_io(pages[i]);
819 820 821
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
C
Chris Mason 已提交
822 823 824
	return 0;
}

825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865
/*
 * Copied from read-write.c.
 *
 * Marks the task TASK_UNINTERRUPTIBLE, then either clears the kicked
 * flag of iocb when it is already set or calls schedule() when it is
 * not, and finally puts the task back into TASK_RUNNING.
 */
static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
{
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (kiocbIsKicked(iocb))
		kiocbClearKicked(iocb);
	else
		schedule();
	__set_current_state(TASK_RUNNING);
}

/*
 * Just a copy of what do_sync_write does.
 *
 * Wraps buf/count in a single-segment iovec and a synchronous kiocb
 * positioned at pos, then calls generic_file_direct_write(), retrying
 * for as long as it returns -EIOCBRETRY.  A result of -EIOCBQUEUED is
 * replaced by the result of wait_on_sync_kiocb().  *ppos is set to
 * the kiocb position before the result is returned.
 */
static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
				    size_t count, loff_t pos, loff_t *ppos)
{
	struct iovec vec = {
		.iov_base = (void __user *)buf,
		.iov_len = count,
	};
	unsigned long segs = 1;
	struct kiocb req;
	ssize_t written;

	init_sync_kiocb(&req, file);
	req.ki_pos = pos;
	req.ki_left = count;
	req.ki_nbytes = count;

	/* retry for as long as the direct write asks for it */
	while ((written = generic_file_direct_write(&req, &vec, &segs, pos,
						    ppos, count, count)) ==
	       -EIOCBRETRY)
		wait_on_retry_sync_kiocb(&req);

	if (written == -EIOCBQUEUED)
		written = wait_on_sync_kiocb(&req);

	*ppos = req.ki_pos;
	return written;
}

/*
 * write() entry point for btrfs files.
 *
 * With the inode mutex held this runs the generic write checks, drops
 * suid and updates the file times.  For O_DIRECT files a direct write is
 * attempted first; whatever it did not write falls through to the
 * buffered loop below.  The buffered loop reserves delalloc space,
 * prepares a batch of pages, copies the user data in and dirties the
 * pages, at most nrptrs pages per pass.
 *
 * For O_DSYNC / IS_SYNC / O_DIRECT writes the written range is pushed out
 * as we go and waited on afterwards; O_DSYNC / IS_SYNC writes additionally
 * log the dentry (or commit the transaction when logging is not enough).
 *
 * Returns the number of bytes written if any were written, otherwise the
 * error (or 0).  *ppos is updated to the position after the write.
 */
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
	int ret = 0;
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	int nrptrs;
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
	int will_write;
	int buffered = 0;

	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
		      (file->f_flags & O_DIRECT));

	pinned[0] = NULL;
	pinned[1] = NULL;

	pos = *ppos;
	start_pos = pos;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	mutex_lock(&inode->i_mutex);

	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;

	if (count == 0)
		goto out;

	err = file_remove_suid(file);
	if (err)
		goto out;

	file_update_time(file);
	BTRFS_I(inode)->sequence++;

	if (unlikely(file->f_flags & O_DIRECT)) {
		num_written = __btrfs_direct_write(file, buf, count, pos,
						   ppos);
		/*
		 * On failure num_written holds a negative errno.  Bail out
		 * before it is folded into pos/count, otherwise *ppos would
		 * be moved backwards by the error value.
		 */
		if (num_written < 0)
			goto out;

		pos += num_written;
		count -= num_written;

		/* We've written everything we wanted to, exit */
		if (!count)
			goto out;

		/*
		 * We are going to do buffered for the rest of the range, so we
		 * need to make sure to invalidate the buffered pages when we're
		 * done.
		 */
		buffered = 1;
		buf += num_written;
	}

	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		/* the loop below memset()s this array, so we can't go on */
		ret = -ENOMEM;
		goto out;
	}

	/* generic_write_checks can change our pos */
	start_pos = pos;

	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!pinned[0]) {
			ret = -ENOMEM;
			goto out;
		}
		if (!PageUptodate(pinned[0])) {
			ret = btrfs_readpage(NULL, pinned[0]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!pinned[1]) {
			ret = -ENOMEM;
			goto out;
		}
		if (!PageUptodate(pinned[1])) {
			ret = btrfs_readpage(NULL, pinned[1]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	while (count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
					 offset);
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

		WARN_ON(num_pages > nrptrs);
		memset(pages, 0, sizeof(struct page *) * nrptrs);

		ret = btrfs_delalloc_reserve_space(inode, write_bytes);
		if (ret)
			goto out;

		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
				    write_bytes);
		if (ret) {
			btrfs_delalloc_release_space(inode, write_bytes);
			goto out;
		}

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
		if (ret == 0) {
			dirty_and_release_pages(NULL, root, file, pages,
						num_pages, pos, write_bytes);
		}

		/* the pages are dropped whether or not the copy worked */
		btrfs_drop_pages(pages, num_pages);
		if (ret) {
			btrfs_delalloc_release_space(inode, write_bytes);
			goto out;
		}

		if (will_write) {
			filemap_fdatawrite_range(inode->i_mapping, pos,
						 pos + write_bytes - 1);
		} else {
			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
							   num_pages);
			if (num_pages <
			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
				btrfs_btree_balance_dirty(root, 1);
			btrfs_throttle(root);
		}

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

		cond_resched();
	}
out:
	mutex_unlock(&inode->i_mutex);
	if (ret)
		err = ret;

	kfree(pages);
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;

	/*
	 * we want to make sure fsync finds this change
	 * but we haven't joined a transaction running right now.
	 *
	 * Later on, someone is sure to update the inode and get the
	 * real transid recorded.
	 *
	 * We set last_trans now to the fs_info generation + 1,
	 * this will either be one more than the running transaction
	 * or the generation used for the next transaction if there isn't
	 * one running right now.
	 */
	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;

	if (num_written > 0 && will_write) {
		struct btrfs_trans_handle *trans;

		err = btrfs_wait_ordered_range(inode, start_pos, num_written);
		if (err)
			num_written = err;

		if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
			trans = btrfs_start_transaction(root, 0);
			if (IS_ERR(trans)) {
				/* never hand an ERR_PTR to the log code */
				num_written = PTR_ERR(trans);
				goto done;
			}
			ret = btrfs_log_dentry_safe(trans, root,
						    file->f_dentry);
			if (ret == 0) {
				ret = btrfs_sync_log(trans, root);
				if (ret == 0)
					btrfs_end_transaction(trans, root);
				else
					btrfs_commit_transaction(trans, root);
			} else if (ret != BTRFS_NO_LOG_SYNC) {
				btrfs_commit_transaction(trans, root);
			} else {
				btrfs_end_transaction(trans, root);
			}
		}
		if (file->f_flags & O_DIRECT && buffered) {
			invalidate_mapping_pages(inode->i_mapping,
			      start_pos >> PAGE_CACHE_SHIFT,
			     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
		}
	}
done:
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

C
Chris Mason 已提交
1081
int btrfs_release_file(struct inode *inode, struct file *filp)
1082
{
1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
	/*
	 * ordered_data_close is set by settattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to
	 * flush down new bytes that may have been written if the
	 * application were using truncate to replace a file in place.
	 */
	if (BTRFS_I(inode)->ordered_data_close) {
		BTRFS_I(inode)->ordered_data_close = 0;
		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
			filemap_flush(inode->i_mapping);
	}
S
Sage Weil 已提交
1095 1096
	if (filp->private_data)
		btrfs_ioctl_trans_end(filp);
1097 1098 1099
	return 0;
}

C
Chris Mason 已提交
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates are
 * in the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
1111
int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
C
Chris Mason 已提交
1112 1113 1114
{
	struct inode *inode = dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
1115
	int ret = 0;
C
Chris Mason 已提交
1116 1117
	struct btrfs_trans_handle *trans;

1118 1119 1120 1121 1122 1123 1124

	/* we wait first, since the writeback may change the inode */
	root->log_batch++;
	/* the VFS called filemap_fdatawrite for us */
	btrfs_wait_ordered_range(inode, 0, (u64)-1);
	root->log_batch++;

C
Chris Mason 已提交
1125
	/*
1126 1127
	 * check the transaction that last modified this inode
	 * and see if its already been committed
C
Chris Mason 已提交
1128
	 */
1129 1130
	if (!BTRFS_I(inode)->last_trans)
		goto out;
1131

1132 1133 1134 1135 1136
	/*
	 * if the last transaction that changed this file was before
	 * the current transaction, we can bail out now without any
	 * syncing
	 */
1137 1138 1139 1140 1141 1142 1143 1144 1145 1146
	mutex_lock(&root->fs_info->trans_mutex);
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;
		mutex_unlock(&root->fs_info->trans_mutex);
		goto out;
	}
	mutex_unlock(&root->fs_info->trans_mutex);

	/*
1147 1148
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
1149
	if (file && file->private_data)
S
Sage Weil 已提交
1150 1151
		btrfs_ioctl_trans_end(file);

1152 1153 1154
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
C
Chris Mason 已提交
1155 1156
		goto out;
	}
1157

1158
	ret = btrfs_log_dentry_safe(trans, root, dentry);
C
Chris Mason 已提交
1159
	if (ret < 0)
1160
		goto out;
C
Chris Mason 已提交
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171

	/* we've logged all the items and now have a consistent
	 * version of the file in the log.  It is possible that
	 * someone will come in and modify the file, but that's
	 * fine because the log is consistent on disk, and we
	 * have references to all of the file's extents
	 *
	 * It is possible that someone will come in and log the
	 * file again, but that will end up using the synchronization
	 * inside btrfs_sync_log to keep things safe.
	 */
1172
	mutex_unlock(&dentry->d_inode->i_mutex);
C
Chris Mason 已提交
1173

1174 1175
	if (ret != BTRFS_NO_LOG_SYNC) {
		if (ret > 0) {
1176
			ret = btrfs_commit_transaction(trans, root);
1177 1178 1179 1180 1181 1182 1183 1184 1185
		} else {
			ret = btrfs_sync_log(trans, root);
			if (ret == 0)
				ret = btrfs_end_transaction(trans, root);
			else
				ret = btrfs_commit_transaction(trans, root);
		}
	} else {
		ret = btrfs_end_transaction(trans, root);
1186
	}
1187
	mutex_lock(&dentry->d_inode->i_mutex);
C
Chris Mason 已提交
1188
out:
1189
	return ret > 0 ? -EIO : ret;
C
Chris Mason 已提交
1190 1191
}

1192
static const struct vm_operations_struct btrfs_file_vm_ops = {
1193
	.fault		= filemap_fault,
C
Chris Mason 已提交
1194 1195 1196 1197 1198 1199 1200 1201 1202 1203
	.page_mkwrite	= btrfs_page_mkwrite,
};

/*
 * ->mmap hook: records the access on the file and points the new vma at
 * the btrfs VM operations.  Always returns 0.
 */
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	file_accessed(filp);
	vma->vm_ops = &btrfs_file_vm_ops;

	return 0;
}

1204
const struct file_operations btrfs_file_operations = {
C
Chris Mason 已提交
1205 1206
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
C
Chris Mason 已提交
1207
	.aio_read       = generic_file_aio_read,
C
Chris Mason 已提交
1208
	.splice_read	= generic_file_splice_read,
C
Chris Mason 已提交
1209
	.write		= btrfs_file_write,
C
Chris Mason 已提交
1210
	.mmap		= btrfs_file_mmap,
C
Chris Mason 已提交
1211
	.open		= generic_file_open,
1212
	.release	= btrfs_release_file,
C
Chris Mason 已提交
1213
	.fsync		= btrfs_sync_file,
C
Christoph Hellwig 已提交
1214
	.unlocked_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
1215
#ifdef CONFIG_COMPAT
C
Christoph Hellwig 已提交
1216
	.compat_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
1217 1218
#endif
};