file.c 33.6 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

C
Chris Mason 已提交
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
37 38
#include "tree-log.h"
#include "locking.h"
39
#include "compat.h"
C
Chris Mason 已提交
40 41


C
Chris Mason 已提交
42 43 44
/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
C
Chris Mason 已提交
45
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
46 47
					 int write_bytes,
					 struct page **prepared_pages,
C
Chris Mason 已提交
48
					 const char __user *buf)
C
Chris Mason 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
{
	long page_fault = 0;
	int i;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[i];
		fault_in_pages_readable(buf, count);

		/* Copy data from userspace to the current page */
		kmap(page);
		page_fault = __copy_from_user(page_address(page) + offset,
					      buf, count);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);
		buf += count;
		write_bytes -= count;

		if (page_fault)
			break;
	}
	return page_fault ? -EFAULT : 0;
}

C
Chris Mason 已提交
76 77 78
/*
 * unlocks pages after btrfs_file_write is done with them
 */
C
Chris Mason 已提交
79
static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
C
Chris Mason 已提交
80 81 82 83 84
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
C
Chris Mason 已提交
85 86 87 88
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty
		 * clear it here
		 */
C
Chris Mason 已提交
89
		ClearPageChecked(pages[i]);
C
Chris Mason 已提交
90 91 92 93 94 95
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}

C
Chris Mason 已提交
96 97 98 99 100 101 102 103
/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
C
Chris Mason 已提交
104
static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
C
Chris Mason 已提交
105 106 107 108 109 110 111 112
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
113
	int i;
114
	struct inode *inode = fdentry(file)->d_inode;
115
	u64 num_bytes;
116 117 118 119
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);
C
Chris Mason 已提交
120

121
	start_pos = pos & ~((u64)root->sectorsize - 1);
122 123
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
C
Chris Mason 已提交
124

125
	end_of_last_block = start_pos + num_bytes - 1;
J
Josef Bacik 已提交
126 127 128 129
	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
	if (err)
		return err;

C
Chris Mason 已提交
130 131 132 133 134
	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
135 136 137
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
138 139 140 141
		/* we've only changed i_size in ram, and we haven't updated
		 * the disk i_size.  There is no need to log the inode
		 * at this time.
		 */
C
Chris Mason 已提交
142 143 144 145
	}
	return err;
}

C
Chris Mason 已提交
146 147 148 149
/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
150 151
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			    int skip_pinned)
152 153
{
	struct extent_map *em;
154 155
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
156
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
157
	u64 len = end - start + 1;
158 159
	int ret;
	int testend = 1;
160
	unsigned long flags;
C
Chris Mason 已提交
161
	int compressed = 0;
162

163
	WARN_ON(end < start);
164
	if (end == (u64)-1) {
165
		len = (u64)-1;
166 167
		testend = 0;
	}
C
Chris Mason 已提交
168
	while (1) {
169 170 171 172 173
		if (!split)
			split = alloc_extent_map(GFP_NOFS);
		if (!split2)
			split2 = alloc_extent_map(GFP_NOFS);

174
		write_lock(&em_tree->lock);
175
		em = lookup_extent_mapping(em_tree, start, len);
176
		if (!em) {
177
			write_unlock(&em_tree->lock);
178
			break;
179
		}
180 181 182 183 184
		flags = em->flags;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			if (em->start <= start &&
			    (!testend || em->start + em->len >= start + len)) {
				free_extent_map(em);
C
Chris Mason 已提交
185
				write_unlock(&em_tree->lock);
186 187 188 189 190 191 192 193 194
				break;
			}
			if (start < em->start) {
				len = em->start - start;
			} else {
				len = start + len - (em->start + em->len);
				start = em->start + em->len;
			}
			free_extent_map(em);
C
Chris Mason 已提交
195
			write_unlock(&em_tree->lock);
196 197
			continue;
		}
C
Chris Mason 已提交
198
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
199
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
200
		remove_extent_mapping(em_tree, em);
201 202 203 204 205

		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    em->start < start) {
			split->start = em->start;
			split->len = start - em->start;
206
			split->orig_start = em->orig_start;
207
			split->block_start = em->block_start;
C
Chris Mason 已提交
208 209 210 211 212 213

			if (compressed)
				split->block_len = em->block_len;
			else
				split->block_len = split->len;

214
			split->bdev = em->bdev;
215
			split->flags = flags;
216 217 218 219 220 221 222 223 224 225 226 227 228
			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
229
			split->flags = flags;
230

C
Chris Mason 已提交
231 232 233
			if (compressed) {
				split->block_len = em->block_len;
				split->block_start = em->block_start;
234
				split->orig_start = em->orig_start;
C
Chris Mason 已提交
235 236 237
			} else {
				split->block_len = split->len;
				split->block_start = em->block_start + diff;
238
				split->orig_start = split->start;
C
Chris Mason 已提交
239
			}
240 241 242 243 244 245

			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = NULL;
		}
246
		write_unlock(&em_tree->lock);
247

248 249 250 251 252
		/* once for us */
		free_extent_map(em);
		/* once for the tree*/
		free_extent_map(em);
	}
253 254 255 256
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
257 258 259
	return 0;
}

C
Chris Mason 已提交
260 261 262 263 264 265 266 267
/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
C
Chris Mason 已提交
268 269 270
 *
 * inline_limit is used to tell this code which offsets in the file to keep
 * if they contain inline extents.
C
Chris Mason 已提交
271
 */
C
Chris Mason 已提交
272
noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
C
Chris Mason 已提交
273
		       struct btrfs_root *root, struct inode *inode,
274
		       u64 start, u64 end, u64 locked_end,
C
Chris Mason 已提交
275
		       u64 inline_limit, u64 *hint_byte, int drop_cache)
C
Chris Mason 已提交
276
{
277 278
	u64 extent_end = 0;
	u64 search_start = start;
C
Chris Mason 已提交
279
	u64 ram_bytes = 0;
280
	u64 disk_bytenr = 0;
281
	u64 orig_locked_end = locked_end;
C
Chris Mason 已提交
282 283
	u8 compression;
	u8 encryption;
C
Chris Mason 已提交
284
	u16 other_encoding = 0;
285
	struct extent_buffer *leaf;
C
Chris Mason 已提交
286 287
	struct btrfs_file_extent_item *extent;
	struct btrfs_path *path;
288 289 290 291
	struct btrfs_key key;
	struct btrfs_file_extent_item old;
	int keep;
	int slot;
C
Chris Mason 已提交
292
	int bookend;
Y
Yan Zheng 已提交
293
	int found_type = 0;
C
Chris Mason 已提交
294 295
	int found_extent;
	int found_inline;
C
Chris Mason 已提交
296
	int recow;
297
	int ret;
C
Chris Mason 已提交
298

C
Chris Mason 已提交
299
	inline_limit = 0;
C
Chris Mason 已提交
300 301
	if (drop_cache)
		btrfs_drop_extent_cache(inode, start, end - 1, 0);
302

C
Chris Mason 已提交
303 304 305
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
C
Chris Mason 已提交
306
	while (1) {
C
Chris Mason 已提交
307
		recow = 0;
C
Chris Mason 已提交
308 309 310 311 312 313 314 315 316 317 318 319
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
320
next_slot:
C
Chris Mason 已提交
321 322 323 324
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
C
Chris Mason 已提交
325 326
		compression = 0;
		encryption = 0;
C
Chris Mason 已提交
327
		extent = NULL;
328
		leaf = path->nodes[0];
C
Chris Mason 已提交
329
		slot = path->slots[0];
330
		ret = 0;
331
		btrfs_item_key_to_cpu(leaf, &key, slot);
332 333
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
		    key.offset >= end) {
C
Chris Mason 已提交
334 335
			goto out;
		}
336 337
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != inode->i_ino) {
C
Chris Mason 已提交
338 339
			goto out;
		}
C
Chris Mason 已提交
340
		if (recow) {
341
			search_start = max(key.offset, start);
C
Chris Mason 已提交
342 343
			continue;
		}
344 345 346
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
347
			found_type = btrfs_file_extent_type(leaf, extent);
C
Chris Mason 已提交
348 349 350 351 352 353
			compression = btrfs_file_extent_compression(leaf,
								    extent);
			encryption = btrfs_file_extent_encryption(leaf,
								  extent);
			other_encoding = btrfs_file_extent_other_encoding(leaf,
								  extent);
Y
Yan Zheng 已提交
354 355
			if (found_type == BTRFS_FILE_EXTENT_REG ||
			    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 357 358 359 360 361
				extent_end =
				     btrfs_file_extent_disk_bytenr(leaf,
								   extent);
				if (extent_end)
					*hint_byte = extent_end;

362
				extent_end = key.offset +
363
				     btrfs_file_extent_num_bytes(leaf, extent);
C
Chris Mason 已提交
364 365
				ram_bytes = btrfs_file_extent_ram_bytes(leaf,
								extent);
366 367 368 369
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				found_inline = 1;
				extent_end = key.offset +
C
Chris Mason 已提交
370
				     btrfs_file_extent_inline_len(leaf, extent);
371 372 373
			}
		} else {
			extent_end = search_start;
C
Chris Mason 已提交
374 375 376
		}

		/* we found nothing we can drop */
377 378 379 380
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
381
			nritems = btrfs_header_nritems(leaf);
382 383 384 385
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
C
Chris Mason 已提交
386
				recow = 1;
387 388 389 390
			} else {
				path->slots[0]++;
			}
			goto next_slot;
C
Chris Mason 已提交
391 392
		}

C
Chris Mason 已提交
393
		if (end <= extent_end && start >= key.offset && found_inline)
394
			*hint_byte = EXTENT_MAP_INLINE;
Z
Zheng Yan 已提交
395 396 397 398

		if (found_extent) {
			read_extent_buffer(leaf, &old, (unsigned long)extent,
					   sizeof(old));
399
		}
Z
Zheng Yan 已提交
400

C
Chris Mason 已提交
401
		if (end < extent_end && end >= key.offset) {
402
			bookend = 1;
403
			if (found_inline && start <= key.offset)
404
				keep = 1;
C
Chris Mason 已提交
405
		}
Y
Yan Zheng 已提交
406

407 408 409 410 411 412 413 414 415 416 417 418 419
		if (bookend && found_extent) {
			if (locked_end < extent_end) {
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						locked_end, extent_end - 1,
						GFP_NOFS);
				if (!ret) {
					btrfs_release_path(root, path);
					lock_extent(&BTRFS_I(inode)->io_tree,
						locked_end, extent_end - 1,
						GFP_NOFS);
					locked_end = extent_end;
					continue;
				}
Y
Yan Zheng 已提交
420 421
				locked_end = extent_end;
			}
422 423 424 425
			disk_bytenr = le64_to_cpu(old.disk_bytenr);
			if (disk_bytenr != 0) {
				ret = btrfs_inc_extent_ref(trans, root,
					   disk_bytenr,
426 427 428 429
					   le64_to_cpu(old.disk_num_bytes), 0,
					   root->root_key.objectid,
					   key.objectid, key.offset -
					   le64_to_cpu(old.offset));
430 431
				BUG_ON(ret);
			}
Y
Yan Zheng 已提交
432 433 434 435 436 437 438 439
		}

		if (found_inline) {
			u64 mask = root->sectorsize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;

C
Chris Mason 已提交
440 441 442 443 444
		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
445
			WARN_ON(start & (root->sectorsize - 1));
C
Chris Mason 已提交
446
			if (found_extent) {
447 448 449 450 451 452 453 454
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
455 456
					inode_sub_bytes(inode, old_num -
							new_num);
C
Chris Mason 已提交
457
				}
458 459
				btrfs_set_file_extent_num_bytes(leaf,
							extent, new_num);
460
				btrfs_mark_buffer_dirty(leaf);
461 462 463
			} else if (key.offset < inline_limit &&
				   (end > extent_end) &&
				   (inline_limit < extent_end)) {
464 465
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
466
						   inline_limit - key.offset);
467 468
				inode_sub_bytes(inode, extent_end -
						inline_limit);
C
Chris Mason 已提交
469 470 471 472 473 474
				btrfs_set_file_extent_ram_bytes(leaf, extent,
							new_size);
				if (!compression && !encryption) {
					btrfs_truncate_item(trans, root, path,
							    new_size, 1);
				}
C
Chris Mason 已提交
475 476 477 478
			}
		}
		/* delete the entire extent */
		if (!keep) {
479 480 481
			if (found_inline)
				inode_sub_bytes(inode, extent_end -
						key.offset);
C
Chris Mason 已提交
482
			ret = btrfs_del_item(trans, root, path);
483
			/* TODO update progress marker and return */
C
Chris Mason 已提交
484 485
			BUG_ON(ret);
			extent = NULL;
Z
Zheng Yan 已提交
486 487
			btrfs_release_path(root, path);
			/* the extent will be freed later */
C
Chris Mason 已提交
488
		}
489
		if (bookend && found_inline && start <= key.offset) {
490 491
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
492
						   extent_end - end);
493
			inode_sub_bytes(inode, end - key.offset);
C
Chris Mason 已提交
494 495 496 497 498
			btrfs_set_file_extent_ram_bytes(leaf, extent,
							new_size);
			if (!compression && !encryption)
				ret = btrfs_truncate_item(trans, root, path,
							  new_size, 0);
Z
Zheng Yan 已提交
499
			BUG_ON(ret);
500
		}
C
Chris Mason 已提交
501 502 503 504 505 506
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507

C
Chris Mason 已提交
508
			btrfs_release_path(root, path);
509
			path->leave_spinning = 1;
C
Chris Mason 已提交
510 511
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));
Z
Zheng Yan 已提交
512
			BUG_ON(ret);
513

514 515 516 517 518 519
			leaf = path->nodes[0];
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

C
Chris Mason 已提交
520 521 522 523 524 525
			btrfs_set_file_extent_compression(leaf, extent,
							  compression);
			btrfs_set_file_extent_encryption(leaf, extent,
							 encryption);
			btrfs_set_file_extent_other_encoding(leaf, extent,
							     other_encoding);
526
			btrfs_set_file_extent_offset(leaf, extent,
527 528 529 530 531
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
C
Chris Mason 已提交
532 533 534 535 536 537 538 539 540

			/*
			 * set the ram bytes to the size of the full extent
			 * before splitting.  This is a worst case flag,
			 * but its the best we can do because we don't know
			 * how splitting affects compression
			 */
			btrfs_set_file_extent_ram_bytes(leaf, extent,
							ram_bytes);
Y
Yan Zheng 已提交
541
			btrfs_set_file_extent_type(leaf, extent, found_type);
542

543
			btrfs_unlock_up_safe(path, 1);
C
Chris Mason 已提交
544
			btrfs_mark_buffer_dirty(path->nodes[0]);
545
			btrfs_set_lock_blocking(path->nodes[0]);
Z
Zheng Yan 已提交
546

547
			path->leave_spinning = 0;
Z
Zheng Yan 已提交
548
			btrfs_release_path(root, path);
C
Chris Mason 已提交
549
			if (disk_bytenr != 0)
550
				inode_add_bytes(inode, extent_end - end);
Z
Zheng Yan 已提交
551 552 553
		}

		if (found_extent && !keep) {
554
			u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr);
Z
Zheng Yan 已提交
555

556
			if (old_disk_bytenr != 0) {
557 558
				inode_sub_bytes(inode,
						le64_to_cpu(old.num_bytes));
Z
Zheng Yan 已提交
559
				ret = btrfs_free_extent(trans, root,
560
						old_disk_bytenr,
Z
Zheng Yan 已提交
561
						le64_to_cpu(old.disk_num_bytes),
562 563 564
						0, root->root_key.objectid,
						key.objectid, key.offset -
						le64_to_cpu(old.offset));
Z
Zheng Yan 已提交
565
				BUG_ON(ret);
566
				*hint_byte = old_disk_bytenr;
Z
Zheng Yan 已提交
567 568 569 570
			}
		}

		if (search_start >= end) {
C
Chris Mason 已提交
571 572 573 574 575 576
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
577 578 579
	if (locked_end > orig_locked_end) {
		unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
			      locked_end - 1, GFP_NOFS);
Y
Yan Zheng 已提交
580
	}
C
Chris Mason 已提交
581 582 583
	return ret;
}

Y
Yan Zheng 已提交
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632
/*
 * Check whether the item at slot in leaf is a file extent that can be
 * merged with a neighbouring extent backed by the disk extent bytenr.
 *
 * The item qualifies when it is an EXTENT_DATA item of objectid, of type
 * BTRFS_FILE_EXTENT_REG, points at bytenr and carries no compression,
 * encryption or other encoding.  A non-zero *start or *end must match
 * the item's file offset or end exactly; a zero value matches anything.
 *
 * Returns 1 and stores the item's range in *start and *end on a match.
 * Returns 0, leaving both untouched, otherwise (including when slot is
 * outside the leaf).
 */
static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *item;
	struct btrfs_key found;
	u64 item_end;

	if (slot < 0)
		return 0;
	if (slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &found, slot);
	if (found.type != BTRFS_EXTENT_DATA_KEY || found.objectid != objectid)
		return 0;

	item = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, item) != BTRFS_FILE_EXTENT_REG)
		return 0;
	if (btrfs_file_extent_disk_bytenr(leaf, item) != bytenr)
		return 0;
	if (btrfs_file_extent_compression(leaf, item) ||
	    btrfs_file_extent_encryption(leaf, item) ||
	    btrfs_file_extent_other_encoding(leaf, item))
		return 0;

	item_end = found.offset + btrfs_file_extent_num_bytes(leaf, item);

	/* zero means "no constraint" for either boundary */
	if (*start != 0 && *start != found.offset)
		return 0;
	if (*end != 0 && *end != item_end)
		return 0;

	*start = found.offset;
	*end = item_end;
	return 1;
}

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.  Where possible the written part is merged into a
 * neighbouring regular extent that points at the same disk extent
 * instead of getting an item of its own.
 *
 * The range must lie inside a single PREALLOC file extent item of the
 * inode; anything else, and any failure while searching or modifying
 * the tree, is fatal (BUG_ON).
 *
 * Always returns 0.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct inode *inode, u64 start, u64 end)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 orig_offset;
	u64 other_start;
	u64 other_end;
	u64 split = start;
	u64 locked_end = end;
	int extent_type;
	int split_end = 1;
	int ret;

	btrfs_drop_extent_cache(inode, start, end - 1, 0);

	path = btrfs_alloc_path();
	BUG_ON(!path);
again:
	/*
	 * first pass splits at start; a second pass (split == end) looks
	 * up the item that ends up covering the byte just before end
	 */
	key.objectid = inode->i_ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	if (split == start)
		key.offset = split;
	else
		key.offset = split - 1;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	/*
	 * a failed search must not be followed by a dereference of
	 * path->nodes[0]; stop here like every other failure in this
	 * function
	 */
	BUG_ON(ret < 0);
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	BUG_ON(key.objectid != inode->i_ino ||
	       key.type != BTRFS_EXTENT_DATA_KEY);
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(leaf, fi);
	BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	BUG_ON(key.offset > start || extent_end < end);

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	/* file offset that corresponds to the start of the disk extent */
	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);

	if (key.offset == start)
		split = end;

	if (key.offset == start && extent_end == end) {
		/* the whole item is being written: no split is needed */
		int del_nr = 0;
		int del_slot = 0;
		other_start = end;
		other_end = 0;
		/* can the following item be merged into this one? */
		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			extent_end = other_end;
			del_slot = path->slots[0] + 1;
			del_nr++;
			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						0, root->root_key.objectid,
						inode->i_ino, orig_offset);
			BUG_ON(ret);
		}
		other_start = 0;
		other_end = start;
		/* can this item be merged into the preceding one? */
		if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			key.offset = other_start;
			del_slot = path->slots[0];
			del_nr++;
			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
						0, root->root_key.objectid,
						inode->i_ino, orig_offset);
			BUG_ON(ret);
		}
		split_end = 0;
		if (del_nr == 0) {
			/* nothing to merge with: just flip the type */
			btrfs_set_file_extent_type(leaf, fi,
						   BTRFS_FILE_EXTENT_REG);
			goto done;
		}

		/* the item in front of the deleted ones covers the range */
		fi = btrfs_item_ptr(leaf, del_slot - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		BUG_ON(ret);
		goto release;
	} else if (split == start) {
		/* the item starts before start: cut it off at start */
		if (locked_end < extent_end) {
			ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
			if (!ret) {
				/* take the lock the slow way and retry */
				btrfs_release_path(root, path);
				lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
				locked_end = extent_end;
				goto again;
			}
			locked_end = extent_end;
		}
		btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
	} else  {
		/* the item starts at start: move its start up to split */
		BUG_ON(key.offset != start);
		key.offset = split;
		btrfs_set_file_extent_offset(leaf, fi, key.offset -
					     orig_offset);
		btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
		btrfs_set_item_key_safe(trans, root, path, &key);
		extent_end = split;
	}

	if (extent_end == end) {
		split_end = 0;
		extent_type = BTRFS_FILE_EXTENT_REG;
	}
	if (extent_end == end && split == start) {
		other_start = end;
		other_end = 0;
		/* grow the following item backwards over the written part */
		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			path->slots[0]++;
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			key.offset = split;
			btrfs_set_item_key_safe(trans, root, path, &key);
			btrfs_set_file_extent_offset(leaf, fi, key.offset -
						     orig_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - split);
			goto done;
		}
	}
	if (extent_end == end && split == end) {
		other_start = 0;
		other_end = start;
		/* grow the preceding item forwards over the written part */
		if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
				     bytenr, &other_start, &other_end)) {
			path->slots[0]--;
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
							other_start);
			goto done;
		}
	}

	btrfs_mark_buffer_dirty(leaf);

	/* a new item is about to point at the same disk extent */
	ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
				   root->root_key.objectid,
				   inode->i_ino, orig_offset);
	BUG_ON(ret);
	btrfs_release_path(root, path);

	key.offset = start;
	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
	BUG_ON(ret);

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, extent_type);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
	btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
	btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_compression(leaf, fi, 0);
	btrfs_set_file_extent_encryption(leaf, fi, 0);
	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
done:
	btrfs_mark_buffer_dirty(leaf);

release:
	btrfs_release_path(root, path);
	/* the tail beyond end still has to be split off: go around again */
	if (split_end && split == start) {
		split = end;
		goto again;
	}
	/* drop whatever was locked beyond the caller's range */
	if (locked_end > end) {
		unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
			      GFP_NOFS);
	}
	btrfs_free_path(path);
	return 0;
}

C
Chris Mason 已提交
821
/*
C
Chris Mason 已提交
822 823 824
 * this gets pages into the page cache and locks them down, it also properly
 * waits for data=ordered extents to finish before allowing the pages to be
 * modified.
C
Chris Mason 已提交
825
 */
C
Chris Mason 已提交
826
static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
827 828 829
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
C
Chris Mason 已提交
830 831 832
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
833
	struct inode *inode = fdentry(file)->d_inode;
C
Chris Mason 已提交
834
	int err = 0;
835
	u64 start_pos;
836
	u64 last_pos;
837

838
	start_pos = pos & ~((u64)root->sectorsize - 1);
839
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
C
Chris Mason 已提交
840

Y
Yan Zheng 已提交
841 842 843 844 845 846
	if (start_pos > inode->i_size) {
		err = btrfs_cont_expand(inode, start_pos);
		if (err)
			return err;
	}

C
Chris Mason 已提交
847
	memset(pages, 0, num_pages * sizeof(struct page *));
848
again:
C
Chris Mason 已提交
849 850 851 852
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
853
			BUG_ON(1);
C
Chris Mason 已提交
854
		}
C
Chris Mason 已提交
855
		wait_on_page_writeback(pages[i]);
C
Chris Mason 已提交
856
	}
857
	if (start_pos < inode->i_size) {
858
		struct btrfs_ordered_extent *ordered;
859 860
		lock_extent(&BTRFS_I(inode)->io_tree,
			    start_pos, last_pos - 1, GFP_NOFS);
C
Chris Mason 已提交
861 862
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    last_pos - 1);
863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent(&BTRFS_I(inode)->io_tree,
				      start_pos, last_pos - 1, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

880
		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
881 882
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
				  EXTENT_DO_ACCOUNTING,
883
				  GFP_NOFS);
884 885
		unlock_extent(&BTRFS_I(inode)->io_tree,
			      start_pos, last_pos - 1, GFP_NOFS);
886
	}
887
	for (i = 0; i < num_pages; i++) {
888
		clear_page_dirty_for_io(pages[i]);
889 890 891
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
C
Chris Mason 已提交
892 893 894 895 896 897 898
	return 0;
}

/*
 * Buffered write entry point for btrfs regular files.
 *
 * Data is copied from the user buffer @buf into page cache pages in
 * batches of at most nrptrs pages.  For each batch, data space is
 * reserved, the pages are prepared, the user data is copied in and the
 * pages are dirtied and released.
 *
 * If the file is O_SYNC / O_DIRECT or the inode is IS_SYNC, each batch is
 * pushed out with filemap_fdatawrite_range() and, once the copy loop is
 * done, the ordered range is waited on (plus a tree-log sync for the
 * sync cases, and a page cache invalidation for O_DIRECT).
 *
 * @file:  file being written
 * @buf:   user space source buffer
 * @count: number of bytes requested
 * @ppos:  in/out file position; set to the final value of pos on return
 *
 * Returns the number of bytes written when that is non-zero, otherwise
 * the error code recorded along the way (0 if there was nothing to do).
 */
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
	int ret = 0;
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	int nrptrs;
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
	int will_write;

	will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
		      (file->f_flags & O_DIRECT));

	/* never use more page pointers than fit in a single page */
	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	pinned[0] = NULL;
	pinned[1] = NULL;

	pos = *ppos;
	start_pos = pos;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	/* do the reserve before the mutex lock in case we have to do some
	 * flushing.  We wouldn't deadlock, but this is more polite.
	 */
	err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
	if (err)
		goto out_nolock;

	mutex_lock(&inode->i_mutex);

	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;

	if (count == 0)
		goto out;

	err = file_remove_suid(file);
	if (err)
		goto out;

	file_update_time(file);

	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		/* the array is memset() and filled below, it must exist */
		ret = -ENOMEM;
		goto out;
	}

	/* generic_write_checks can change our pos */
	start_pos = pos;

	BTRFS_I(inode)->sequence++;
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!pinned[0]) {
			/* no page: bail out instead of dereferencing NULL */
			ret = -ENOMEM;
			goto out;
		}
		if (!PageUptodate(pinned[0])) {
			ret = btrfs_readpage(NULL, pinned[0]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!pinned[1]) {
			/* pinned[0], if any, is released at out_nolock */
			ret = -ENOMEM;
			goto out;
		}
		if (!PageUptodate(pinned[1])) {
			ret = btrfs_readpage(NULL, pinned[1]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	while (count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
					 offset);
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

		WARN_ON(num_pages > nrptrs);
		memset(pages, 0, sizeof(struct page *) * nrptrs);

		ret = btrfs_check_data_free_space(root, inode, write_bytes);
		if (ret)
			goto out;

		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
				    write_bytes);
		if (ret) {
			btrfs_free_reserved_data_space(root, inode,
						       write_bytes);
			goto out;
		}

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
		if (ret) {
			btrfs_free_reserved_data_space(root, inode,
						       write_bytes);
			btrfs_drop_pages(pages, num_pages);
			goto out;
		}

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		btrfs_drop_pages(pages, num_pages);
		if (ret) {
			btrfs_free_reserved_data_space(root, inode,
						       write_bytes);
			goto out;
		}

		if (will_write) {
			filemap_fdatawrite_range(inode->i_mapping, pos,
						 pos + write_bytes - 1);
		} else {
			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
							   num_pages);
			if (num_pages <
			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
				btrfs_btree_balance_dirty(root, 1);
			btrfs_throttle(root);
		}

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

		cond_resched();
	}
out:
	mutex_unlock(&inode->i_mutex);
	if (ret)
		err = ret;
	btrfs_unreserve_metadata_for_delalloc(root, inode, 1);

out_nolock:
	kfree(pages);
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;

	/*
	 * we want to make sure fsync finds this change
	 * but we haven't joined a transaction running right now.
	 *
	 * Later on, someone is sure to update the inode and get the
	 * real transid recorded.
	 *
	 * We set last_trans now to the fs_info generation + 1,
	 * this will either be one more than the running transaction
	 * or the generation used for the next transaction if there isn't
	 * one running right now.
	 */
	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;

	if (num_written > 0 && will_write) {
		struct btrfs_trans_handle *trans;

		err = btrfs_wait_ordered_range(inode, start_pos, num_written);
		if (err)
			num_written = err;

		if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
			trans = btrfs_start_transaction(root, 1);
			if (!trans) {
				/* same check btrfs_sync_file() performs */
				num_written = -ENOMEM;
				goto done;
			}
			ret = btrfs_log_dentry_safe(trans, root,
						    file->f_dentry);
			if (ret == 0) {
				ret = btrfs_sync_log(trans, root);
				if (ret == 0)
					btrfs_end_transaction(trans, root);
				else
					btrfs_commit_transaction(trans, root);
			} else if (ret != BTRFS_NO_LOG_SYNC) {
				btrfs_commit_transaction(trans, root);
			} else {
				btrfs_end_transaction(trans, root);
			}
		}
		if (file->f_flags & O_DIRECT) {
			invalidate_mapping_pages(inode->i_mapping,
			      start_pos >> PAGE_CACHE_SHIFT,
			     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
		}
	}
done:
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

C
Chris Mason 已提交
1105
int btrfs_release_file(struct inode *inode, struct file *filp)
1106
{
1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118
	/*
	 * ordered_data_close is set by settattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to
	 * flush down new bytes that may have been written if the
	 * application were using truncate to replace a file in place.
	 */
	if (BTRFS_I(inode)->ordered_data_close) {
		BTRFS_I(inode)->ordered_data_close = 0;
		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
			filemap_flush(inode->i_mapping);
	}
S
Sage Weil 已提交
1119 1120
	if (filp->private_data)
		btrfs_ioctl_trans_end(filp);
1121 1122 1123
	return 0;
}

C
Chris Mason 已提交
1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates are
 * in the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
1135
int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
C
Chris Mason 已提交
1136 1137 1138
{
	struct inode *inode = dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
1139
	int ret = 0;
C
Chris Mason 已提交
1140 1141
	struct btrfs_trans_handle *trans;

1142 1143 1144 1145 1146 1147 1148

	/* we wait first, since the writeback may change the inode */
	root->log_batch++;
	/* the VFS called filemap_fdatawrite for us */
	btrfs_wait_ordered_range(inode, 0, (u64)-1);
	root->log_batch++;

C
Chris Mason 已提交
1149
	/*
1150 1151
	 * check the transaction that last modified this inode
	 * and see if its already been committed
C
Chris Mason 已提交
1152
	 */
1153 1154
	if (!BTRFS_I(inode)->last_trans)
		goto out;
1155

1156 1157 1158 1159 1160
	/*
	 * if the last transaction that changed this file was before
	 * the current transaction, we can bail out now without any
	 * syncing
	 */
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
	mutex_lock(&root->fs_info->trans_mutex);
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;
		mutex_unlock(&root->fs_info->trans_mutex);
		goto out;
	}
	mutex_unlock(&root->fs_info->trans_mutex);

	/*
1171 1172
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
1173
	if (file && file->private_data)
S
Sage Weil 已提交
1174 1175
		btrfs_ioctl_trans_end(file);

C
Chris Mason 已提交
1176 1177 1178 1179 1180
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto out;
	}
1181

1182
	ret = btrfs_log_dentry_safe(trans, root, dentry);
C
Chris Mason 已提交
1183
	if (ret < 0)
1184
		goto out;
C
Chris Mason 已提交
1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195

	/* we've logged all the items and now have a consistent
	 * version of the file in the log.  It is possible that
	 * someone will come in and modify the file, but that's
	 * fine because the log is consistent on disk, and we
	 * have references to all of the file's extents
	 *
	 * It is possible that someone will come in and log the
	 * file again, but that will end up using the synchronization
	 * inside btrfs_sync_log to keep things safe.
	 */
1196
	mutex_unlock(&dentry->d_inode->i_mutex);
C
Chris Mason 已提交
1197

1198 1199
	if (ret != BTRFS_NO_LOG_SYNC) {
		if (ret > 0) {
1200
			ret = btrfs_commit_transaction(trans, root);
1201 1202 1203 1204 1205 1206 1207 1208 1209
		} else {
			ret = btrfs_sync_log(trans, root);
			if (ret == 0)
				ret = btrfs_end_transaction(trans, root);
			else
				ret = btrfs_commit_transaction(trans, root);
		}
	} else {
		ret = btrfs_end_transaction(trans, root);
1210
	}
1211
	mutex_lock(&dentry->d_inode->i_mutex);
C
Chris Mason 已提交
1212 1213 1214 1215
out:
	return ret > 0 ? EIO : ret;
}

1216
static const struct vm_operations_struct btrfs_file_vm_ops = {
1217
	.fault		= filemap_fault,
C
Chris Mason 已提交
1218 1219 1220 1221 1222 1223 1224 1225 1226 1227
	.page_mkwrite	= btrfs_page_mkwrite,
};

/*
 * ->mmap handler: points the new mapping at btrfs_file_vm_ops and
 * calls file_accessed() on the file.  Always returns 0.
 */
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	/* hook up the btrfs fault / page_mkwrite handlers for this vma */
	vma->vm_ops = &btrfs_file_vm_ops;

	file_accessed(filp);

	return 0;
}

1228
const struct file_operations btrfs_file_operations = {
C
Chris Mason 已提交
1229 1230
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
C
Chris Mason 已提交
1231
	.aio_read       = generic_file_aio_read,
C
Chris Mason 已提交
1232
	.splice_read	= generic_file_splice_read,
C
Chris Mason 已提交
1233
	.write		= btrfs_file_write,
C
Chris Mason 已提交
1234
	.mmap		= btrfs_file_mmap,
C
Chris Mason 已提交
1235
	.open		= generic_file_open,
1236
	.release	= btrfs_release_file,
C
Chris Mason 已提交
1237
	.fsync		= btrfs_sync_file,
C
Christoph Hellwig 已提交
1238
	.unlocked_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
1239
#ifdef CONFIG_COMPAT
C
Christoph Hellwig 已提交
1240
	.compat_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
1241 1242
#endif
};