file.c 26.9 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

C
Chris Mason 已提交
19 20 21 22 23 24 25 26 27 28 29 30 31
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
32
#include <linux/version.h>
C
Chris Mason 已提交
33 34 35 36 37 38
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
39 40
#include "tree-log.h"
#include "locking.h"
41
#include "compat.h"
C
Chris Mason 已提交
42 43


C
Chris Mason 已提交
44 45 46
/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
47 48 49 50
/*
 * Copy @write_bytes bytes from the user buffer @buf into the pages in
 * @prepared_pages, starting at the in-page offset implied by @pos.
 *
 * @pos:            file position of the first byte; only its offset
 *                  within a page is used here
 * @num_pages:      number of entries in @prepared_pages
 * @write_bytes:    total number of bytes to copy
 * @prepared_pages: destination pages, filled in order
 * @buf:            user space source buffer
 *
 * Returns 0 when every chunk was copied, -EFAULT as soon as
 * __copy_from_user() reports uncopied bytes.
 */
static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
					 int write_bytes,
					 struct page **prepared_pages,
					 const char __user * buf)
{
	/* only the first page starts at a non-zero offset */
	int offset = pos & (PAGE_CACHE_SIZE - 1);
	int idx = 0;

	while (idx < num_pages && write_bytes > 0) {
		struct page *cur = prepared_pages[idx];
		size_t chunk = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		long uncopied;

		fault_in_pages_readable(buf, chunk);

		/* map the page, copy the user data in, flush the dcache */
		kmap(cur);
		uncopied = __copy_from_user(page_address(cur) + offset,
					    buf, chunk);
		flush_dcache_page(cur);
		kunmap(cur);

		if (uncopied)
			return -EFAULT;

		buf += chunk;
		write_bytes -= chunk;
		offset = 0;
		idx++;
	}
	return 0;
}

C
Chris Mason 已提交
78 79 80
/*
 * unlocks pages after btrfs_file_write is done with them
 */
81
/*
 * Unlock and release up to @num_pages pages from @pages.  The walk stops
 * at the first NULL slot.
 */
static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	struct page **slot = pages;
	struct page **stop = pages + num_pages;

	for (; slot < stop && *slot; slot++) {
		struct page *page = *slot;

		/*
		 * PageChecked is used to find pages that were modified
		 * without going through btrfs_set_page_dirty, clear it
		 * before the page is handed back.
		 */
		ClearPageChecked(page);
		unlock_page(page);
		mark_page_accessed(page);
		page_cache_release(page);
	}
}

C
Chris Mason 已提交
98 99 100 101 102 103 104 105
/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
106
/*
 * Mark the pages of a finished copy uptodate and dirty, and flag the
 * sector aligned byte range they cover as uptodate + delalloc while the
 * extent lock for that range is held.  i_size is pushed out when the
 * write ends past the current end of file.
 *
 * @trans:       value passed in is ignored; a transaction is joined here
 *               and ended before returning
 * @root:        root of the inode, supplies the sector size
 * @file:        file being written
 * @pages:       pages that received the new data
 * @num_pages:   number of entries in @pages
 * @pos:         file offset of the first byte written
 * @write_bytes: number of bytes written starting at @pos
 *
 * Returns 0 on success, -ENOMEM when no transaction could be joined,
 * otherwise the first error reported by btrfs_update_inode() or
 * btrfs_end_transaction().
 */
static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
	int end_err;
	size_t i;
	struct inode *inode = fdentry(file)->d_inode;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);

	/* round the range out to sector boundaries on both sides */
	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	trans = btrfs_join_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);

	/* debugging aid: the inclusive end is not expected to be 4K aligned */
	if ((end_of_last_block & 4095) == 0) {
		printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
	}
	set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);

	/* check for reserved extents on each page, we don't want
	 * to reset the delalloc bit on things that already have
	 * extents reserved.
	 */
	btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
		/* do not lose a failure to record the new size */
		err = btrfs_update_inode(trans, root, inode);
	}

	/* the transaction is always ended; the first error wins */
	end_err = btrfs_end_transaction(trans, root);
	if (!err)
		err = end_err;
out_unlock:
	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	return err;
}

C
Chris Mason 已提交
166 167 168 169
/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
170 171
/*
 * @inode:       inode whose extent map tree is trimmed
 * @start:       first byte of the range to drop
 * @end:         last byte of the range (inclusive); (u64)-1 disables the
 *               end-of-range test so no tail piece is ever split off
 * @skip_pinned: when non-zero, mappings with EXTENT_FLAG_PINNED set are
 *               left in the tree and the search continues around them
 *
 * Always returns 0.
 *
 * NOTE(review): the results of alloc_extent_map() are not checked before
 * split is dereferenced below.
 */
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			    int skip_pinned)
172 173
{
	struct extent_map *em;
174 175
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
176
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
177
	u64 len = end - start + 1;
178 179
	int ret;
	int testend = 1;
180
	unsigned long flags;
C
Chris Mason 已提交
181
	int compressed = 0;
182

183
	WARN_ON(end < start);
184
	if (end == (u64)-1) {
185
		len = (u64)-1;
186 187
		testend = 0;
	}
188
	while(1) {
189 190 191 192 193
		/*
		 * make sure two spare maps exist before the tree lock is
		 * taken; one mapping can need both a front and a tail piece
		 */
		if (!split)
			split = alloc_extent_map(GFP_NOFS);
		if (!split2)
			split2 = alloc_extent_map(GFP_NOFS);

194
		spin_lock(&em_tree->lock);
195
		em = lookup_extent_mapping(em_tree, start, len);
196 197
		if (!em) {
			spin_unlock(&em_tree->lock);
198
			break;
199
		}
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
		flags = em->flags;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			spin_unlock(&em_tree->lock);
			/* the pinned mapping covers all that is left: done */
			if (em->start <= start &&
			    (!testend || em->start + em->len >= start + len)) {
				free_extent_map(em);
				break;
			}
			/*
			 * otherwise shrink the search to the part in front
			 * of the pinned mapping, or move it past its end
			 */
			if (start < em->start) {
				len = em->start - start;
			} else {
				len = start + len - (em->start + em->len);
				start = em->start + em->len;
			}
			free_extent_map(em);
			continue;
		}
C
Chris Mason 已提交
217
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
218
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
219
		remove_extent_mapping(em_tree, em);
220 221 222 223 224 225

		/* front piece: [em->start, start) stays cached */
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    em->start < start) {
			split->start = em->start;
			split->len = start - em->start;
			split->block_start = em->block_start;
C
Chris Mason 已提交
226 227 228 229 230 231

			if (compressed)
				split->block_len = em->block_len;
			else
				split->block_len = split->len;

232
			split->bdev = em->bdev;
233
			split->flags = flags;
234 235 236 237 238 239 240 241 242 243 244 245 246
			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			/* the second spare becomes the one used for the tail */
			split = split2;
			split2 = NULL;
		}
		/* tail piece: [start + len, end of em) stays cached */
		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
		    testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
247
			split->flags = flags;
248

C
Chris Mason 已提交
249 250 251 252 253 254 255
			/*
			 * a compressed mapping keeps the original block range,
			 * an uncompressed one is advanced by the bytes dropped
			 */
			if (compressed) {
				split->block_len = em->block_len;
				split->block_start = em->block_start;
			} else {
				split->block_len = split->len;
				split->block_start = em->block_start + diff;
			}
256 257 258 259 260 261

			ret = add_extent_mapping(em_tree, split);
			BUG_ON(ret);
			free_extent_map(split);
			split = NULL;
		}
262 263
		spin_unlock(&em_tree->lock);

264 265 266 267 268
		/* once for us */
		free_extent_map(em);
		/* once for the tree*/
		free_extent_map(em);
	}
269 270 271 272
	/* release whichever spare maps were not consumed */
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
273 274 275
	return 0;
}

276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
/*
 * Hook for a consistency check of an inode's file extent items.
 *
 * The check itself is not implemented here: neither @root nor @inode is
 * examined and success is always reported.
 *
 * Returns 0.
 */
int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
{
	return 0;
}

C
Chris Mason 已提交
350 351 352 353 354 355 356 357
/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
C
Chris Mason 已提交
358 359 360
 *
 * inline_limit is used to tell this code which offsets in the file to keep
 * if they contain inline extents.
C
Chris Mason 已提交
361
 */
362
/*
 * @trans:        transaction handle used for all tree modifications
 * @root:         root holding the file extent items
 * @inode:        inode whose extents are dropped
 * @start:        first byte of the range
 * @end:          end of the range; the cached extent maps are dropped up
 *                to end - 1, and a key at offset >= end stops the walk
 * @inline_limit: overwritten with 0 on entry, so the caller's value is
 *                not used
 * @hint_byte:    out parameter, updated with disk byte numbers of extents
 *                seen on the way (or EXTENT_MAP_INLINE)
 *
 * Returns 0, or a negative value from btrfs_lookup_file_extent().
 */
int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
C
Chris Mason 已提交
363
		       struct btrfs_root *root, struct inode *inode,
364
		       u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
C
Chris Mason 已提交
365
{
366
	u64 extent_end = 0;
Y
Yan Zheng 已提交
367
	u64 locked_end = end;
368
	u64 search_start = start;
Z
Zheng Yan 已提交
369
	u64 leaf_start;
C
Chris Mason 已提交
370 371 372 373
	u64 ram_bytes = 0;
	u8 compression = 0;
	u8 encryption = 0;
	u16 other_encoding = 0;
Z
Zheng Yan 已提交
374 375
	u64 root_gen;
	u64 root_owner;
376
	struct extent_buffer *leaf;
C
Chris Mason 已提交
377 378
	struct btrfs_file_extent_item *extent;
	struct btrfs_path *path;
379 380 381 382
	struct btrfs_key key;
	struct btrfs_file_extent_item old;
	int keep;
	int slot;
C
Chris Mason 已提交
383 384 385 386
	int bookend;
	int found_type;
	int found_extent;
	int found_inline;
C
Chris Mason 已提交
387
	int recow;
388
	int ret;
C
Chris Mason 已提交
389

C
Chris Mason 已提交
390
	/*
	 * the caller's limit is discarded; with a limit of zero the
	 * "key.offset < inline_limit" branch further down is never taken
	 */
	inline_limit = 0;
391
	/* the cache helper takes an inclusive end, hence end - 1 */
	btrfs_drop_extent_cache(inode, start, end - 1, 0);
392

C
Chris Mason 已提交
393 394 395 396
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	while(1) {
C
Chris Mason 已提交
397
		recow = 0;
C
Chris Mason 已提交
398 399 400 401 402 403 404 405 406 407 408 409
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		/*
		 * no exact match: step back one slot to look at the item in
		 * front of the search position, or stop if there is none
		 */
		if (ret > 0) {
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
410
next_slot:
C
Chris Mason 已提交
411 412 413 414
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
Z
Zheng Yan 已提交
415 416 417
		leaf_start = 0;
		root_gen = 0;
		root_owner = 0;
C
Chris Mason 已提交
418
		extent = NULL;
419
		leaf = path->nodes[0];
C
Chris Mason 已提交
420
		slot = path->slots[0];
421
		ret = 0;
422
		btrfs_item_key_to_cpu(leaf, &key, slot);
423 424
		/* an extent item starting at or past end: nothing left to do */
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
		    key.offset >= end) {
C
Chris Mason 已提交
425 426
			goto out;
		}
427 428
		/* walked past this inode's extent items */
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != inode->i_ino) {
C
Chris Mason 已提交
429 430
			goto out;
		}
C
Chris Mason 已提交
431 432 433 434
		/*
		 * recow is set after btrfs_next_leaf() moved the path to a
		 * new leaf: redo the lookup starting from this key
		 */
		if (recow) {
			search_start = key.offset;
			continue;
		}
435 436 437
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
438
			found_type = btrfs_file_extent_type(leaf, extent);
C
Chris Mason 已提交
439 440 441 442 443 444
			compression = btrfs_file_extent_compression(leaf,
								    extent);
			encryption = btrfs_file_extent_encryption(leaf,
								  extent);
			other_encoding = btrfs_file_extent_other_encoding(leaf,
								  extent);
445
			if (found_type == BTRFS_FILE_EXTENT_REG) {
446 447 448 449 450 451
				/* extent_end is briefly reused as scratch for the disk bytenr */
				extent_end =
				     btrfs_file_extent_disk_bytenr(leaf,
								   extent);
				if (extent_end)
					*hint_byte = extent_end;

452
				extent_end = key.offset +
453
				     btrfs_file_extent_num_bytes(leaf, extent);
C
Chris Mason 已提交
454 455
				ram_bytes = btrfs_file_extent_ram_bytes(leaf,
								extent);
456 457 458 459
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				found_inline = 1;
				extent_end = key.offset +
C
Chris Mason 已提交
460
				     btrfs_file_extent_inline_len(leaf, extent);
461 462 463
			}
		} else {
			extent_end = search_start;
C
Chris Mason 已提交
464 465 466
		}

		/* we found nothing we can drop */
467 468 469 470
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
471
			nritems = btrfs_header_nritems(leaf);
472 473 474 475
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
C
Chris Mason 已提交
476
				recow = 1;
477 478 479 480
			} else {
				path->slots[0]++;
			}
			goto next_slot;
C
Chris Mason 已提交
481 482
		}

C
Chris Mason 已提交
483
		/* the whole range sits inside a single inline extent */
		if (end <= extent_end && start >= key.offset && found_inline)
484
			*hint_byte = EXTENT_MAP_INLINE;
Z
Zheng Yan 已提交
485 486 487 488 489 490 491

		/*
		 * keep a copy of the item and of the leaf's identity; both
		 * are needed after the path has been released
		 */
		if (found_extent) {
			read_extent_buffer(leaf, &old, (unsigned long)extent,
					   sizeof(old));
			root_gen = btrfs_header_generation(leaf);
			root_owner = btrfs_header_owner(leaf);
			leaf_start = leaf->start;
492
		}
Z
Zheng Yan 已提交
493

C
Chris Mason 已提交
494
		/* the extent reaches past end: its tail has to survive */
		if (end < extent_end && end >= key.offset) {
495
			bookend = 1;
496
			if (found_inline && start <= key.offset)
497
				keep = 1;
C
Chris Mason 已提交
498
		}
Y
Yan Zheng 已提交
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518

		/*
		 * extend the extent lock to cover the bookend.  If the lock
		 * cannot be taken without blocking, drop the path first,
		 * take it the blocking way and restart the lookup.
		 */
		if (bookend && found_extent && locked_end < extent_end) {
			ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
			if (!ret) {
				btrfs_release_path(root, path);
				lock_extent(&BTRFS_I(inode)->io_tree,
					locked_end, extent_end - 1, GFP_NOFS);
				locked_end = extent_end;
				continue;
			}
			locked_end = extent_end;
		}

		/* next search position; rounded up to a sector for inline items */
		if (found_inline) {
			u64 mask = root->sectorsize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;

C
Chris Mason 已提交
519 520 521 522 523
		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
524
			WARN_ON(start & (root->sectorsize - 1));
C
Chris Mason 已提交
525
			if (found_extent) {
526 527 528 529 530 531 532 533
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				/* inode byte count only changes when disk_bytenr is non-zero */
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
534 535
					inode_sub_bytes(inode, old_num -
							new_num);
C
Chris Mason 已提交
536
				}
537 538
				btrfs_set_file_extent_num_bytes(leaf, extent,
								new_num);
539
				btrfs_mark_buffer_dirty(leaf);
540 541 542
			} else if (key.offset < inline_limit &&
				   (end > extent_end) &&
				   (inline_limit < extent_end)) {
543 544
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
545
						   inline_limit - key.offset);
546 547
				inode_sub_bytes(inode, extent_end -
						inline_limit);
548
				btrfs_truncate_item(trans, root, path,
549
						    new_size, 1);
C
Chris Mason 已提交
550 551 552 553
			}
		}
		/* delete the entire extent */
		if (!keep) {
554 555 556
			if (found_inline)
				inode_sub_bytes(inode, extent_end -
						key.offset);
C
Chris Mason 已提交
557
			ret = btrfs_del_item(trans, root, path);
558
			/* TODO update progress marker and return */
C
Chris Mason 已提交
559 560
			BUG_ON(ret);
			extent = NULL;
Z
Zheng Yan 已提交
561 562
			btrfs_release_path(root, path);
			/* the extent will be freed later */
C
Chris Mason 已提交
563
		}
564
		/* inline extent whose front is dropped: shrink the item in place */
		if (bookend && found_inline && start <= key.offset) {
565 566
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
567
						   extent_end - end);
568
			inode_sub_bytes(inode, end - key.offset);
Z
Zheng Yan 已提交
569 570 571
			ret = btrfs_truncate_item(trans, root, path,
						  new_size, 0);
			BUG_ON(ret);
572
		}
C
Chris Mason 已提交
573 574
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
Z
Zheng Yan 已提交
575
			u64 disk_bytenr;
C
Chris Mason 已提交
576 577 578 579 580 581 582
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));
Z
Zheng Yan 已提交
583
			BUG_ON(ret);
584

585 586 587 588 589 590
			/* start from the saved copy of the old item, then patch it */
			leaf = path->nodes[0];
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

C
Chris Mason 已提交
591 592 593 594 595 596
			btrfs_set_file_extent_compression(leaf, extent,
							  compression);
			btrfs_set_file_extent_encryption(leaf, extent,
							 encryption);
			btrfs_set_file_extent_other_encoding(leaf, extent,
							     other_encoding);
597
			btrfs_set_file_extent_offset(leaf, extent,
598 599 600 601 602
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
C
Chris Mason 已提交
603 604 605 606 607 608 609 610 611

			/*
			 * set the ram bytes to the size of the full extent
			 * before splitting.  This is a worst case flag,
			 * but its the best we can do because we don't know
			 * how splitting affects compression
			 */
			btrfs_set_file_extent_ram_bytes(leaf, extent,
							ram_bytes);
612
			btrfs_set_file_extent_type(leaf, extent,
C
Chris Mason 已提交
613
						   BTRFS_FILE_EXTENT_REG);
614

C
Chris Mason 已提交
615
			btrfs_mark_buffer_dirty(path->nodes[0]);
Z
Zheng Yan 已提交
616 617 618 619 620 621 622 623

			/* the new item is one more reference on the same disk extent */
			disk_bytenr = le64_to_cpu(old.disk_bytenr);
			if (disk_bytenr != 0) {
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr,
						le64_to_cpu(old.disk_num_bytes),
						leaf->start,
						root->root_key.objectid,
624
						trans->transid, ins.objectid);
Z
Zheng Yan 已提交
625 626 627 628
				BUG_ON(ret);
			}
			btrfs_release_path(root, path);
			if (disk_bytenr != 0) {
629
				inode_add_bytes(inode, extent_end - end);
C
Chris Mason 已提交
630
			}
Z
Zheng Yan 已提交
631 632 633 634 635 636
		}

		/*
		 * the item was deleted above: drop its reference on the disk
		 * extent, using the leaf identity saved before the delete
		 */
		if (found_extent && !keep) {
			u64 disk_bytenr = le64_to_cpu(old.disk_bytenr);

			if (disk_bytenr != 0) {
637 638
				inode_sub_bytes(inode,
						le64_to_cpu(old.num_bytes));
Z
Zheng Yan 已提交
639 640 641 642
				ret = btrfs_free_extent(trans, root,
						disk_bytenr,
						le64_to_cpu(old.disk_num_bytes),
						leaf_start, root_owner,
643
						root_gen, key.objectid, 0);
Z
Zheng Yan 已提交
644 645 646 647 648 649
				BUG_ON(ret);
				*hint_byte = disk_bytenr;
			}
		}

		if (search_start >= end) {
C
Chris Mason 已提交
650 651 652 653 654 655
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
Y
Yan Zheng 已提交
656 657 658 659
	/* release the part of the extent lock that was taken for a bookend */
	if (locked_end > end) {
		unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
			      GFP_NOFS);
	}
C
Chris Mason 已提交
660
	btrfs_check_file(root, inode);
C
Chris Mason 已提交
661 662 663 664
	return ret;
}

/*
C
Chris Mason 已提交
665 666 667
 * this gets pages into the page cache and locks them down, it also properly
 * waits for data=ordered extents to finish before allowing the pages to be
 * modified.
C
Chris Mason 已提交
668
 */
669
/*
 * Grab and lock the page cache pages covering a write, wait for
 * writeback on them, and wait for any ordered extent overlapping the
 * range before the pages may be modified.
 *
 * @root:        root of the inode, supplies the sector size
 * @file:        file being written
 * @pages:       array with room for @num_pages entries, filled in here
 * @num_pages:   number of pages to grab starting at the page holding @pos
 * @pos:         file offset where the write starts
 * @first_index: unused
 * @last_index:  unused
 * @write_bytes: unused
 *
 * Returns 0 with all @num_pages pages locked and referenced.  On failure
 * (error from btrfs_cont_expand(), or -ENOMEM when a page could not be
 * grabbed) no page is left locked or referenced by this function.
 */
static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
			 struct page **pages, size_t num_pages,
			 loff_t pos, unsigned long first_index,
			 unsigned long last_index, size_t write_bytes)
{
	size_t i;
	size_t undo;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	struct inode *inode = fdentry(file)->d_inode;
	int err = 0;
	u64 start_pos;
	u64 last_pos;

	start_pos = pos & ~((u64)root->sectorsize - 1);
	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;

	/* writing past EOF: let btrfs_cont_expand() deal with the gap first */
	if (start_pos > inode->i_size) {
		err = btrfs_cont_expand(inode, start_pos);
		if (err)
			return err;
	}

	memset(pages, 0, num_pages * sizeof(struct page *));
again:
	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			/*
			 * the caller does not drop pages when we fail, so
			 * give back everything grabbed in this pass
			 */
			for (undo = 0; undo < i; undo++) {
				unlock_page(pages[undo]);
				page_cache_release(pages[undo]);
				pages[undo] = NULL;
			}
			return -ENOMEM;
		}
		wait_on_page_writeback(pages[i]);
	}
	if (start_pos < inode->i_size) {
		struct btrfs_ordered_extent *ordered;
		lock_extent(&BTRFS_I(inode)->io_tree,
			    start_pos, last_pos - 1, GFP_NOFS);
		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset < last_pos) {
			/*
			 * an ordered extent overlaps the range: back out
			 * completely, wait for it and start over
			 */
			btrfs_put_ordered_extent(ordered);
			unlock_extent(&BTRFS_I(inode)->io_tree,
				      start_pos, last_pos - 1, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
			goto again;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
				  GFP_NOFS);
		unlock_extent(&BTRFS_I(inode)->io_tree,
			      start_pos, last_pos - 1, GFP_NOFS);
	}
	for (i = 0; i < num_pages; i++) {
		clear_page_dirty_for_io(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
	return 0;
}

/*
 * write(2) entry point for regular btrfs files.
 *
 * The data is copied into the page cache in batches of at most nrptrs
 * pages and marked delalloc.  For O_SYNC / sync inodes / O_DIRECT the
 * written range is pushed out and waited on before returning.
 *
 * @file:  file being written
 * @buf:   user space source buffer
 * @count: number of bytes requested
 * @ppos:  in/out file position; updated to the position after the last
 *         byte written
 *
 * Returns the number of bytes written when at least one byte was
 * written, otherwise 0 or a negative errno.
 */
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
	int ret = 0;
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	int nrptrs;
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
	int will_write;

	will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
		      (file->f_flags & O_DIRECT));

	/* the page pointer array is capped at one page worth of pointers */
	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	pinned[0] = NULL;
	pinned[1] = NULL;

	pos = *ppos;
	start_pos = pos;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out_nolock;
	if (count == 0)
		goto out_nolock;

	/*
	 * generic_write_checks() may have moved pos (it is passed by
	 * address), so the start of the written range is only known now
	 */
	start_pos = pos;

	err = file_remove_suid(file);
	if (err)
		goto out_nolock;
	file_update_time(file);

	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		err = -ENOMEM;
		goto out_nolock;
	}

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

	/*
	 * if this is a nodatasum mount, force summing off for the inode
	 * all the time.  That way a later mount with summing on won't
	 * get confused
	 */
	if (btrfs_test_opt(root, NODATASUM))
		btrfs_set_flag(inode, NODATASUM);

	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!pinned[0]) {
			err = -ENOMEM;
			goto out;
		}
		if (!PageUptodate(pinned[0])) {
			ret = btrfs_readpage(NULL, pinned[0]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!pinned[1]) {
			err = -ENOMEM;
			goto out;
		}
		if (!PageUptodate(pinned[1])) {
			ret = btrfs_readpage(NULL, pinned[1]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	while(count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
					 offset);
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

		WARN_ON(num_pages > nrptrs);
		/* clear the whole array; sizeof(pages) is just one pointer */
		memset(pages, 0, sizeof(struct page *) * nrptrs);

		ret = btrfs_check_free_space(root, write_bytes, 0);
		if (ret)
			goto out;

		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
				    write_bytes);
		if (ret)
			goto out;

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			goto out;
		}

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		btrfs_drop_pages(pages, num_pages);
		if (ret)
			goto out;

		if (will_write) {
			btrfs_fdatawrite_range(inode->i_mapping, pos,
					       pos + write_bytes - 1,
					       WB_SYNC_NONE);
		} else {
			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
							   num_pages);
			if (num_pages <
			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
				btrfs_btree_balance_dirty(root, 1);
			btrfs_throttle(root);
		}

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

		cond_resched();
	}
out:
	mutex_unlock(&inode->i_mutex);
	/*
	 * a failure inside the loop is only held in ret; hand it to err so
	 * that a write which made no progress reports the error, not 0
	 */
	if (ret)
		err = ret;

out_nolock:
	kfree(pages);
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;

	if (num_written > 0 && will_write) {
		struct btrfs_trans_handle *trans;

		err = btrfs_wait_ordered_range(inode, start_pos, num_written);
		if (err)
			num_written = err;

		if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
			trans = btrfs_start_transaction(root, 1);
			ret = btrfs_log_dentry_safe(trans, root,
						    file->f_dentry);
			if (ret == 0) {
				btrfs_sync_log(trans, root);
				btrfs_end_transaction(trans, root);
			} else {
				btrfs_commit_transaction(trans, root);
			}
		}
		if (file->f_flags & O_DIRECT) {
			invalidate_mapping_pages(inode->i_mapping,
			      start_pos >> PAGE_CACHE_SHIFT,
			     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
		}
	}
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

S
Sage Weil 已提交
910
int btrfs_release_file(struct inode * inode, struct file * filp)
911
{
S
Sage Weil 已提交
912 913
	if (filp->private_data)
		btrfs_ioctl_trans_end(filp);
914 915 916
	return 0;
}

C
Chris Mason 已提交
917 918 919 920 921 922 923 924 925 926 927
/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates in
 * the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
928
int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
C
Chris Mason 已提交
929 930 931
{
	struct inode *inode = dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
932
	int ret = 0;
C
Chris Mason 已提交
933 934 935
	struct btrfs_trans_handle *trans;

	/*
936 937
	 * check the transaction that last modified this inode
	 * and see if its already been committed
C
Chris Mason 已提交
938
	 */
939 940
	if (!BTRFS_I(inode)->last_trans)
		goto out;
941

942 943 944 945 946 947 948 949 950
	mutex_lock(&root->fs_info->trans_mutex);
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;
		mutex_unlock(&root->fs_info->trans_mutex);
		goto out;
	}
	mutex_unlock(&root->fs_info->trans_mutex);

C
Chris Mason 已提交
951
	root->fs_info->tree_log_batch++;
952
	filemap_fdatawait(inode->i_mapping);
C
Chris Mason 已提交
953
	root->fs_info->tree_log_batch++;
954

955
	/*
956 957
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
S
Sage Weil 已提交
958 959 960
	if (file->private_data)
		btrfs_ioctl_trans_end(file);

C
Chris Mason 已提交
961 962 963 964 965
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto out;
	}
966 967

	ret = btrfs_log_dentry_safe(trans, root, file->f_dentry);
C
Chris Mason 已提交
968
	if (ret < 0) {
969
		goto out;
C
Chris Mason 已提交
970 971 972 973 974 975 976 977 978 979 980 981 982 983
	}

	/* we've logged all the items and now have a consistent
	 * version of the file in the log.  It is possible that
	 * someone will come in and modify the file, but that's
	 * fine because the log is consistent on disk, and we
	 * have references to all of the file's extents
	 *
	 * It is possible that someone will come in and log the
	 * file again, but that will end up using the synchronization
	 * inside btrfs_sync_log to keep things safe.
	 */
	mutex_unlock(&file->f_dentry->d_inode->i_mutex);

984 985 986 987 988 989
	if (ret > 0) {
		ret = btrfs_commit_transaction(trans, root);
	} else {
		btrfs_sync_log(trans, root);
		ret = btrfs_end_transaction(trans, root);
	}
C
Chris Mason 已提交
990
	mutex_lock(&file->f_dentry->d_inode->i_mutex);
C
Chris Mason 已提交
991 992 993 994
out:
	return ret > 0 ? EIO : ret;
}

C
Chris Mason 已提交
995
static struct vm_operations_struct btrfs_file_vm_ops = {
996
	.fault		= filemap_fault,
C
Chris Mason 已提交
997 998 999 1000 1001 1002 1003 1004 1005 1006
	.page_mkwrite	= btrfs_page_mkwrite,
};

/*
 * ->mmap hook: note the access on @filp and install btrfs_file_vm_ops
 * on @vma.  Always returns 0.
 */
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	file_accessed(filp);
	vma->vm_ops = &btrfs_file_vm_ops;

	return 0;
}

C
Chris Mason 已提交
1007 1008 1009
struct file_operations btrfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
C
Chris Mason 已提交
1010
	.aio_read       = generic_file_aio_read,
C
Chris Mason 已提交
1011
	.splice_read	= generic_file_splice_read,
C
Chris Mason 已提交
1012
	.write		= btrfs_file_write,
C
Chris Mason 已提交
1013
	.mmap		= btrfs_file_mmap,
C
Chris Mason 已提交
1014
	.open		= generic_file_open,
1015
	.release	= btrfs_release_file,
C
Chris Mason 已提交
1016
	.fsync		= btrfs_sync_file,
C
Christoph Hellwig 已提交
1017
	.unlocked_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
1018
#ifdef CONFIG_COMPAT
C
Christoph Hellwig 已提交
1019
	.compat_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
1020 1021
#endif
};