file.c 22.0 KB
Newer Older
C
Chris Mason 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

C
Chris Mason 已提交
19 20 21 22 23 24 25 26 27 28 29 30 31
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
32
#include <linux/version.h>
C
Chris Mason 已提交
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"


/*
 * Copy up to write_bytes bytes from the user buffer into prepared_pages.
 * The first page is filled starting at the in-page offset of pos; every
 * following page is filled from its beginning.
 *
 * Returns 0 when every chunk was copied, -EFAULT as soon as
 * __copy_from_user() reports uncopied bytes.
 */
static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
				struct page **prepared_pages,
				const char __user * buf)
{
	long uncopied = 0;
	int in_page = pos & (PAGE_CACHE_SIZE - 1);
	int idx = 0;

	while (idx < num_pages && write_bytes > 0) {
		struct page *dst = prepared_pages[idx];
		size_t chunk = min_t(size_t, PAGE_CACHE_SIZE - in_page,
				     write_bytes);

		fault_in_pages_readable(buf, chunk);

		/* Copy data from userspace to the current page */
		kmap(dst);
		uncopied = __copy_from_user(page_address(dst) + in_page,
					    buf, chunk);
		/* Flush processor's dcache for this page */
		flush_dcache_page(dst);
		kunmap(dst);

		buf += chunk;
		write_bytes -= chunk;
		if (uncopied)
			return -EFAULT;

		/* only the first page starts at a non-zero offset */
		in_page = 0;
		idx++;
	}
	return 0;
}

/*
 * Unlock, mark accessed and release each page in the array, stopping at
 * num_pages entries or at the first NULL slot, whichever comes first.
 */
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	struct page **cur = pages;
	struct page **stop = pages + num_pages;

	while (cur < stop && *cur) {
		unlock_page(*cur);
		mark_page_accessed(*cur);
		page_cache_release(*cur);
		cur++;
	}
}

83 84
static int insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode,
85 86 87
				u64 offset, size_t size,
				struct page **pages, size_t page_offset,
				int num_pages)
88 89 90
{
	struct btrfs_key key;
	struct btrfs_path *path;
91 92 93
	struct extent_buffer *leaf;
	char *kaddr;
	unsigned long ptr;
94
	struct btrfs_file_extent_item *ei;
95
	struct page *page;
96 97 98
	u32 datasize;
	int err = 0;
	int ret;
99 100
	int i;
	ssize_t cur_size;
101 102 103 104 105 106 107 108 109 110 111

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	btrfs_set_trans_block_group(trans, inode);

	key.objectid = inode->i_ino;
	key.offset = offset;
	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);

112 113
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
114 115 116
		err = ret;
		goto fail;
	}
117
	if (ret == 1) {
118 119 120 121 122
		struct btrfs_key found_key;

		if (path->slots[0] == 0)
			goto insert;

123 124
		path->slots[0]--;
		leaf = path->nodes[0];
125 126 127 128 129 130 131
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.objectid != inode->i_ino)
			goto insert;

		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto insert;
132 133 134 135 136 137 138 139 140 141 142 143
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			goto insert;
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		ret = 0;
	}
	if (ret == 0) {
		u32 found_size;
144
		u64 found_end;
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159

		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			err = ret;
			btrfs_print_leaf(root, leaf);
			printk("found wasn't inline offset %Lu inode %lu\n",
			       offset, inode->i_ino);
			goto fail;
		}
		found_size = btrfs_file_extent_inline_len(leaf,
					  btrfs_item_nr(leaf, path->slots[0]));
160
		found_end = key.offset + found_size;
161

162
		if (found_end < offset + size) {
163 164
			btrfs_release_path(root, path);
			ret = btrfs_search_slot(trans, root, &key, path,
165
						offset + size - found_end, 1);
166
			BUG_ON(ret != 0);
167

168
			ret = btrfs_extend_item(trans, root, path,
169
						offset + size - found_end);
170 171 172 173 174 175 176 177
			if (ret) {
				err = ret;
				goto fail;
			}
			leaf = path->nodes[0];
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
		}
178 179 180 181
		if (found_end < offset) {
			ptr = btrfs_file_extent_inline_start(ei) + found_size;
			memset_extent_buffer(leaf, 0, ptr, offset - found_end);
		}
182 183 184
	} else {
insert:
		btrfs_release_path(root, path);
185 186
		datasize = offset + size - key.offset;
		datasize = btrfs_file_extent_calc_inline_size(datasize);
187 188 189 190 191 192 193 194 195 196 197 198 199
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret) {
			err = ret;
			printk("got bad ret %d\n", ret);
			goto fail;
		}
		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, ei, trans->transid);
		btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	}
200
	ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;
201 202 203 204 205 206

	cur_size = size;
	i = 0;
	while (size > 0) {
		page = pages[i];
		kaddr = kmap_atomic(page, KM_USER0);
J
Jens Axboe 已提交
207
		cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size);
208 209 210 211 212 213 214 215 216 217
		write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size);
		kunmap_atomic(kaddr, KM_USER0);
		page_offset = 0;
		ptr += cur_size;
		size -= cur_size;
		if (i >= num_pages) {
			printk("i %d num_pages %d\n", i, num_pages);
		}
		i++;
	}
218
	btrfs_mark_buffer_dirty(leaf);
219 220 221 222 223
fail:
	btrfs_free_path(path);
	return err;
}

C
Chris Mason 已提交
224 225 226 227 228 229 230 231 232
/*
 * Record a buffered write of write_bytes bytes at pos in the filesystem
 * metadata, inside a transaction started here.
 *
 * The incoming 'trans' value is not used; the parameter is overwritten
 * with the transaction started below.
 *
 * Steps:
 *  - lock the sector-aligned extent range and mark it uptodate
 *  - if the write starts past the current end of file, insert a hole
 *    extent for the gap
 *  - large files/writes: mark the pages uptodate and dirty and flag the
 *    range as delalloc
 *  - small files: drop the existing extents in the range and store the
 *    data as an inline extent
 *  - grow i_size when the write extends the file
 *
 * The pages themselves are not unlocked or released here.
 *
 * Returns 0 on success or a negative errno.  An error from the body is
 * not replaced by the result of ending the transaction.
 */
static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
	int ret;
	int i;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct extent_map *em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hint_byte;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	u32 inline_size;
	loff_t isize = i_size_read(inode);

	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;

	em->bdev = inode->i_sb->s_bdev;

	/* round the byte range out to sector boundaries */
	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	down_read(&BTRFS_I(inode)->root->snap_sem);
	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	/* i_blocks is kept in 512 byte units */
	inode->i_blocks += num_bytes >> 9;
	hint_byte = 0;

	if ((end_of_last_block & 4095) == 0) {
		printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
	}
	set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS);

	/* FIXME...EIEIO, ENOSPC and more */

	/* insert any holes we need to create */
	if (inode->i_size < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->sectorsize - 1;
		last_pos_in_file = (isize + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;

		if (last_pos_in_file < start_pos) {
			err = btrfs_drop_extents(trans, root, inode,
						 last_pos_in_file,
						 last_pos_in_file + hole_size,
						 last_pos_in_file,
						 &hint_byte);
			if (err)
				goto failed;

			/* a file extent with disk bytenr 0 describes the hole */
			err = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size);
		}
		if (err)
			goto failed;
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	inline_size = end_pos;
	if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
	    inline_size > 32768 ||
	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		for (i = 0; i < num_pages; i++) {
			struct page *p = pages[i];
			SetPageUptodate(p);
			set_page_dirty(p);
		}
		set_extent_delalloc(em_tree, start_pos, end_of_last_block,
				 GFP_NOFS);
	} else {
		u64 aligned_end;
		/* step one, delete the existing extents in this range */
		aligned_end = (pos + write_bytes + root->sectorsize - 1) &
			~((u64)root->sectorsize - 1);
		err = btrfs_drop_extents(trans, root, inode, start_pos,
					 aligned_end, aligned_end, &hint_byte);
		if (err)
			goto failed;
		/* step two, copy the page contents into an inline extent */
		err = insert_inline_extent(trans, root, inode, start_pos,
					   end_pos - start_pos, pages, 0,
					   num_pages);
		BUG_ON(err);
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
		btrfs_update_inode(trans, root, inode);
	}
failed:
	/* keep the first error; only report the end result on success */
	ret = btrfs_end_transaction(trans, root);
	if (!err)
		err = ret;
out_unlock:
	mutex_unlock(&root->fs_info->fs_mutex);
	unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
	free_extent_map(em);
	up_read(&BTRFS_I(inode)->root->snap_sem);
	return err;
}

350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
/*
 * Remove every extent mapping that lookup_extent_mapping() finds for the
 * range start..end from the inode's extent map tree.  Always returns 0.
 */
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
{
	struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *found;

	for (found = lookup_extent_mapping(tree, start, end);
	     found != NULL;
	     found = lookup_extent_mapping(tree, start, end)) {
		remove_extent_mapping(tree, found);
		/* once for us */
		free_extent_map(found);
		/* once for the tree*/
		free_extent_map(found);
	}
	return 0;
}

C
Chris Mason 已提交
368 369 370 371 372 373 374 375 376 377 378
/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_byte is filled in with a disk byte
 * number that would be a good hint to the allocator for this file, or
 * with EXTENT_MAP_INLINE when the range sits inside an inline extent.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 *
 * inline_end is only consulted for inline extents: an inline extent that
 * reaches past inline_end is kept and trimmed instead of being deleted.
 *
 * The cached extent mappings for the range are dropped first.
 *
 * Returns 0 on success or a negative errno from the tree lookup or path
 * allocation.  Failures of the reference count, delete, free and insert
 * helpers hit BUG_ON().
 */
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode,
		       u64 start, u64 end, u64 inline_end, u64 *hint_byte)
{
	int ret;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	struct btrfs_file_extent_item *extent;
	u64 extent_end = 0;
	int keep;
	struct btrfs_file_extent_item old;
	struct btrfs_path *path;
	u64 search_start = start;
	int bookend;
	int found_type;
	int found_extent;
	int found_inline;
	int recow;

	btrfs_drop_extent_cache(inode, start, end - 1);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	while(1) {
		recow = 0;
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			/* no exact match, step back to the previous item */
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
next_slot:
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
		extent = NULL;
		leaf = path->nodes[0];
		slot = path->slots[0];
		ret = 0;
		btrfs_item_key_to_cpu(leaf, &key, slot);
		/* stop once we are past the range or past this inode */
		if (key.offset >= end || key.objectid != inode->i_ino) {
			goto out;
		}
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) {
			goto out;
		}
		/*
		 * we moved to a new leaf through btrfs_next_leaf(); redo
		 * the lookup starting from this key
		 */
		if (recow) {
			search_start = key.offset;
			continue;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			found_type = btrfs_file_extent_type(leaf, extent);
			if (found_type == BTRFS_FILE_EXTENT_REG) {
				extent_end = key.offset +
				     btrfs_file_extent_num_bytes(leaf, extent);
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				struct btrfs_item *item;
				item = btrfs_item_nr(leaf, slot);
				found_inline = 1;
				extent_end = key.offset +
				     btrfs_file_extent_inline_len(leaf, item);
			}
		} else {
			extent_end = search_start;
		}

		/* we found nothing we can drop */
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
			nritems = btrfs_header_nritems(leaf);
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
				recow = 1;
			} else {
				path->slots[0]++;
			}
			goto next_slot;
		}

		/* the next search starts where this extent ends */
		if (found_inline) {
			u64 mask = root->sectorsize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;

		if (end <= extent_end && start >= key.offset && found_inline) {
			*hint_byte = EXTENT_MAP_INLINE;
		}
		/* the extent runs past 'end': its tail must survive */
		if (end < extent_end && end >= key.offset) {
			if (found_extent) {
				u64 disk_bytenr =
				    btrfs_file_extent_disk_bytenr(leaf, extent);
				u64 disk_num_bytes =
				    btrfs_file_extent_disk_num_bytes(leaf,
								      extent);
				/* save a copy to build the bookend item from */
				read_extent_buffer(leaf, &old,
						   (unsigned long)extent,
						   sizeof(old));
				if (disk_bytenr != 0) {
					ret = btrfs_inc_extent_ref(trans, root,
					         disk_bytenr, disk_num_bytes);
					BUG_ON(ret);
				}
			}
			bookend = 1;
			if (found_inline && start <= key.offset &&
			    inline_end < extent_end)
				keep = 1;
		}
		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
			WARN_ON(start & (root->sectorsize - 1));
			if (found_extent) {
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				/* extents with disk bytenr 0 were never counted */
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
					inode->i_blocks -=
						(old_num - new_num) >> 9;
				}
				btrfs_set_file_extent_num_bytes(leaf, extent,
								new_num);
				btrfs_mark_buffer_dirty(leaf);
			} else if (end > extent_end &&
				   key.offset < inline_end &&
				   inline_end < extent_end) {
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
						   inline_end - key.offset);
				btrfs_truncate_item(trans, root, path,
						    new_size, 1);
			}
		}
		/* delete the entire extent */
		if (!keep) {
			u64 disk_bytenr = 0;
			u64 disk_num_bytes = 0;
			u64 extent_num_bytes = 0;
			if (found_extent) {
				disk_bytenr =
				      btrfs_file_extent_disk_bytenr(leaf,
								     extent);
				disk_num_bytes =
				      btrfs_file_extent_disk_num_bytes(leaf,
								       extent);
				extent_num_bytes =
				      btrfs_file_extent_num_bytes(leaf, extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
			}
			ret = btrfs_del_item(trans, root, path);
			/* TODO update progress marker and return */
			BUG_ON(ret);
			btrfs_release_path(root, path);
			/* the item is gone, 'extent' must not be used again */
			extent = NULL;
			if (found_extent && disk_bytenr != 0) {
				inode->i_blocks -= extent_num_bytes >> 9;
				ret = btrfs_free_extent(trans, root,
							disk_bytenr,
							disk_num_bytes, 0);
			}

			BUG_ON(ret);
			if (!bookend && search_start >= end) {
				ret = 0;
				goto out;
			}
			if (!bookend)
				continue;
		}
		/* an inline extent kept as bookend is trimmed in place */
		if (bookend && found_inline && start <= key.offset &&
		    inline_end < extent_end) {
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
						   extent_end - inline_end);
			btrfs_truncate_item(trans, root, path, new_size, 0);
		}
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));

			leaf = path->nodes[0];
			if (ret) {
				btrfs_print_leaf(root, leaf);
				printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep);
			}
			BUG_ON(ret);
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			/* start from the saved copy of the original item */
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

			/* the new item describes the part from 'end' onwards */
			btrfs_set_file_extent_offset(leaf, extent,
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
			btrfs_set_file_extent_type(leaf, extent,
						   BTRFS_FILE_EXTENT_REG);

			btrfs_mark_buffer_dirty(path->nodes[0]);
			if (le64_to_cpu(old.disk_bytenr) != 0) {
				inode->i_blocks +=
				      btrfs_file_extent_num_bytes(leaf,
								  extent) >> 9;
			}
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * this gets pages into the page cache and locks them down
 *
 * num_pages consecutive pages, starting at the page that contains pos,
 * are grabbed from the inode's mapping and stored in 'pages'.  For each
 * page any dirty state is cancelled, writeback is waited for and the
 * page is marked as extent mapped.
 *
 * root, first_index, last_index and write_bytes are currently unused.
 *
 * Returns 0 on success.  If a page cannot be obtained, the pages grabbed
 * so far are unlocked and released and -ENOMEM is returned.
 */
static int prepare_pages(struct btrfs_root *root,
			 struct file *file,
			 struct page **pages,
			 size_t num_pages,
			 loff_t pos,
			 unsigned long first_index,
			 unsigned long last_index,
			 size_t write_bytes)
{
	size_t i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	struct inode *inode = file->f_path.dentry->d_inode;

	/* cleared so a partial failure can be unwound up to the first NULL */
	memset(pages, 0, num_pages * sizeof(struct page *));

	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			/* btrfs_drop_pages() stops at the first NULL slot */
			btrfs_drop_pages(pages, num_pages);
			return -ENOMEM;
		}
		cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
		wait_on_page_writeback(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
	return 0;
}

/*
 * write() implementation for btrfs regular files.
 *
 * The user data is copied into page cache pages in batches of at most
 * nrptrs pages; each batch is prepared, filled from the user buffer and
 * then handed to dirty_and_release_pages().
 *
 * Before the copy loop, the first and last page of the range are read in
 * when the write does not start or end on a page boundary, and a
 * reference is held on them until the write is finished.
 *
 * O_DIRECT is rejected with -EINVAL.  For O_SYNC files or sync inodes
 * the written range is synced before returning.
 *
 * *ppos is updated to the position after the last byte written.
 *
 * Returns the number of bytes written, or a negative errno when nothing
 * was written.
 */
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
	int ret = 0;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	int nrptrs;
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;

	/* the page pointer array is limited to one page worth of pointers */
	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	pinned[0] = NULL;
	pinned[1] = NULL;
	if (file->f_flags & O_DIRECT)
		return -EINVAL;

	pos = *ppos;
	start_pos = pos;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;
	if (count == 0)
		goto out;
	/*
	 * generic_write_checks() is handed &pos and may have changed it;
	 * the sync at the end must cover the range actually written
	 */
	start_pos = pos;
	err = remove_suid(file->f_path.dentry);
	if (err)
		goto out;
	file_update_time(file);

	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		err = -ENOMEM;
		goto out;
	}

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!pinned[0]) {
			err = -ENOMEM;
			goto out_unlock;
		}
		if (!PageUptodate(pinned[0])) {
			ret = btrfs_readpage(NULL, pinned[0]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!pinned[1]) {
			err = -ENOMEM;
			goto out_unlock;
		}
		if (!PageUptodate(pinned[1])) {
			ret = btrfs_readpage(NULL, pinned[1]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	while(count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
					 offset);
		/*
		 * the in-page offset counts too: a short write that starts
		 * late in a page can still straddle two pages
		 */
		size_t num_pages = (write_bytes + offset +
				    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

		WARN_ON(num_pages > nrptrs);
		/* clear the whole array, not just sizeof(a pointer) */
		memset(pages, 0, nrptrs * sizeof(struct page *));
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
				    write_bytes);
		if (ret) {
			err = ret;
			goto out_unlock;
		}

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			err = ret;
			goto out_unlock;
		}

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		btrfs_drop_pages(pages, num_pages);
		if (ret) {
			err = ret;
			goto out_unlock;
		}

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
		btrfs_btree_balance_dirty(root, 1);
		cond_resched();
	}
out_unlock:
	/* every exit taken after mutex_lock() above comes through here */
	mutex_unlock(&inode->i_mutex);
out:
	kfree(pages);
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;

	if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
		err = sync_page_range(inode, inode->i_mapping,
				      start_pos, num_written);
		if (err < 0)
			num_written = err;
	}
	current->backing_dev_info = NULL;
	/* a partial write reports the bytes written, not the error */
	return num_written ? num_written : err;
}

static int btrfs_sync_file(struct file *file,
			   struct dentry *dentry, int datasync)
{
	struct inode *inode = dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
791
	int ret = 0;
C
Chris Mason 已提交
792 793 794
	struct btrfs_trans_handle *trans;

	/*
795 796
	 * check the transaction that last modified this inode
	 * and see if its already been committed
C
Chris Mason 已提交
797 798
	 */
	mutex_lock(&root->fs_info->fs_mutex);
799 800 801 802 803 804 805 806 807 808 809 810
	if (!BTRFS_I(inode)->last_trans)
		goto out;
	mutex_lock(&root->fs_info->trans_mutex);
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;
		mutex_unlock(&root->fs_info->trans_mutex);
		goto out;
	}
	mutex_unlock(&root->fs_info->trans_mutex);

	/*
811 812
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
C
Chris Mason 已提交
813 814 815 816 817 818 819
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto out;
	}
	ret = btrfs_commit_transaction(trans, root);
out:
820
	mutex_unlock(&root->fs_info->fs_mutex);
C
Chris Mason 已提交
821 822 823
	return ret > 0 ? EIO : ret;
}

C
Chris Mason 已提交
824
static struct vm_operations_struct btrfs_file_vm_ops = {
825 826 827 828 829 830
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
	.nopage         = filemap_nopage,
	.populate       = filemap_populate,
#else
	.fault		= filemap_fault,
#endif
C
Chris Mason 已提交
831 832 833 834 835 836 837 838 839 840
	.page_mkwrite	= btrfs_page_mkwrite,
};

/*
 * mmap() implementation: note the access on the file and install the
 * btrfs VM operations on the new mapping.  Always returns 0.
 */
static int btrfs_file_mmap(struct file	*filp, struct vm_area_struct *vma)
{
	file_accessed(filp);
	vma->vm_ops = &btrfs_file_vm_ops;

	return 0;
}

C
Chris Mason 已提交
841 842 843
struct file_operations btrfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
C
Chris Mason 已提交
844
	.aio_read       = generic_file_aio_read,
C
Chris Mason 已提交
845
	.write		= btrfs_file_write,
C
Chris Mason 已提交
846
	.mmap		= btrfs_file_mmap,
C
Chris Mason 已提交
847 848
	.open		= generic_file_open,
	.fsync		= btrfs_sync_file,
C
Christoph Hellwig 已提交
849
	.unlocked_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
850
#ifdef CONFIG_COMPAT
C
Christoph Hellwig 已提交
851
	.compat_ioctl	= btrfs_ioctl,
C
Chris Mason 已提交
852 853 854
#endif
};