/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.   This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

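/*
 * compare two delayed refs queued on the same head: order by ref type first,
 * then by the type specific fields, and finally by sequence number when
 * check_seq is set.
 */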
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}

/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr)
			p = &(*p)->rb_left;
		else if (bytenr > entry->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_head *
find_ref_head(struct rb_root *root, u64 bytenr,
	      int return_bigger)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				n = rb_first(root);
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
			return entry;
		}
		return entry;
	}
	return NULL;
}

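/*
 * try to take the per-head mutex.  If we have to drop the delayed_refs
 * spinlock and sleep on the mutex, the head may be processed and removed
 * from the rbtree in the meantime; -EAGAIN is returned in that case so the
 * caller can repeat the lookup.
 */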
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	assert_spin_locked(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}

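/*
 * remove a single delayed ref from its head: unlink it from the head's
 * lists, drop the list reference and update the entry counters.
 */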
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	assert_spin_locked(&head->lock);
	list_del(&ref->list);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
	if (trans->delayed_ref_updates)
		trans->delayed_ref_updates--;
}

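/*
 * try to merge ref with the refs that follow it in the head's list.  Refs
 * with the same action accumulate their ref_mod, opposite actions cancel
 * each other, and a ref whose ref_mod reaches zero is dropped.  Refs at or
 * beyond seq are still needed by a tree mod log user and are skipped.
 * Returns true once ref itself has been dropped or merged away.
 */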
static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	bool done = false;

	next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
				list);
	while (!done && &next->list != &head->ref_list) {
		int mod;
		struct btrfs_delayed_ref_node *next2;

		next2 = list_next_entry(next, list);

		if (next == ref)
			goto next;

		if (seq && next->seq >= seq)
			goto next;

		if (comp_refs(ref, next, false))
			goto next;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
next:
		next = next2;
	}

	return done;
}

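/*
 * merge the delayed refs queued on a head so that running the head touches
 * the extent tree as little as possible.  Data heads are left alone, and so
 * are refs still visible to a tree mod log user.
 */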
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_node *ref;
	u64 seq = 0;

	assert_spin_locked(&head->lock);

	if (list_empty(&head->ref_list))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	spin_unlock(&fs_info->tree_mod_seq_lock);

	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
			       list);
	while (&ref->list != &head->ref_list) {
		if (seq && ref->seq >= seq)
			goto next;

		if (merge_ref(trans, delayed_refs, head, ref, seq)) {
			if (list_empty(&head->ref_list))
				break;
			ref = list_first_entry(&head->ref_list,
					       struct btrfs_delayed_ref_node,
					       list);
			continue;
		}
next:
		ref = list_next_entry(ref, list);
	}
}

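/*
 * returns 1 if delayed refs with the given seq have to be held back because
 * a tree mod log user with a lower or equal sequence number is still active.
 */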
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
			    struct btrfs_delayed_ref_root *delayed_refs,
			    u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			btrfs_debug(fs_info,
				"holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
				(u32)(seq >> 32), (u32)seq,
				(u32)(elem->seq >> 32), (u32)elem->seq,
				delayed_refs);
			ret = 1;
		}
	}

	spin_unlock(&fs_info->tree_mod_seq_lock);
	return ret;
}

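/*
 * select the next delayed ref head to run.  The search starts at
 * run_delayed_start and wraps around to the start of the rbtree at most
 * once; heads already being processed by another worker are skipped.  The
 * returned head is marked as processing and run_delayed_start is advanced
 * past it.
 */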
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	u64 start;
	bool loop = false;

	delayed_refs = &trans->transaction->delayed_refs;

again:
	start = delayed_refs->run_delayed_start;
	head = find_ref_head(&delayed_refs->href_root, start, 1);
	if (!head && !loop) {
		delayed_refs->run_delayed_start = 0;
		start = 0;
		loop = true;
		head = find_ref_head(&delayed_refs->href_root, start, 1);
		if (!head)
			return NULL;
	} else if (!head && loop) {
		return NULL;
	}

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (loop)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			start = 0;
			loop = true;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr +
		head->num_bytes;
	return head;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int
add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_root *root,
			   struct btrfs_delayed_ref_head *href,
			   struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	/* Check whether we can merge the tail node with ref */
	if (list_empty(&href->ref_list))
		goto add_tail;
	exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
			   list);
	/* No need to compare bytenr nor is_head */
	if (comp_refs(exist, ref, true))
		goto add_tail;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove existing tail if its ref_mod is zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;

add_tail:
	list_add_tail(&ref->list, &href->ref_list);
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	trans->delayed_ref_updates++;
	spin_unlock(&href->lock);
	return ret;
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
			 struct btrfs_delayed_ref_head *existing,
			 struct btrfs_delayed_ref_head *update,
			 int *old_ref_mod_ret)
{
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);
	if (update->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing->must_insert_reserved = update->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation.
	 * We only need the lock for this case because the existing head could be
	 * processed concurrently; refs we have just added are known to be fine.
	 */
	old_ref_mod = existing->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing->is_data) {
		if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
			delayed_refs->pending_csums -= existing->num_bytes;
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
			delayed_refs->pending_csums += existing->num_bytes;
	}
	spin_unlock(&existing->lock);
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
		     struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
		     int action, int is_data, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	int count_mod = 1;
	int must_insert_reserved = 0;
	int qrecord_inserted = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * the head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
	 * the reserved accounting when the extent is finally added, or
	 * if a later modification deletes the delayed ref without ever
	 * inserting the extent into the extent allocation tree.
	 * ref->must_insert_reserved is the flag used to record
	 * that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	INIT_LIST_HEAD(&head_ref->ref_list);
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	head_ref->qgroup_reserved = 0;
	head_ref->qgroup_ref_root = 0;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (ref_root && reserved) {
			head_ref->qgroup_ref_root = ref_root;
			head_ref->qgroup_reserved = reserved;
		}

		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;

		if (btrfs_qgroup_trace_extent_nolock(fs_info,
					delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	trace_add_delayed_ref_head(fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		WARN_ON(ref_root && reserved && existing->qgroup_ref_root
			&& existing->qgroup_reserved);
		update_existing_head_ref(delayed_refs, existing, head_ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (old_ref_mod)
			*old_ref_mod = 0;
		if (is_data && count_mod < 0)
			delayed_refs->pending_csums += num_bytes;
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
	if (new_ref_mod)
		*new_ref_mod = head_ref->total_ref_mod;
	return head_ref;
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void
add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
		     struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
		     u64 num_bytes, u64 parent, u64 ref_root, int level,
665
		     int action)
666 667 668
{
	struct btrfs_delayed_tree_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	u64 seq = 0;
	int ret;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);
	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	INIT_LIST_HEAD(&ref->list);
	INIT_LIST_HEAD(&ref->add_list);

	full_ref = btrfs_delayed_node_to_tree_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
	full_ref->level = level;

	trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);

	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);

	/*
	 * XXX: memory should be freed at the same level it was allocated, but
	 * the current call structure does not follow that.  Needs cleanup.
	 */
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void
add_delayed_data_ref(struct btrfs_fs_info *fs_info,
		     struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
		     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
721
		     u64 offset, int action)
722 723 724
{
	struct btrfs_delayed_data_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
725
	u64 seq = 0;
726
	int ret;
727 728 729 730 731 732

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	/* first set the basic ref node struct up */
	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	INIT_LIST_HEAD(&ref->list);
	INIT_LIST_HEAD(&ref->add_list);

	full_ref = btrfs_delayed_node_to_data_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref->type = BTRFS_EXTENT_DATA_REF_KEY;

	full_ref->objectid = owner;
	full_ref->offset = offset;

	trace_add_delayed_data_ref(fs_info, ref, full_ref, action);

	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);

	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root,  int level, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;

	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		goto free_ref;

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record)
			goto free_head_ref;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
					bytenr, num_bytes, 0, 0, action, 0,
					&qrecord_inserted, old_ref_mod,
					new_ref_mod);

	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
			     num_bytes, parent, ref_root, level, action);
	spin_unlock(&delayed_refs->lock);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;

free_head_ref:
	kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
free_ref:
	kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	return -ENOMEM;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
			       u64 owner, u64 offset, u64 reserved, int action,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;

	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
					bytenr, num_bytes, ref_root, reserved,
					action, 1, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
				   num_bytes, parent, ref_root, owner, offset,
				   action);
	spin_unlock(&delayed_refs->lock);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;
}

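/*
 * queue a head-only update (BTRFS_UPDATE_DELAYED_HEAD) that carries just an
 * extent_op.  No reference count changes; the extent_op is applied to the
 * extent item when the head is run.
 */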
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
				struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
			     extent_op->is_data, NULL, NULL, NULL);

	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}

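/*
 * destroy the slab caches used for delayed refs.  kmem_cache_destroy()
 * accepts NULL, so this is also safe after a partially failed init.
 */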
void btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

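/*
 * create the slab caches backing delayed ref heads, tree refs, data refs and
 * extent ops.  On failure anything allocated so far is destroyed again.
 */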
int btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}