// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>

#include "ctree.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"
16 17 18 19 20 21 22 23 24 25 26 27
static int caching_kthread(void *data)
{
	struct btrfs_root *root = data;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u64 last = (u64)-1;
	int slot;
	int ret;

28
	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
C
Chris Mason 已提交
29 30
		return 0;

31 32 33 34 35 36 37
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Since the commit root is read-only, we can safely skip locking. */
	path->skip_locking = 1;
	path->search_commit_root = 1;
38
	path->reada = READA_FORWARD;
39 40 41 42 43 44

	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.offset = 0;
	key.type = BTRFS_INODE_ITEM_KEY;
again:
	/* need to make sure the commit_root doesn't disappear */
45
	down_read(&fs_info->commit_root_sem);
46 47 48 49 50 51

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
52
		if (btrfs_fs_closing(fs_info))
53 54 55 56
			goto out;

		leaf = path->nodes[0];
		slot = path->slots[0];
57
		if (slot >= btrfs_header_nritems(leaf)) {
58 59 60 61 62 63 64 65 66 67
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				break;

			if (need_resched() ||
			    btrfs_transaction_in_commit(fs_info)) {
				leaf = path->nodes[0];

68
				if (WARN_ON(btrfs_header_nritems(leaf) == 0))
69 70 71 72 73 74 75
					break;

				/*
				 * Save the key so we can advances forward
				 * in the next search.
				 */
				btrfs_item_key_to_cpu(leaf, &key, 0);
76
				btrfs_release_path(path);
77
				root->ino_cache_progress = last;
78
				up_read(&fs_info->commit_root_sem);
79 80 81 82 83 84 85 86 87 88 89
				schedule_timeout(1);
				goto again;
			} else
				continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.type != BTRFS_INODE_ITEM_KEY)
			goto next;

90
		if (key.objectid >= root->highest_objectid)
91 92 93
			break;

		if (last != (u64)-1 && last + 1 != key.objectid) {
94
			__btrfs_add_free_space(fs_info, ctl, last + 1,
95
					       key.objectid - last - 1);
96
			wake_up(&root->ino_cache_wait);
97 98 99 100 101 102 103
		}

		last = key.objectid;
next:
		path->slots[0]++;
	}

104
	if (last < root->highest_objectid - 1) {
105
		__btrfs_add_free_space(fs_info, ctl, last + 1,
106
				       root->highest_objectid - last - 1);
107 108
	}

109 110 111
	spin_lock(&root->ino_cache_lock);
	root->ino_cache_state = BTRFS_CACHE_FINISHED;
	spin_unlock(&root->ino_cache_lock);
112

113
	root->ino_cache_progress = (u64)-1;
114 115
	btrfs_unpin_free_ino(root);
out:
116
	wake_up(&root->ino_cache_wait);
117
	up_read(&fs_info->commit_root_sem);
118 119 120 121 122 123 124 125

	btrfs_free_path(path);

	return ret;
}

static void start_caching(struct btrfs_root *root)
{
126
	struct btrfs_fs_info *fs_info = root->fs_info;
127
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
128
	struct task_struct *tsk;
129
	int ret;
130
	u64 objectid;
131

132
	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
C
Chris Mason 已提交
133 134
		return;

135 136 137
	spin_lock(&root->ino_cache_lock);
	if (root->ino_cache_state != BTRFS_CACHE_NO) {
		spin_unlock(&root->ino_cache_lock);
138 139 140
		return;
	}

141 142
	root->ino_cache_state = BTRFS_CACHE_STARTED;
	spin_unlock(&root->ino_cache_lock);
143

144
	ret = load_free_ino_cache(fs_info, root);
145
	if (ret == 1) {
146 147 148
		spin_lock(&root->ino_cache_lock);
		root->ino_cache_state = BTRFS_CACHE_FINISHED;
		spin_unlock(&root->ino_cache_lock);
149 150 151
		return;
	}

152 153 154 155 156 157 158 159 160
	/*
	 * It can be quite time-consuming to fill the cache by searching
	 * through the extent tree, and this can keep ino allocation path
	 * waiting. Therefore at start we quickly find out the highest
	 * inode number and we know we can use inode numbers which fall in
	 * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
	 */
	ret = btrfs_find_free_objectid(root, &objectid);
	if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
161
		__btrfs_add_free_space(fs_info, ctl, objectid,
162 163 164
				       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
	}

165
	tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
166
			  root->root_key.objectid);
167
	if (IS_ERR(tsk)) {
168 169
		btrfs_warn(fs_info, "failed to start inode caching task");
		btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE,
170
					     "disabling inode map caching");
171
	}
172 173 174 175
}

/*
 * Allocate a free inode number for @root into *objectid.
 *
 * With INODE_MAP_CACHE disabled this falls back to the simple
 * sequential allocator.  Otherwise it pulls a number from the free-ino
 * cache, starting the caching worker and sleeping on ino_cache_wait
 * until either the cache has free space or caching finished.
 *
 * Returns 0 on success, -ENOSPC when caching finished with no free
 * inode numbers left, or an error from btrfs_find_free_objectid().
 */
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
	if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
		return btrfs_find_free_objectid(root, objectid);

again:
	*objectid = btrfs_find_ino_for_alloc(root);

	if (*objectid != 0)
		return 0;

	start_caching(root);

	wait_event(root->ino_cache_wait,
		   root->ino_cache_state == BTRFS_CACHE_FINISHED ||
		   root->free_ino_ctl->free_space > 0);

	if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
	    root->free_ino_ctl->free_space == 0)
		return -ENOSPC;
	else
		goto again;
}

/*
 * Give back inode number @objectid (e.g. when inode creation failed).
 * The number goes into the pinned tree; it is moved to the free tree by
 * btrfs_unpin_free_ino() at transaction commit.  If caching has not
 * finished yet, we take commit_root_sem to serialize against the
 * caching worker before pinning the number.
 */
void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
		return;
again:
	if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
		__btrfs_add_free_space(fs_info, pinned, objectid, 1);
	} else {
		down_write(&fs_info->commit_root_sem);
		spin_lock(&root->ino_cache_lock);
		if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
			/* Caching finished while we took the locks; retry
			 * on the fast path. */
			spin_unlock(&root->ino_cache_lock);
			up_write(&fs_info->commit_root_sem);
			goto again;
		}
		spin_unlock(&root->ino_cache_lock);

		start_caching(root);

		__btrfs_add_free_space(fs_info, pinned, objectid, 1);

		up_write(&fs_info->commit_root_sem);
	}
}

/*
227 228 229 230
 * When a transaction is committed, we'll move those inode numbers which are
 * smaller than root->ino_cache_progress from pinned tree to free_ino tree, and
 * others will just be dropped, because the commit root we were searching has
 * changed.
231
 *
232
 * Must be called with root->fs_info->commit_root_sem held
233 234 235 236 237
 */
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
238
	spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
239 240 241 242
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 count;

243
	if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
C
Chris Mason 已提交
244 245
		return;

246
	while (1) {
247 248 249
		bool add_to_ctl = true;

		spin_lock(rbroot_lock);
250
		n = rb_first(rbroot);
251 252
		if (!n) {
			spin_unlock(rbroot_lock);
253
			break;
254
		}
255 256

		info = rb_entry(n, struct btrfs_free_space, offset_index);
257
		BUG_ON(info->bitmap); /* Logic error */
258

259
		if (info->offset > root->ino_cache_progress)
260
			add_to_ctl = false;
261 262
		else if (info->offset + info->bytes > root->ino_cache_progress)
			count = root->ino_cache_progress - info->offset + 1;
263 264 265 266
		else
			count = info->bytes;

		rb_erase(&info->offset_index, rbroot);
267 268
		spin_unlock(rbroot_lock);
		if (add_to_ctl)
269 270
			__btrfs_add_free_space(root->fs_info, ctl,
					       info->offset, count);
271
		kmem_cache_free(btrfs_free_space_cachep, info);
272 273 274
	}
}

#define INIT_THRESHOLD	((SZ_32K / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_SIZE * 8)

/*
 * The goal is to keep the memory used by the free_ino tree won't
 * exceed the memory if we use bitmaps only.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int max_ino;
	int max_bitmaps;

	n = rb_last(&ctl->free_space_offset);
	if (!n) {
		ctl->extents_thresh = INIT_THRESHOLD;
		return;
	}
	info = rb_entry(n, struct btrfs_free_space, offset_index);

	/*
	 * Find the maximum inode number in the filesystem. Note we
	 * ignore the fact that this can be a bitmap, because we are
	 * not doing precise calculation.
	 */
	max_ino = info->bytes - 1;

	max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
	if (max_bitmaps <= ctl->total_bitmaps) {
		ctl->extents_thresh = 0;
		return;
	}

	ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
310
				PAGE_SIZE / sizeof(*info);
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
}

/*
 * Decide whether a chunk of free inode numbers should be stored as a
 * bitmap.  Plain extents are kept while we are under the extents
 * threshold, or when the chunk is big (more than a tenth of the numbers
 * one bitmap can hold).
 */
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	bool below_thresh = ctl->free_extents < ctl->extents_thresh;
	bool big_chunk = info->bytes > INODES_PER_BITMAP / 10;

	return !(below_thresh || big_chunk);
}

327
static const struct btrfs_free_space_op free_ino_op = {
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

/* The pinned tree keeps a fixed threshold, so there is nothing to do. */
static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
{
}

static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	/*
	 * Extents only, never bitmaps:
	 *
	 * - The pinned tree is only used during the process of caching
	 *   work.
	 * - It keeps the code simpler. See btrfs_unpin_free_ino().
	 */
	return false;
}

349
static const struct btrfs_free_space_op pinned_free_ino_op = {
350 351 352 353 354 355 356 357 358 359 360 361 362 363
	.recalc_thresholds	= pinned_recalc_thresholds,
	.use_bitmap		= pinned_use_bitmap,
};

void btrfs_init_free_ino_ctl(struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

	spin_lock_init(&ctl->tree_lock);
	ctl->unit = 1;
	ctl->start = 0;
	ctl->private = NULL;
	ctl->op = &free_ino_op;
364 365
	INIT_LIST_HEAD(&ctl->trimming_ranges);
	mutex_init(&ctl->cache_writeout_mutex);
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381

	/*
	 * Initially we allow to use 16K of ram to cache chunks of
	 * inode numbers before we resort to bitmaps. This is somewhat
	 * arbitrary, but it will be adjusted in runtime.
	 */
	ctl->extents_thresh = INIT_THRESHOLD;

	spin_lock_init(&pinned->tree_lock);
	pinned->unit = 1;
	pinned->start = 0;
	pinned->private = NULL;
	pinned->extents_thresh = 0;
	pinned->op = &pinned_free_ino_op;
}

382 383 384
int btrfs_save_ino_cache(struct btrfs_root *root,
			 struct btrfs_trans_handle *trans)
{
385
	struct btrfs_fs_info *fs_info = root->fs_info;
386 387 388
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
389
	struct btrfs_block_rsv *rsv;
390
	struct extent_changeset *data_reserved = NULL;
391
	u64 num_bytes;
392 393 394 395 396
	u64 alloc_hint = 0;
	int ret;
	int prealloc;
	bool retry = false;

397 398 399 400 401 402
	/* only fs tree and subvol/snap needs ino cache */
	if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
	    (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
	     root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
		return 0;

403
	/* Don't save inode cache if we are deleting this root */
404
	if (btrfs_root_refs(&root->root_item) == 0)
405 406
		return 0;

407
	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
C
Chris Mason 已提交
408 409
		return 0;

410 411 412
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
C
Chris Mason 已提交
413

414
	rsv = trans->block_rsv;
415
	trans->block_rsv = &fs_info->trans_block_rsv;
416 417 418 419

	num_bytes = trans->bytes_reserved;
	/*
	 * 1 item for inode item insertion if need
420 421
	 * 4 items for inode item update (in the worst case)
	 * 1 items for slack space if we need do truncation
422 423 424
	 * 1 item for free space object
	 * 3 items for pre-allocation
	 */
425
	trans->bytes_reserved = btrfs_calc_trans_metadata_size(fs_info, 10);
M
Miao Xie 已提交
426 427 428
	ret = btrfs_block_rsv_add(root, trans->block_rsv,
				  trans->bytes_reserved,
				  BTRFS_RESERVE_NO_FLUSH);
429 430
	if (ret)
		goto out;
431 432
	trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
				      trans->bytes_reserved, 1);
433 434
again:
	inode = lookup_free_ino_inode(root, path);
435
	if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
436
		ret = PTR_ERR(inode);
437
		goto out_release;
438 439 440
	}

	if (IS_ERR(inode)) {
441
		BUG_ON(retry); /* Logic error */
442 443 444 445
		retry = true;

		ret = create_free_ino_inode(root, trans, path);
		if (ret)
446
			goto out_release;
447 448 449 450 451
		goto again;
	}

	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
452
	if (ret) {
453
		btrfs_abort_transaction(trans, ret);
454 455
		goto out_put;
	}
456 457

	if (i_size_read(inode) > 0) {
458
		ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
459
		if (ret) {
460
			if (ret != -ENOSPC)
461
				btrfs_abort_transaction(trans, ret);
462
			goto out_put;
463
		}
464 465
	}

466 467
	spin_lock(&root->ino_cache_lock);
	if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
468
		ret = -1;
469
		spin_unlock(&root->ino_cache_lock);
470 471
		goto out_put;
	}
472
	spin_unlock(&root->ino_cache_lock);
473 474 475

	spin_lock(&ctl->tree_lock);
	prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
476 477
	prealloc = ALIGN(prealloc, PAGE_SIZE);
	prealloc += ctl->total_bitmaps * PAGE_SIZE;
478 479 480
	spin_unlock(&ctl->tree_lock);

	/* Just to make sure we have enough space */
481
	prealloc += 8 * PAGE_SIZE;
482

483
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc);
484 485 486 487 488
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
					      prealloc, prealloc, &alloc_hint);
489
	if (ret) {
490
		btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
491
		goto out_put;
492
	}
493

494
	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
495
	btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
496 497
out_put:
	iput(inode);
498
out_release:
499 500
	trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
				      trans->bytes_reserved, 0);
501 502
	btrfs_block_rsv_release(fs_info, trans->block_rsv,
				trans->bytes_reserved);
503
out:
504 505
	trans->block_rsv = rsv;
	trans->bytes_reserved = num_bytes;
506 507

	btrfs_free_path(path);
508
	extent_changeset_free(data_reserved);
509 510 511
	return ret;
}

512
int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
C
Chris Mason 已提交
513 514 515
{
	struct btrfs_path *path;
	int ret;
516
	struct extent_buffer *l;
C
Chris Mason 已提交
517
	struct btrfs_key search_key;
518
	struct btrfs_key found_key;
C
Chris Mason 已提交
519 520 521
	int slot;

	path = btrfs_alloc_path();
T
Tsutomu Itoh 已提交
522 523
	if (!path)
		return -ENOMEM;
C
Chris Mason 已提交
524

525 526
	search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
	search_key.type = -1;
C
Chris Mason 已提交
527 528 529 530
	search_key.offset = (u64)-1;
	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	if (ret < 0)
		goto error;
531
	BUG_ON(ret == 0); /* Corruption */
C
Chris Mason 已提交
532 533
	if (path->slots[0] > 0) {
		slot = path->slots[0] - 1;
534 535
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);
536 537
		*objectid = max_t(u64, found_key.objectid,
				  BTRFS_FIRST_FREE_OBJECTID - 1);
C
Chris Mason 已提交
538
	} else {
539
		*objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
C
Chris Mason 已提交
540 541 542 543 544 545 546
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

547
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
548 549
{
	int ret;
550
	mutex_lock(&root->objectid_mutex);
551

552
	if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
553 554 555
		btrfs_warn(root->fs_info,
			   "the objectid of root %llu reaches its highest value",
			   root->root_key.objectid);
556 557
		ret = -ENOSPC;
		goto out;
558
	}
559 560 561 562

	*objectid = ++root->highest_objectid;
	ret = 0;
out:
563
	mutex_unlock(&root->objectid_mutex);
564 565
	return ret;
}