// SPDX-License-Identifier: GPL-2.0
/*
 * bcache sysfs interfaces
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "sysfs.h"
#include "btree.h"
#include "request.h"
13
#include "writeback.h"
K
Kent Overstreet 已提交
14

K
Kent Overstreet 已提交
15
#include <linux/blkdev.h>
K
Kent Overstreet 已提交
16
#include <linux/sort.h>
17
#include <linux/sched/clock.h>
K
Kent Overstreet 已提交
18

S
Shenghui Wang 已提交
19
/* Default is 0 ("writethrough") */
20 21 22 23 24 25 26 27
/*
 * Names shown in and accepted by the cached device "cache_mode" attribute.
 *
 * The show handler marks the entry whose index equals
 * BDEV_CACHE_MODE(&dc->sb), and the store handler writes the index of the
 * matched name back with SET_BDEV_CACHE_MODE(), so the order of the entries
 * is significant.  The trailing NULL terminates the list.
 */
static const char * const bch_cache_modes[] = {
	"writethrough",
	"writeback",
	"writearound",
	"none",
	NULL
};

S
Shenghui Wang 已提交
28
/* Default is 0 ("auto") */
29 30 31 32 33 34
/*
 * Names shown in and accepted by the "stop_when_cache_set_failed" attribute.
 *
 * The index of the matched name is stored directly in
 * dc->stop_when_cache_set_failed, so the order of the entries is
 * significant.  The trailing NULL terminates the list.
 */
static const char * const bch_stop_on_failure_modes[] = {
	"auto",
	"always",
	NULL
};

K
Kent Overstreet 已提交
35 36 37 38 39 40 41
/*
 * Names shown in and accepted by the cache "cache_replacement_policy"
 * attribute.
 *
 * The index of an entry is the value read with CACHE_REPLACEMENT(&ca->sb)
 * and written with SET_CACHE_REPLACEMENT(), so the order of the entries is
 * significant.  The trailing NULL terminates the list.
 */
static const char * const cache_replacement_policies[] = {
	"lru",
	"fifo",
	"random",
	NULL
};

42 43 44 45 46 47
/*
 * Names shown in and accepted by the cache set "errors" attribute.
 *
 * The index of the matched name is stored directly in c->on_error, so the
 * order of the entries is significant.  The trailing NULL terminates the
 * list.
 */
static const char * const error_actions[] = {
	"unregister",
	"panic",
	NULL
};

K
Kent Overstreet 已提交
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
/*
 * Attribute declarations.
 *
 * Each macro use below provides the sysfs_<name> object that the show/store
 * handlers compare "attr" against and that the *_files[] tables list.  The
 * macros themselves are defined outside this file (presumably in sysfs.h);
 * going by their names they declare write-only, read-only and read-write
 * attributes respectively -- confirm against the macro definitions.
 */

/* Write-only triggers */
write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
write_attribute(stop);
write_attribute(clear_stats);
write_attribute(trigger_gc);
write_attribute(prune_cache);
write_attribute(flash_vol_create);

/* Read-only geometry and statistics */
read_attribute(bucket_size);
read_attribute(block_size);
read_attribute(nbuckets);
read_attribute(tree_depth);
read_attribute(root_usage_percent);
read_attribute(priority_stats);
read_attribute(btree_cache_size);
read_attribute(btree_cache_max_chain);
read_attribute(cache_available_percent);
read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(active_journal_entries);

/* Time statistics */
sysfs_time_stats_attribute(btree_gc,	sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort,	ms,  us);
sysfs_time_stats_attribute(btree_read,	ms,  us);

/* Read-only btree and writeback counters */
read_attribute(btree_nodes);
read_attribute(btree_used_percent);
read_attribute(average_key_size);
read_attribute(dirty_data);
read_attribute(bset_tree_stats);

read_attribute(state);
read_attribute(cache_read_races);
read_attribute(reclaim);
read_attribute(flush_write);
read_attribute(retry_flush_write);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
read_attribute(congested);
read_attribute(cutoff_writeback);
read_attribute(cutoff_writeback_sync);

/* Read-write tunables */
rw_attribute(congested_read_threshold_us);
rw_attribute(congested_write_threshold_us);

rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
rw_attribute(writeback_delay);
rw_attribute(writeback_rate);

rw_attribute(writeback_rate_update_seconds);
rw_attribute(writeback_rate_i_term_inverse);
rw_attribute(writeback_rate_p_term_inverse);
rw_attribute(writeback_rate_minimum);
read_attribute(writeback_rate_debug);

read_attribute(stripe_size);
read_attribute(partial_stripes_expensive);

rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
rw_attribute(readahead);
rw_attribute(errors);
rw_attribute(io_error_limit);
rw_attribute(io_error_halflife);
rw_attribute(verify);
rw_attribute(bypass_torture_test);
rw_attribute(key_merging_disabled);
rw_attribute(gc_always_rewrite);
rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
rw_attribute(gc_after_writeback);
rw_attribute(size);


136 137 138 139
/*
 * bch_snprint_string_list() - print a NULL-terminated list of names on one line
 * @buf:	destination buffer
 * @size:	number of bytes available in @buf
 * @list:	NULL-terminated array of names
 * @selected:	index of the entry to wrap in square brackets; an index past
 *		the end of @list marks nothing
 *
 * Entries are separated by a single space and the line ends in '\n', e.g.
 * "writethrough [writeback] writearound none\n".
 *
 * The output never extends past @buf + @size.  If the list does not fit it
 * is cut short and the last stored character is replaced by the newline.
 *
 * Return: number of characters stored in @buf, not counting the terminating
 * NUL; 0 when nothing could be printed (empty @list or no room).
 */
static ssize_t bch_snprint_string_list(char *buf,
				       size_t size,
				       const char * const list[],
				       size_t selected)
{
	char *out = buf;
	char *end = buf + size;
	size_t i;

	if (!size)
		return 0;

	/* Leave a valid (empty) string behind even if the list is empty. */
	buf[0] = '\0';

	for (i = 0; list[i]; i++) {
		size_t room = end - out;
		int n = snprintf(out, room,
				 i == selected ? "[%s] " : "%s ", list[i]);

		if (n < 0)
			break;

		/*
		 * snprintf() reports the length the text would have had, not
		 * what was stored.  On truncation only room - 1 characters
		 * (plus the NUL) were written, so advance by that much and
		 * stop instead of stepping past the end of the buffer.
		 */
		if ((size_t)n >= room) {
			out += room - 1;
			break;
		}

		out += n;
	}

	/* Nothing printed: there is no trailing character to overwrite. */
	if (out == buf)
		return 0;

	/* Turn the trailing separator into the line terminator. */
	out[-1] = '\n';
	return out - buf;
}

K
Kent Overstreet 已提交
152 153 154 155
SHOW(__bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
156
	char const *states[] = { "no cache", "clean", "dirty", "inconsistent" };
157
	int wb = dc->writeback_running;
K
Kent Overstreet 已提交
158 159 160 161

#define var(stat)		(dc->stat)

	if (attr == &sysfs_cache_mode)
162
		return bch_snprint_string_list(buf, PAGE_SIZE,
163
					       bch_cache_modes,
164
					       BDEV_CACHE_MODE(&dc->sb));
K
Kent Overstreet 已提交
165

166 167
	if (attr == &sysfs_stop_when_cache_set_failed)
		return bch_snprint_string_list(buf, PAGE_SIZE,
168
					       bch_stop_on_failure_modes,
169 170 171
					       dc->stop_when_cache_set_failed);


K
Kent Overstreet 已提交
172 173
	sysfs_printf(data_csum,		"%i", dc->disk.data_csum);
	var_printf(verify,		"%i");
K
Kent Overstreet 已提交
174
	var_printf(bypass_torture_test,	"%i");
K
Kent Overstreet 已提交
175 176 177 178
	var_printf(writeback_metadata,	"%i");
	var_printf(writeback_running,	"%i");
	var_print(writeback_delay);
	var_print(writeback_percent);
179 180
	sysfs_hprint(writeback_rate,
		     wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
181 182 183
	sysfs_hprint(io_errors,		atomic_read(&dc->io_errors));
	sysfs_printf(io_error_limit,	"%i", dc->error_limit);
	sysfs_printf(io_disable,	"%i", dc->io_disable);
K
Kent Overstreet 已提交
184
	var_print(writeback_rate_update_seconds);
185
	var_print(writeback_rate_i_term_inverse);
K
Kent Overstreet 已提交
186
	var_print(writeback_rate_p_term_inverse);
187
	var_print(writeback_rate_minimum);
K
Kent Overstreet 已提交
188 189

	if (attr == &sysfs_writeback_rate_debug) {
190
		char rate[20];
K
Kent Overstreet 已提交
191 192
		char dirty[20];
		char target[20];
193
		char proportional[20];
194
		char integral[20];
195 196 197
		char change[20];
		s64 next_io;

198 199 200 201
		/*
		 * Except for dirty and target, other values should
		 * be 0 if writeback is not running.
		 */
202 203 204
		bch_hprint(rate,
			   wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
			      : 0);
205 206 207 208 209 210 211 212 213
		bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
		bch_hprint(target, dc->writeback_rate_target << 9);
		bch_hprint(proportional,
			   wb ? dc->writeback_rate_proportional << 9 : 0);
		bch_hprint(integral,
			   wb ? dc->writeback_rate_integral_scaled << 9 : 0);
		bch_hprint(change, wb ? dc->writeback_rate_change << 9 : 0);
		next_io = wb ? div64_s64(dc->writeback_rate.next-local_clock(),
					 NSEC_PER_MSEC) : 0;
K
Kent Overstreet 已提交
214 215

		return sprintf(buf,
216
			       "rate:\t\t%s/sec\n"
K
Kent Overstreet 已提交
217
			       "dirty:\t\t%s\n"
218 219
			       "target:\t\t%s\n"
			       "proportional:\t%s\n"
220
			       "integral:\t%s\n"
221 222 223
			       "change:\t\t%s/sec\n"
			       "next io:\t%llims\n",
			       rate, dirty, target, proportional,
224
			       integral, change, next_io);
K
Kent Overstreet 已提交
225 226 227
	}

	sysfs_hprint(dirty_data,
228
		     bcache_dev_sectors_dirty(&dc->disk) << 9);
K
Kent Overstreet 已提交
229

230
	sysfs_hprint(stripe_size,	 ((uint64_t)dc->disk.stripe_size) << 9);
K
Kent Overstreet 已提交
231 232
	var_printf(partial_stripes_expensive,	"%u");

K
Kent Overstreet 已提交
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
	var_hprint(sequential_cutoff);
	var_hprint(readahead);

	sysfs_print(running,		atomic_read(&dc->running));
	sysfs_print(state,		states[BDEV_STATE(&dc->sb)]);

	if (attr == &sysfs_label) {
		memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE + 1] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

#undef var
	return 0;
}
SHOW_LOCKED(bch_cached_dev)

STORE(__cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
255
	ssize_t v;
K
Kent Overstreet 已提交
256
	struct cache_set *c;
G
Gabriel de Perthuis 已提交
257
	struct kobj_uevent_env *env;
K
Kent Overstreet 已提交
258 259

#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
260
#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
K
Kent Overstreet 已提交
261 262 263 264
#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)

	sysfs_strtoul(data_csum,	dc->disk.data_csum);
	d_strtoul(verify);
K
Kent Overstreet 已提交
265
	d_strtoul(bypass_torture_test);
K
Kent Overstreet 已提交
266 267 268
	d_strtoul(writeback_metadata);
	d_strtoul(writeback_running);
	d_strtoul(writeback_delay);
269

270 271
	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
			    0, bch_cutoff_writeback);
K
Kent Overstreet 已提交
272

273
	if (attr == &sysfs_writeback_rate) {
274 275
		ssize_t ret;
		long int v = atomic_long_read(&dc->writeback_rate.rate);
276

277 278 279 280 281 282 283 284
		ret = strtoul_safe_clamp(buf, v, 1, INT_MAX);

		if (!ret) {
			atomic_long_set(&dc->writeback_rate.rate, v);
			ret = size;
		}

		return ret;
285
	}
286

287 288 289
	sysfs_strtoul_clamp(writeback_rate_update_seconds,
			    dc->writeback_rate_update_seconds,
			    1, WRITEBACK_RATE_UPDATE_SECS_MAX);
290
	d_strtoul(writeback_rate_i_term_inverse);
291
	d_strtoul_nonzero(writeback_rate_p_term_inverse);
292
	d_strtoul_nonzero(writeback_rate_minimum);
K
Kent Overstreet 已提交
293

294 295 296 297 298 299 300 301
	sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);

	if (attr == &sysfs_io_disable) {
		int v = strtoul_or_return(buf);

		dc->io_disable = v ? 1 : 0;
	}

K
Kent Overstreet 已提交
302 303 304 305 306 307 308 309 310 311 312
	d_strtoi_h(sequential_cutoff);
	d_strtoi_h(readahead);

	if (attr == &sysfs_clear_stats)
		bch_cache_accounting_clear(&dc->accounting);

	if (attr == &sysfs_running &&
	    strtoul_or_return(buf))
		bch_cached_dev_run(dc);

	if (attr == &sysfs_cache_mode) {
313
		v = __sysfs_match_string(bch_cache_modes, -1, buf);
K
Kent Overstreet 已提交
314 315 316
		if (v < 0)
			return v;

317
		if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
K
Kent Overstreet 已提交
318 319 320 321 322
			SET_BDEV_CACHE_MODE(&dc->sb, v);
			bch_write_bdev_super(dc, NULL);
		}
	}

323
	if (attr == &sysfs_stop_when_cache_set_failed) {
324
		v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
325 326 327 328 329 330
		if (v < 0)
			return v;

		dc->stop_when_cache_set_failed = v;
	}

K
Kent Overstreet 已提交
331
	if (attr == &sysfs_label) {
332 333 334 335 336 337 338
		if (size > SB_LABEL_SIZE)
			return -EINVAL;
		memcpy(dc->sb.label, buf, size);
		if (size < SB_LABEL_SIZE)
			dc->sb.label[size] = '\0';
		if (size && dc->sb.label[size - 1] == '\n')
			dc->sb.label[size - 1] = '\0';
K
Kent Overstreet 已提交
339 340 341 342 343 344
		bch_write_bdev_super(dc, NULL);
		if (dc->disk.c) {
			memcpy(dc->disk.c->uuids[dc->disk.id].label,
			       buf, SB_LABEL_SIZE);
			bch_uuid_write(dc->disk.c);
		}
G
Gabriel de Perthuis 已提交
345
		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
346 347
		if (!env)
			return -ENOMEM;
G
Gabriel de Perthuis 已提交
348 349 350
		add_uevent_var(env, "DRIVER=bcache");
		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
		add_uevent_var(env, "CACHED_LABEL=%s", buf);
351 352 353
		kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
				   KOBJ_CHANGE,
				   env->envp);
G
Gabriel de Perthuis 已提交
354
		kfree(env);
K
Kent Overstreet 已提交
355 356 357
	}

	if (attr == &sysfs_attach) {
358 359 360
		uint8_t		set_uuid[16];

		if (bch_parse_uuid(buf, set_uuid) < 16)
K
Kent Overstreet 已提交
361 362
			return -EINVAL;

363
		v = -ENOENT;
K
Kent Overstreet 已提交
364
		list_for_each_entry(c, &bch_cache_sets, list) {
365
			v = bch_cached_dev_attach(dc, c, set_uuid);
K
Kent Overstreet 已提交
366 367 368
			if (!v)
				return size;
		}
369 370
		if (v == -ENOENT)
			pr_err("Can't attach %s: cache set not found", buf);
371
		return v;
K
Kent Overstreet 已提交
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
	}

	if (attr == &sysfs_detach && dc->disk.c)
		bch_cached_dev_detach(dc);

	if (attr == &sysfs_stop)
		bcache_device_stop(&dc->disk);

	return size;
}

STORE(bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);

	mutex_lock(&bch_register_lock);
	size = __cached_dev_store(kobj, attr, buf, size);

391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409
	if (attr == &sysfs_writeback_running) {
		/* dc->writeback_running changed in __cached_dev_store() */
		if (IS_ERR_OR_NULL(dc->writeback_thread)) {
			/*
			 * reject setting it to 1 via sysfs if writeback
			 * kthread is not created yet.
			 */
			if (dc->writeback_running) {
				dc->writeback_running = false;
				pr_err("%s: failed to run non-existent writeback thread",
						dc->disk.disk->disk_name);
			}
		} else
			/*
			 * writeback kthread will check if dc->writeback_running
			 * is true or false.
			 */
			bch_writeback_queue(dc);
	}
K
Kent Overstreet 已提交
410 411

	if (attr == &sysfs_writeback_percent)
412 413
		if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
			schedule_delayed_work(&dc->writeback_rate_update,
K
Kent Overstreet 已提交
414 415 416 417 418 419 420 421 422 423 424 425 426 427
				      dc->writeback_rate_update_seconds * HZ);

	mutex_unlock(&bch_register_lock);
	return size;
}

static struct attribute *bch_cached_dev_files[] = {
	&sysfs_attach,
	&sysfs_detach,
	&sysfs_stop,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_cache_mode,
428
	&sysfs_stop_when_cache_set_failed,
K
Kent Overstreet 已提交
429 430 431 432 433 434
	&sysfs_writeback_metadata,
	&sysfs_writeback_running,
	&sysfs_writeback_delay,
	&sysfs_writeback_percent,
	&sysfs_writeback_rate,
	&sysfs_writeback_rate_update_seconds,
435
	&sysfs_writeback_rate_i_term_inverse,
K
Kent Overstreet 已提交
436
	&sysfs_writeback_rate_p_term_inverse,
437
	&sysfs_writeback_rate_minimum,
K
Kent Overstreet 已提交
438
	&sysfs_writeback_rate_debug,
439 440 441
	&sysfs_errors,
	&sysfs_io_error_limit,
	&sysfs_io_disable,
K
Kent Overstreet 已提交
442
	&sysfs_dirty_data,
K
Kent Overstreet 已提交
443 444
	&sysfs_stripe_size,
	&sysfs_partial_stripes_expensive,
K
Kent Overstreet 已提交
445 446 447 448 449 450 451 452
	&sysfs_sequential_cutoff,
	&sysfs_clear_stats,
	&sysfs_running,
	&sysfs_state,
	&sysfs_label,
	&sysfs_readahead,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
K
Kent Overstreet 已提交
453
	&sysfs_bypass_torture_test,
K
Kent Overstreet 已提交
454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487
#endif
	NULL
};
KTYPE(bch_cached_dev);

/*
 * Show handler for the attributes of a flash-only volume.
 *
 * kobj, attr and buf come from the signature generated by the SHOW() macro.
 * The volume's uuid_entry is looked up in the cache set's uuids[] table by
 * d->id.  Returns the number of bytes placed in buf, or 0 when attr matches
 * none of the attributes handled here.
 */
SHOW(bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

	sysfs_printf(data_csum,	"%i", d->data_csum);
	/* u->sectors << 9 converts sectors to bytes for display. */
	sysfs_hprint(size,	u->sectors << 9);

	if (attr == &sysfs_label) {
		memcpy(buf, u->label, SB_LABEL_SIZE);
		/*
		 * The label may fill all SB_LABEL_SIZE bytes without a NUL,
		 * so terminate right after the copied bytes; strcat() and
		 * strlen() below must not depend on what buf held before.
		 */
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	return 0;
}

STORE(__bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

	sysfs_strtoul(data_csum,	d->data_csum);

	if (attr == &sysfs_size) {
		uint64_t v;
488

K
Kent Overstreet 已提交
489 490 491 492 493 494 495 496 497 498 499 500 501
		strtoi_h_or_return(buf, v);

		u->sectors = v >> 9;
		bch_uuid_write(d->c);
		set_capacity(d->disk, u->sectors);
	}

	if (attr == &sysfs_label) {
		memcpy(u->label, buf, SB_LABEL_SIZE);
		bch_uuid_write(d->c);
	}

	if (attr == &sysfs_unregister) {
502
		set_bit(BCACHE_DEV_DETACHING, &d->flags);
K
Kent Overstreet 已提交
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520
		bcache_device_stop(d);
	}

	return size;
}
STORE_LOCKED(bch_flash_dev)

/*
 * Attributes exposed for a flash-only volume.  NULL terminated; passed to
 * KTYPE() below, a project macro defined elsewhere.  data_csum is compiled
 * out with #if 0 although the handlers still contain code for it.
 */
static struct attribute *bch_flash_dev_files[] = {
	&sysfs_unregister,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_label,
	&sysfs_size,
	NULL
};
KTYPE(bch_flash_dev);

521 522 523 524 525 526
/*
 * State carried through the btree walk done by bch_bset_print_stats().
 * The btree_op is embedded so the per-node callback can get back to this
 * structure with container_of().
 */
struct bset_stats_op {
	struct btree_op op;		/* handed to bch_btree_map_nodes() */
	size_t nodes;			/* number of btree nodes visited */
	struct bset_stats stats;	/* filled by bch_btree_keys_stats() */
};

J
John Sheu 已提交
527
static int bch_btree_bset_stats(struct btree_op *b_op, struct btree *b)
528 529 530 531 532 533 534 535 536
{
	struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);

	op->nodes++;
	bch_btree_keys_stats(&b->keys, &op->stats);

	return MAP_CONTINUE;
}

537
static int bch_bset_print_stats(struct cache_set *c, char *buf)
538 539 540 541 542 543 544
{
	struct bset_stats_op op;
	int ret;

	memset(&op, 0, sizeof(op));
	bch_btree_op_init(&op.op, -1);

J
John Sheu 已提交
545
	ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, bch_btree_bset_stats);
546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
	if (ret < 0)
		return ret;

	return snprintf(buf, PAGE_SIZE,
			"btree nodes:		%zu\n"
			"written sets:		%zu\n"
			"unwritten sets:		%zu\n"
			"written key bytes:	%zu\n"
			"unwritten key bytes:	%zu\n"
			"floats:			%zu\n"
			"failed:			%zu\n",
			op.nodes,
			op.stats.sets_written, op.stats.sets_unwritten,
			op.stats.bytes_written, op.stats.bytes_unwritten,
			op.stats.floats, op.stats.failed);
}

563
static unsigned int bch_root_usage(struct cache_set *c)
K
Kent Overstreet 已提交
564
{
565
	unsigned int bytes = 0;
J
John Sheu 已提交
566 567 568
	struct bkey *k;
	struct btree *b;
	struct btree_iter iter;
K
Kent Overstreet 已提交
569

J
John Sheu 已提交
570
	goto lock_root;
K
Kent Overstreet 已提交
571

J
John Sheu 已提交
572 573
	do {
		rw_unlock(false, b);
K
Kent Overstreet 已提交
574
lock_root:
J
John Sheu 已提交
575 576 577
		b = c->root;
		rw_lock(false, b, b->level);
	} while (b != c->root);
K
Kent Overstreet 已提交
578

J
John Sheu 已提交
579 580
	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
		bytes += bkey_bytes(k);
K
Kent Overstreet 已提交
581

J
John Sheu 已提交
582
	rw_unlock(false, b);
K
Kent Overstreet 已提交
583

J
John Sheu 已提交
584 585
	return (bytes * 100) / btree_bytes(c);
}
K
Kent Overstreet 已提交
586

J
John Sheu 已提交
587 588 589 590
static size_t bch_cache_size(struct cache_set *c)
{
	size_t ret = 0;
	struct btree *b;
K
Kent Overstreet 已提交
591

J
John Sheu 已提交
592 593 594
	mutex_lock(&c->bucket_lock);
	list_for_each_entry(b, &c->btree_cache, list)
		ret += 1 << (b->keys.page_order + PAGE_SHIFT);
K
Kent Overstreet 已提交
595

J
John Sheu 已提交
596 597 598
	mutex_unlock(&c->bucket_lock);
	return ret;
}
K
Kent Overstreet 已提交
599

600
static unsigned int bch_cache_max_chain(struct cache_set *c)
J
John Sheu 已提交
601
{
602
	unsigned int ret = 0;
J
John Sheu 已提交
603
	struct hlist_head *h;
K
Kent Overstreet 已提交
604

J
John Sheu 已提交
605
	mutex_lock(&c->bucket_lock);
K
Kent Overstreet 已提交
606

J
John Sheu 已提交
607 608 609
	for (h = c->bucket_hash;
	     h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
	     h++) {
610
		unsigned int i = 0;
J
John Sheu 已提交
611
		struct hlist_node *p;
K
Kent Overstreet 已提交
612

J
John Sheu 已提交
613 614
		hlist_for_each(p, h)
			i++;
K
Kent Overstreet 已提交
615

J
John Sheu 已提交
616
		ret = max(ret, i);
K
Kent Overstreet 已提交
617 618
	}

J
John Sheu 已提交
619 620 621
	mutex_unlock(&c->bucket_lock);
	return ret;
}
K
Kent Overstreet 已提交
622

623
static unsigned int bch_btree_used(struct cache_set *c)
J
John Sheu 已提交
624 625 626 627
{
	return div64_u64(c->gc_stats.key_bytes * 100,
			 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
K
Kent Overstreet 已提交
628

629
static unsigned int bch_average_key_size(struct cache_set *c)
J
John Sheu 已提交
630 631 632 633 634 635 636 637
{
	return c->gc_stats.nkeys
		? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
		: 0;
}

SHOW(__bch_cache_set)
{
K
Kent Overstreet 已提交
638 639 640 641 642 643 644
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	sysfs_print(synchronous,		CACHE_SYNC(&c->sb));
	sysfs_print(journal_delay_ms,		c->journal_delay_ms);
	sysfs_hprint(bucket_size,		bucket_bytes(c));
	sysfs_hprint(block_size,		block_bytes(c));
	sysfs_print(tree_depth,			c->root->level);
J
John Sheu 已提交
645
	sysfs_print(root_usage_percent,		bch_root_usage(c));
K
Kent Overstreet 已提交
646

J
John Sheu 已提交
647 648
	sysfs_hprint(btree_cache_size,		bch_cache_size(c));
	sysfs_print(btree_cache_max_chain,	bch_cache_max_chain(c));
K
Kent Overstreet 已提交
649 650 651 652
	sysfs_print(cache_available_percent,	100 - c->gc_stats.in_use);

	sysfs_print_time_stats(&c->btree_gc_time,	btree_gc, sec, ms);
	sysfs_print_time_stats(&c->btree_split_time,	btree_split, sec, us);
653
	sysfs_print_time_stats(&c->sort.time,		btree_sort, ms, us);
K
Kent Overstreet 已提交
654 655
	sysfs_print_time_stats(&c->btree_read_time,	btree_read, ms, us);

J
John Sheu 已提交
656
	sysfs_print(btree_used_percent,	bch_btree_used(c));
K
Kent Overstreet 已提交
657
	sysfs_print(btree_nodes,	c->gc_stats.nodes);
J
John Sheu 已提交
658
	sysfs_hprint(average_key_size,	bch_average_key_size(c));
K
Kent Overstreet 已提交
659 660 661 662

	sysfs_print(cache_read_races,
		    atomic_long_read(&c->cache_read_races));

T
Tang Junhui 已提交
663 664 665 666 667 668 669 670 671
	sysfs_print(reclaim,
		    atomic_long_read(&c->reclaim));

	sysfs_print(flush_write,
		    atomic_long_read(&c->flush_write));

	sysfs_print(retry_flush_write,
		    atomic_long_read(&c->retry_flush_write));

K
Kent Overstreet 已提交
672 673 674 675 676
	sysfs_print(writeback_keys_done,
		    atomic_long_read(&c->writeback_keys_done));
	sysfs_print(writeback_keys_failed,
		    atomic_long_read(&c->writeback_keys_failed));

677 678 679 680
	if (attr == &sysfs_errors)
		return bch_snprint_string_list(buf, PAGE_SIZE, error_actions,
					       c->on_error);

K
Kent Overstreet 已提交
681 682
	/* See count_io_errors for why 88 */
	sysfs_print(io_error_halflife,	c->error_decay * 88);
C
Coly Li 已提交
683
	sysfs_print(io_error_limit,	c->error_limit);
K
Kent Overstreet 已提交
684 685 686 687 688 689 690 691

	sysfs_hprint(congested,
		     ((uint64_t) bch_get_congested(c)) << 9);
	sysfs_print(congested_read_threshold_us,
		    c->congested_read_threshold_us);
	sysfs_print(congested_write_threshold_us,
		    c->congested_write_threshold_us);

692 693 694
	sysfs_print(cutoff_writeback, bch_cutoff_writeback);
	sysfs_print(cutoff_writeback_sync, bch_cutoff_writeback_sync);

K
Kent Overstreet 已提交
695 696 697
	sysfs_print(active_journal_entries,	fifo_used(&c->journal.pin));
	sysfs_printf(verify,			"%i", c->verify);
	sysfs_printf(key_merging_disabled,	"%i", c->key_merging_disabled);
K
Kent Overstreet 已提交
698 699
	sysfs_printf(expensive_debug_checks,
		     "%i", c->expensive_debug_checks);
K
Kent Overstreet 已提交
700 701 702
	sysfs_printf(gc_always_rewrite,		"%i", c->gc_always_rewrite);
	sysfs_printf(btree_shrinker_disabled,	"%i", c->shrinker_disabled);
	sysfs_printf(copy_gc_enabled,		"%i", c->copy_gc_enabled);
703
	sysfs_printf(gc_after_writeback,	"%i", c->gc_after_writeback);
704 705
	sysfs_printf(io_disable,		"%i",
		     test_bit(CACHE_SET_IO_DISABLE, &c->flags));
K
Kent Overstreet 已提交
706 707 708 709 710 711 712 713 714 715 716

	if (attr == &sysfs_bset_tree_stats)
		return bch_bset_print_stats(c, buf);

	return 0;
}
SHOW_LOCKED(bch_cache_set)

STORE(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
717
	ssize_t v;
K
Kent Overstreet 已提交
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736

	if (attr == &sysfs_unregister)
		bch_cache_set_unregister(c);

	if (attr == &sysfs_stop)
		bch_cache_set_stop(c);

	if (attr == &sysfs_synchronous) {
		bool sync = strtoul_or_return(buf);

		if (sync != CACHE_SYNC(&c->sb)) {
			SET_CACHE_SYNC(&c->sb, sync);
			bcache_write_super(c);
		}
	}

	if (attr == &sysfs_flash_vol_create) {
		int r;
		uint64_t v;
737

K
Kent Overstreet 已提交
738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
		strtoi_h_or_return(buf, v);

		r = bch_flash_dev_create(c, v);
		if (r)
			return r;
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&c->writeback_keys_done,	0);
		atomic_long_set(&c->writeback_keys_failed,	0);

		memset(&c->gc_stats, 0, sizeof(struct gc_stat));
		bch_cache_accounting_clear(&c->accounting);
	}

C
Coly Li 已提交
753 754
	if (attr == &sysfs_trigger_gc)
		force_wake_up_gc(c);
K
Kent Overstreet 已提交
755 756 757

	if (attr == &sysfs_prune_cache) {
		struct shrink_control sc;
758

K
Kent Overstreet 已提交
759 760
		sc.gfp_mask = GFP_KERNEL;
		sc.nr_to_scan = strtoul_or_return(buf);
761
		c->shrink.scan_objects(&c->shrink, &sc);
K
Kent Overstreet 已提交
762 763 764 765 766 767 768
	}

	sysfs_strtoul(congested_read_threshold_us,
		      c->congested_read_threshold_us);
	sysfs_strtoul(congested_write_threshold_us,
		      c->congested_write_threshold_us);

769
	if (attr == &sysfs_errors) {
770
		v = __sysfs_match_string(error_actions, -1, buf);
771 772 773 774 775 776
		if (v < 0)
			return v;

		c->on_error = v;
	}

K
Kent Overstreet 已提交
777
	if (attr == &sysfs_io_error_limit)
C
Coly Li 已提交
778
		c->error_limit = strtoul_or_return(buf);
K
Kent Overstreet 已提交
779 780 781 782 783

	/* See count_io_errors() for why 88 */
	if (attr == &sysfs_io_error_halflife)
		c->error_decay = strtoul_or_return(buf) / 88;

784
	if (attr == &sysfs_io_disable) {
785
		v = strtoul_or_return(buf);
786 787 788 789 790 791 792 793 794 795 796
		if (v) {
			if (test_and_set_bit(CACHE_SET_IO_DISABLE,
					     &c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already set");
		} else {
			if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
						&c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already cleared");
		}
	}

K
Kent Overstreet 已提交
797 798 799
	sysfs_strtoul(journal_delay_ms,		c->journal_delay_ms);
	sysfs_strtoul(verify,			c->verify);
	sysfs_strtoul(key_merging_disabled,	c->key_merging_disabled);
K
Kent Overstreet 已提交
800
	sysfs_strtoul(expensive_debug_checks,	c->expensive_debug_checks);
K
Kent Overstreet 已提交
801 802 803
	sysfs_strtoul(gc_always_rewrite,	c->gc_always_rewrite);
	sysfs_strtoul(btree_shrinker_disabled,	c->shrinker_disabled);
	sysfs_strtoul(copy_gc_enabled,		c->copy_gc_enabled);
804 805 806 807 808 809
	/*
	 * write gc_after_writeback here may overwrite an already set
	 * BCH_DO_AUTO_GC, it doesn't matter because this flag will be
	 * set in next chance.
	 */
	sysfs_strtoul_clamp(gc_after_writeback, c->gc_after_writeback, 0, 1);
K
Kent Overstreet 已提交
810 811 812 813 814 815 816 817

	return size;
}
STORE_LOCKED(bch_cache_set)

SHOW(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);
818

K
Kent Overstreet 已提交
819 820 821 822 823 824
	return bch_cache_set_show(&c->kobj, attr, buf);
}

STORE(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);
825

K
Kent Overstreet 已提交
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848
	return bch_cache_set_store(&c->kobj, attr, buf, size);
}

/*
 * Release callback for the cache set's "internal" kobject.  Intentionally
 * empty: nothing is freed here.  The kobject is a member of struct cache_set
 * (see the container_of() uses above), so its storage presumably goes away
 * with the cache set itself -- confirm against the cache set release path.
 */
static void bch_cache_set_internal_release(struct kobject *k)
{
}

static struct attribute *bch_cache_set_files[] = {
	&sysfs_unregister,
	&sysfs_stop,
	&sysfs_synchronous,
	&sysfs_journal_delay_ms,
	&sysfs_flash_vol_create,

	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_tree_depth,
	&sysfs_root_usage_percent,
	&sysfs_btree_cache_size,
	&sysfs_cache_available_percent,

	&sysfs_average_key_size,

849
	&sysfs_errors,
K
Kent Overstreet 已提交
850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873
	&sysfs_io_error_limit,
	&sysfs_io_error_halflife,
	&sysfs_congested,
	&sysfs_congested_read_threshold_us,
	&sysfs_congested_write_threshold_us,
	&sysfs_clear_stats,
	NULL
};
KTYPE(bch_cache_set);

/*
 * Attributes exposed in the "internal" directory of a cache set.  NULL
 * terminated; passed to KTYPE() below, a project macro defined elsewhere.
 * The sysfs_time_stats_attribute_list() uses carry no trailing comma here,
 * so the macro is expected to supply its own -- see its definition.
 */
static struct attribute *bch_cache_set_internal_files[] = {
	&sysfs_active_journal_entries,

	/* time statistics */
	sysfs_time_stats_attribute_list(btree_gc, sec, ms)
	sysfs_time_stats_attribute_list(btree_split, sec, us)
	sysfs_time_stats_attribute_list(btree_sort, ms, us)
	sysfs_time_stats_attribute_list(btree_read, ms, us)

	/* btree statistics */
	&sysfs_btree_nodes,
	&sysfs_btree_used_percent,
	&sysfs_btree_cache_max_chain,

	/* counters */
	&sysfs_bset_tree_stats,
	&sysfs_cache_read_races,
	&sysfs_reclaim,
	&sysfs_flush_write,
	&sysfs_retry_flush_write,
	&sysfs_writeback_keys_done,
	&sysfs_writeback_keys_failed,

	/* triggers and tunables */
	&sysfs_trigger_gc,
	&sysfs_prune_cache,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
	&sysfs_key_merging_disabled,
	&sysfs_expensive_debug_checks,
#endif
	&sysfs_gc_always_rewrite,
	&sysfs_btree_shrinker_disabled,
	&sysfs_copy_gc_enabled,
	&sysfs_gc_after_writeback,
	&sysfs_io_disable,
	&sysfs_cutoff_writeback,
	&sysfs_cutoff_writeback_sync,
	NULL
};
KTYPE(bch_cache_set_internal);

898 899 900 901 902
/*
 * sort() comparison callback for arrays of uint16_t: positive when *r is
 * larger than *l, so the array ends up in descending order.  Both operands
 * are promoted to int before the subtraction, so the difference cannot
 * overflow.
 */
static int __bch_cache_cmp(const void *l, const void *r)
{
	const uint16_t *lhs = l;
	const uint16_t *rhs = r;
	int left = *lhs;
	int right = *rhs;

	return right - left;
}

K
Kent Overstreet 已提交
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921
SHOW(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);

	sysfs_hprint(bucket_size,	bucket_bytes(ca));
	sysfs_hprint(block_size,	block_bytes(ca));
	sysfs_print(nbuckets,		ca->sb.nbuckets);
	sysfs_print(discard,		ca->discard);
	sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9);
	sysfs_hprint(btree_written,
		     atomic_long_read(&ca->btree_sectors_written) << 9);
	sysfs_hprint(metadata_written,
		     (atomic_long_read(&ca->meta_sectors_written) +
		      atomic_long_read(&ca->btree_sectors_written)) << 9);

	sysfs_print(io_errors,
		    atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);

	if (attr == &sysfs_cache_replacement_policy)
922 923 924
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       cache_replacement_policies,
					       CACHE_REPLACEMENT(&ca->sb));
K
Kent Overstreet 已提交
925 926

	if (attr == &sysfs_priority_stats) {
K
Kent Overstreet 已提交
927 928 929
		struct bucket *b;
		size_t n = ca->sb.nbuckets, i;
		size_t unused = 0, available = 0, dirty = 0, meta = 0;
K
Kent Overstreet 已提交
930
		uint64_t sum = 0;
931 932
		/* Compute 31 quantiles */
		uint16_t q[31], *p, *cached;
K
Kent Overstreet 已提交
933 934
		ssize_t ret;

935 936
		cached = p = vmalloc(array_size(sizeof(uint16_t),
						ca->sb.nbuckets));
K
Kent Overstreet 已提交
937 938 939 940
		if (!p)
			return -ENOMEM;

		mutex_lock(&ca->set->bucket_lock);
K
Kent Overstreet 已提交
941 942 943 944 945 946 947 948 949 950 951
		for_each_bucket(b, ca) {
			if (!GC_SECTORS_USED(b))
				unused++;
			if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
				available++;
			if (GC_MARK(b) == GC_MARK_DIRTY)
				dirty++;
			if (GC_MARK(b) == GC_MARK_METADATA)
				meta++;
		}

K
Kent Overstreet 已提交
952 953 954 955
		for (i = ca->sb.first_bucket; i < n; i++)
			p[i] = ca->buckets[i].prio;
		mutex_unlock(&ca->set->bucket_lock);

956
		sort(p, n, sizeof(uint16_t), __bch_cache_cmp, NULL);
K
Kent Overstreet 已提交
957 958 959 960 961 962 963 964 965

		while (n &&
		       !cached[n - 1])
			--n;

		unused = ca->sb.nbuckets - n;

		while (cached < p + n &&
		       *cached == BTREE_PRIO)
K
Kent Overstreet 已提交
966
			cached++, n--;
K
Kent Overstreet 已提交
967 968 969 970 971 972 973

		for (i = 0; i < n; i++)
			sum += INITIAL_PRIO - cached[i];

		if (n)
			do_div(sum, n);

974 975 976
		for (i = 0; i < ARRAY_SIZE(q); i++)
			q[i] = INITIAL_PRIO - cached[n * (i + 1) /
				(ARRAY_SIZE(q) + 1)];
K
Kent Overstreet 已提交
977 978 979

		vfree(p);

980 981
		ret = scnprintf(buf, PAGE_SIZE,
				"Unused:		%zu%%\n"
K
Kent Overstreet 已提交
982 983
				"Clean:		%zu%%\n"
				"Dirty:		%zu%%\n"
984 985 986 987 988
				"Metadata:	%zu%%\n"
				"Average:	%llu\n"
				"Sectors per Q:	%zu\n"
				"Quantiles:	[",
				unused * 100 / (size_t) ca->sb.nbuckets,
K
Kent Overstreet 已提交
989 990 991
				available * 100 / (size_t) ca->sb.nbuckets,
				dirty * 100 / (size_t) ca->sb.nbuckets,
				meta * 100 / (size_t) ca->sb.nbuckets, sum,
992 993 994 995 996 997 998 999 1000
				n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));

		for (i = 0; i < ARRAY_SIZE(q); i++)
			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
					 "%u ", q[i]);
		ret--;

		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");

K
Kent Overstreet 已提交
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010
		return ret;
	}

	return 0;
}
SHOW_LOCKED(bch_cache)

STORE(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
1011
	ssize_t v;
K
Kent Overstreet 已提交
1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025

	if (attr == &sysfs_discard) {
		bool v = strtoul_or_return(buf);

		if (blk_queue_discard(bdev_get_queue(ca->bdev)))
			ca->discard = v;

		if (v != CACHE_DISCARD(&ca->sb)) {
			SET_CACHE_DISCARD(&ca->sb, v);
			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_cache_replacement_policy) {
1026
		v = __sysfs_match_string(cache_replacement_policies, -1, buf);
K
Kent Overstreet 已提交
1027 1028 1029
		if (v < 0)
			return v;

1030
		if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) {
K
Kent Overstreet 已提交
1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
			mutex_lock(&ca->set->bucket_lock);
			SET_CACHE_REPLACEMENT(&ca->sb, v);
			mutex_unlock(&ca->set->bucket_lock);

			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&ca->sectors_written, 0);
		atomic_long_set(&ca->btree_sectors_written, 0);
		atomic_long_set(&ca->meta_sectors_written, 0);
		atomic_set(&ca->io_count, 0);
		atomic_set(&ca->io_errors, 0);
	}

	return size;
}
STORE_LOCKED(bch_cache)

/*
 * Attributes exposed for a cache device.  NULL terminated; passed to KTYPE()
 * below, a project macro defined elsewhere.
 */
static struct attribute *bch_cache_files[] = {
	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_nbuckets,
	&sysfs_priority_stats,
	&sysfs_discard,
	&sysfs_written,
	&sysfs_btree_written,
	&sysfs_metadata_written,
	&sysfs_io_errors,
	&sysfs_clear_stats,
	&sysfs_cache_replacement_policy,
	NULL
};
KTYPE(bch_cache);