sysfs.c 27.7 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
K
Kent Overstreet 已提交
2 3 4 5 6 7 8 9 10 11 12
/*
 * bcache sysfs interfaces
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "sysfs.h"
#include "btree.h"
#include "request.h"
13
#include "writeback.h"
K
Kent Overstreet 已提交
14

K
Kent Overstreet 已提交
15
#include <linux/blkdev.h>
K
Kent Overstreet 已提交
16
#include <linux/sort.h>
17
#include <linux/sched/clock.h>
K
Kent Overstreet 已提交
18

19 20
extern bool bcache_is_reboot;

S
Shenghui Wang 已提交
21
/* Default is 0 ("writethrough") */
22 23 24 25
/*
 * Cache mode names, indexed by the value of BDEV_CACHE_MODE().
 *
 * The trailing NULL is required: bch_snprint_string_list() walks the list
 * until it reaches a NULL entry, so an unterminated array is read out of
 * bounds.
 */
static const char * const bch_cache_modes[] = {
	"writethrough",
	"writeback",
	"writearound",
	"none",
	NULL
};

S
Shenghui Wang 已提交
29
/* Default is 0 ("auto") */
30 31
/*
 * Names for dc->stop_when_cache_set_failed, indexed by that value.
 *
 * The trailing NULL is required: bch_snprint_string_list() walks the list
 * until it reaches a NULL entry.
 */
static const char * const bch_stop_on_failure_modes[] = {
	"auto",
	"always",
	NULL
};

K
Kent Overstreet 已提交
35 36 37
/*
 * Replacement policy names, indexed by the value of CACHE_REPLACEMENT().
 *
 * The trailing NULL is required: bch_snprint_string_list() walks the list
 * until it reaches a NULL entry.
 */
static const char * const cache_replacement_policies[] = {
	"lru",
	"fifo",
	"random",
	NULL
};

41 42
/*
 * Names for c->on_error, indexed by that value.
 *
 * The trailing NULL is required: bch_snprint_string_list() walks the list
 * until it reaches a NULL entry.
 */
static const char * const error_actions[] = {
	"unregister",
	"panic",
	NULL
};

K
Kent Overstreet 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
/*
 * Attribute objects used by the show/store handlers and the *_files[]
 * tables in this file (referenced there as sysfs_<name>).  The
 * write_attribute/read_attribute/rw_attribute helpers are defined outside
 * this file; presumably they differ only in the access mode they assign.
 */

/* Action triggers */
write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
write_attribute(stop);
write_attribute(clear_stats);
write_attribute(trigger_gc);
write_attribute(prune_cache);
write_attribute(flash_vol_create);

/* Geometry, usage and identity */
read_attribute(bucket_size);
read_attribute(block_size);
read_attribute(nbuckets);
read_attribute(tree_depth);
read_attribute(root_usage_percent);
read_attribute(priority_stats);
read_attribute(btree_cache_size);
read_attribute(btree_cache_max_chain);
read_attribute(cache_available_percent);
read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(active_journal_entries);
read_attribute(backing_dev_name);
read_attribute(backing_dev_uuid);

/* Timing statistics */
sysfs_time_stats_attribute(btree_gc,	sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort,	ms,  us);
sysfs_time_stats_attribute(btree_read,	ms,  us);

/* Btree statistics */
read_attribute(btree_nodes);
read_attribute(btree_used_percent);
read_attribute(average_key_size);
read_attribute(dirty_data);
read_attribute(bset_tree_stats);

/* State and counters */
read_attribute(state);
read_attribute(cache_read_races);
read_attribute(reclaim);
read_attribute(reclaimed_journal_buckets);
read_attribute(flush_write);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
read_attribute(congested);
read_attribute(cutoff_writeback);
read_attribute(cutoff_writeback_sync);
rw_attribute(congested_read_threshold_us);
rw_attribute(congested_write_threshold_us);

/* Caching and writeback tunables */
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
rw_attribute(writeback_delay);
rw_attribute(writeback_rate);

/* Writeback rate controller */
rw_attribute(writeback_rate_update_seconds);
rw_attribute(writeback_rate_i_term_inverse);
rw_attribute(writeback_rate_p_term_inverse);
rw_attribute(writeback_rate_minimum);
read_attribute(writeback_rate_debug);

/* Striping */
read_attribute(stripe_size);
read_attribute(partial_stripes_expensive);

/* Miscellaneous tunables */
rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
rw_attribute(readahead);
rw_attribute(errors);
rw_attribute(io_error_limit);
rw_attribute(io_error_halflife);
rw_attribute(verify);
rw_attribute(bypass_torture_test);
rw_attribute(key_merging_disabled);
rw_attribute(gc_always_rewrite);
rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
rw_attribute(gc_after_writeback);
rw_attribute(size);

136 137 138 139
/*
 * Format a NULL-terminated list of strings into @buf, separated by spaces,
 * with the entry at index @selected wrapped in square brackets.  The final
 * separator is replaced by a newline.
 *
 * @buf:      destination buffer
 * @size:     capacity of @buf in bytes, including the terminating NUL
 * @list:     array of strings; MUST end with a NULL entry
 * @selected: index of the entry to mark with [brackets]
 *
 * Returns the number of bytes placed in @buf, not counting the NUL.
 * Returns 0 (and an empty string when @size is non-zero) for an empty
 * list.  Output that does not fit is truncated to @size - 1 bytes.
 */
static ssize_t bch_snprint_string_list(char *buf,
				       size_t size,
				       const char * const list[],
				       size_t selected)
{
	char *out = buf;
	char *end = buf + size;
	size_t i;

	if (!size)
		return 0;

	/* Stop once only the NUL slot is left; nothing more can be written. */
	for (i = 0; list[i] && out < end - 1; i++) {
		size_t room = end - out;
		int n = snprintf(out, room,
				 i == selected ? "[%s] " : "%s ", list[i]);

		if (n < 0)
			break;

		/*
		 * snprintf() returns the length it wanted to write.  On
		 * truncation it has filled the buffer and NUL-terminated it,
		 * so park the cursor on that NUL instead of running past
		 * the end of the buffer.
		 */
		if ((size_t)n >= room) {
			out = end - 1;
			break;
		}
		out += n;
	}

	/* Nothing written: do not touch the byte before the buffer. */
	if (out == buf) {
		*buf = '\0';
		return 0;
	}

	out[-1] = '\n';
	return out - buf;
}

K
Kent Overstreet 已提交
152 153 154 155
SHOW(__bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
156
	char const *states[] = { "no cache", "clean", "dirty", "inconsistent" };
157
	int wb = dc->writeback_running;
K
Kent Overstreet 已提交
158 159 160 161

#define var(stat)		(dc->stat)

	if (attr == &sysfs_cache_mode)
162
		return bch_snprint_string_list(buf, PAGE_SIZE,
163
					       bch_cache_modes,
164
					       BDEV_CACHE_MODE(&dc->sb));
K
Kent Overstreet 已提交
165

166 167
	if (attr == &sysfs_stop_when_cache_set_failed)
		return bch_snprint_string_list(buf, PAGE_SIZE,
168
					       bch_stop_on_failure_modes,
169 170 171
					       dc->stop_when_cache_set_failed);


K
Kent Overstreet 已提交
172 173
	sysfs_printf(data_csum,		"%i", dc->disk.data_csum);
	var_printf(verify,		"%i");
K
Kent Overstreet 已提交
174
	var_printf(bypass_torture_test,	"%i");
K
Kent Overstreet 已提交
175 176 177 178
	var_printf(writeback_metadata,	"%i");
	var_printf(writeback_running,	"%i");
	var_print(writeback_delay);
	var_print(writeback_percent);
179 180
	sysfs_hprint(writeback_rate,
		     wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
181
	sysfs_printf(io_errors,		"%i", atomic_read(&dc->io_errors));
182 183
	sysfs_printf(io_error_limit,	"%i", dc->error_limit);
	sysfs_printf(io_disable,	"%i", dc->io_disable);
K
Kent Overstreet 已提交
184
	var_print(writeback_rate_update_seconds);
185
	var_print(writeback_rate_i_term_inverse);
K
Kent Overstreet 已提交
186
	var_print(writeback_rate_p_term_inverse);
187
	var_print(writeback_rate_minimum);
K
Kent Overstreet 已提交
188 189

	if (attr == &sysfs_writeback_rate_debug) {
190
		char rate[20];
K
Kent Overstreet 已提交
191 192
		char dirty[20];
		char target[20];
193
		char proportional[20];
194
		char integral[20];
195 196 197
		char change[20];
		s64 next_io;

198 199 200 201
		/*
		 * Except for dirty and target, other values should
		 * be 0 if writeback is not running.
		 */
202 203 204
		bch_hprint(rate,
			   wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
			      : 0);
205 206 207 208 209 210 211 212 213
		bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
		bch_hprint(target, dc->writeback_rate_target << 9);
		bch_hprint(proportional,
			   wb ? dc->writeback_rate_proportional << 9 : 0);
		bch_hprint(integral,
			   wb ? dc->writeback_rate_integral_scaled << 9 : 0);
		bch_hprint(change, wb ? dc->writeback_rate_change << 9 : 0);
		next_io = wb ? div64_s64(dc->writeback_rate.next-local_clock(),
					 NSEC_PER_MSEC) : 0;
K
Kent Overstreet 已提交
214 215

		return sprintf(buf,
216
			       "rate:\t\t%s/sec\n"
K
Kent Overstreet 已提交
217
			       "dirty:\t\t%s\n"
218 219
			       "target:\t\t%s\n"
			       "proportional:\t%s\n"
220
			       "integral:\t%s\n"
221 222 223
			       "change:\t\t%s/sec\n"
			       "next io:\t%llims\n",
			       rate, dirty, target, proportional,
224
			       integral, change, next_io);
K
Kent Overstreet 已提交
225 226 227
	}

	sysfs_hprint(dirty_data,
228
		     bcache_dev_sectors_dirty(&dc->disk) << 9);
K
Kent Overstreet 已提交
229

230
	sysfs_hprint(stripe_size,	 ((uint64_t)dc->disk.stripe_size) << 9);
K
Kent Overstreet 已提交
231 232
	var_printf(partial_stripes_expensive,	"%u");

K
Kent Overstreet 已提交
233 234 235 236 237 238 239 240 241 242 243 244 245
	var_hprint(sequential_cutoff);
	var_hprint(readahead);

	sysfs_print(running,		atomic_read(&dc->running));
	sysfs_print(state,		states[BDEV_STATE(&dc->sb)]);

	if (attr == &sysfs_label) {
		memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE + 1] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

246 247 248 249 250 251
	if (attr == &sysfs_backing_dev_name) {
		snprintf(buf, BDEVNAME_SIZE + 1, "%s", dc->backing_dev_name);
		strcat(buf, "\n");
		return strlen(buf);
	}

252 253 254 255 256 257 258
	if (attr == &sysfs_backing_dev_uuid) {
		/* convert binary uuid into 36-byte string plus '\0' */
		snprintf(buf, 36+1, "%pU", dc->sb.uuid);
		strcat(buf, "\n");
		return strlen(buf);
	}

K
Kent Overstreet 已提交
259 260 261 262 263 264 265 266 267
#undef var
	return 0;
}
SHOW_LOCKED(bch_cached_dev)

STORE(__cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
268
	ssize_t v;
K
Kent Overstreet 已提交
269
	struct cache_set *c;
G
Gabriel de Perthuis 已提交
270
	struct kobj_uevent_env *env;
K
Kent Overstreet 已提交
271

272 273 274 275
	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

K
Kent Overstreet 已提交
276
#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
277
#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
K
Kent Overstreet 已提交
278 279 280 281
#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)

	sysfs_strtoul(data_csum,	dc->disk.data_csum);
	d_strtoul(verify);
282 283 284
	sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
	sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
	sysfs_strtoul_bool(writeback_running, dc->writeback_running);
285
	sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX);
286

287 288
	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
			    0, bch_cutoff_writeback);
K
Kent Overstreet 已提交
289

290
	if (attr == &sysfs_writeback_rate) {
291 292
		ssize_t ret;
		long int v = atomic_long_read(&dc->writeback_rate.rate);
293

294 295 296 297 298 299 300 301
		ret = strtoul_safe_clamp(buf, v, 1, INT_MAX);

		if (!ret) {
			atomic_long_set(&dc->writeback_rate.rate, v);
			ret = size;
		}

		return ret;
302
	}
303

304 305 306
	sysfs_strtoul_clamp(writeback_rate_update_seconds,
			    dc->writeback_rate_update_seconds,
			    1, WRITEBACK_RATE_UPDATE_SECS_MAX);
307 308 309
	sysfs_strtoul_clamp(writeback_rate_i_term_inverse,
			    dc->writeback_rate_i_term_inverse,
			    1, UINT_MAX);
310 311 312
	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
			    dc->writeback_rate_p_term_inverse,
			    1, UINT_MAX);
313 314 315
	sysfs_strtoul_clamp(writeback_rate_minimum,
			    dc->writeback_rate_minimum,
			    1, UINT_MAX);
K
Kent Overstreet 已提交
316

317 318 319 320 321 322 323 324
	sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);

	if (attr == &sysfs_io_disable) {
		int v = strtoul_or_return(buf);

		dc->io_disable = v ? 1 : 0;
	}

325 326 327
	sysfs_strtoul_clamp(sequential_cutoff,
			    dc->sequential_cutoff,
			    0, UINT_MAX);
K
Kent Overstreet 已提交
328 329 330 331 332 333
	d_strtoi_h(readahead);

	if (attr == &sysfs_clear_stats)
		bch_cache_accounting_clear(&dc->accounting);

	if (attr == &sysfs_running &&
334 335 336 337 338
	    strtoul_or_return(buf)) {
		v = bch_cached_dev_run(dc);
		if (v)
			return v;
	}
K
Kent Overstreet 已提交
339 340

	if (attr == &sysfs_cache_mode) {
341
		v = sysfs_match_string(bch_cache_modes, buf);
K
Kent Overstreet 已提交
342 343 344
		if (v < 0)
			return v;

345
		if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
K
Kent Overstreet 已提交
346 347 348 349 350
			SET_BDEV_CACHE_MODE(&dc->sb, v);
			bch_write_bdev_super(dc, NULL);
		}
	}

351
	if (attr == &sysfs_stop_when_cache_set_failed) {
352
		v = sysfs_match_string(bch_stop_on_failure_modes, buf);
353 354 355 356 357 358
		if (v < 0)
			return v;

		dc->stop_when_cache_set_failed = v;
	}

K
Kent Overstreet 已提交
359
	if (attr == &sysfs_label) {
360 361 362 363 364 365 366
		if (size > SB_LABEL_SIZE)
			return -EINVAL;
		memcpy(dc->sb.label, buf, size);
		if (size < SB_LABEL_SIZE)
			dc->sb.label[size] = '\0';
		if (size && dc->sb.label[size - 1] == '\n')
			dc->sb.label[size - 1] = '\0';
K
Kent Overstreet 已提交
367 368 369 370 371 372
		bch_write_bdev_super(dc, NULL);
		if (dc->disk.c) {
			memcpy(dc->disk.c->uuids[dc->disk.id].label,
			       buf, SB_LABEL_SIZE);
			bch_uuid_write(dc->disk.c);
		}
G
Gabriel de Perthuis 已提交
373
		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
374 375
		if (!env)
			return -ENOMEM;
G
Gabriel de Perthuis 已提交
376 377 378
		add_uevent_var(env, "DRIVER=bcache");
		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
		add_uevent_var(env, "CACHED_LABEL=%s", buf);
379 380 381
		kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
				   KOBJ_CHANGE,
				   env->envp);
G
Gabriel de Perthuis 已提交
382
		kfree(env);
K
Kent Overstreet 已提交
383 384 385
	}

	if (attr == &sysfs_attach) {
386 387 388
		uint8_t		set_uuid[16];

		if (bch_parse_uuid(buf, set_uuid) < 16)
K
Kent Overstreet 已提交
389 390
			return -EINVAL;

391
		v = -ENOENT;
K
Kent Overstreet 已提交
392
		list_for_each_entry(c, &bch_cache_sets, list) {
393
			v = bch_cached_dev_attach(dc, c, set_uuid);
K
Kent Overstreet 已提交
394 395 396
			if (!v)
				return size;
		}
397 398
		if (v == -ENOENT)
			pr_err("Can't attach %s: cache set not found", buf);
399
		return v;
K
Kent Overstreet 已提交
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
	}

	if (attr == &sysfs_detach && dc->disk.c)
		bch_cached_dev_detach(dc);

	if (attr == &sysfs_stop)
		bcache_device_stop(&dc->disk);

	return size;
}

STORE(bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);

416 417 418 419
	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

K
Kent Overstreet 已提交
420 421 422
	mutex_lock(&bch_register_lock);
	size = __cached_dev_store(kobj, attr, buf, size);

423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441
	if (attr == &sysfs_writeback_running) {
		/* dc->writeback_running changed in __cached_dev_store() */
		if (IS_ERR_OR_NULL(dc->writeback_thread)) {
			/*
			 * reject setting it to 1 via sysfs if writeback
			 * kthread is not created yet.
			 */
			if (dc->writeback_running) {
				dc->writeback_running = false;
				pr_err("%s: failed to run non-existent writeback thread",
						dc->disk.disk->disk_name);
			}
		} else
			/*
			 * writeback kthread will check if dc->writeback_running
			 * is true or false.
			 */
			bch_writeback_queue(dc);
	}
K
Kent Overstreet 已提交
442

443 444 445 446
	/*
	 * Only set BCACHE_DEV_WB_RUNNING when cached device attached to
	 * a cache set, otherwise it doesn't make sense.
	 */
K
Kent Overstreet 已提交
447
	if (attr == &sysfs_writeback_percent)
448 449
		if ((dc->disk.c != NULL) &&
		    (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)))
450
			schedule_delayed_work(&dc->writeback_rate_update,
K
Kent Overstreet 已提交
451 452 453 454 455 456 457 458 459 460 461 462 463 464
				      dc->writeback_rate_update_seconds * HZ);

	mutex_unlock(&bch_register_lock);
	return size;
}

static struct attribute *bch_cached_dev_files[] = {
	&sysfs_attach,
	&sysfs_detach,
	&sysfs_stop,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_cache_mode,
465
	&sysfs_stop_when_cache_set_failed,
K
Kent Overstreet 已提交
466 467 468 469 470 471
	&sysfs_writeback_metadata,
	&sysfs_writeback_running,
	&sysfs_writeback_delay,
	&sysfs_writeback_percent,
	&sysfs_writeback_rate,
	&sysfs_writeback_rate_update_seconds,
472
	&sysfs_writeback_rate_i_term_inverse,
K
Kent Overstreet 已提交
473
	&sysfs_writeback_rate_p_term_inverse,
474
	&sysfs_writeback_rate_minimum,
K
Kent Overstreet 已提交
475
	&sysfs_writeback_rate_debug,
476
	&sysfs_io_errors,
477 478
	&sysfs_io_error_limit,
	&sysfs_io_disable,
K
Kent Overstreet 已提交
479
	&sysfs_dirty_data,
K
Kent Overstreet 已提交
480 481
	&sysfs_stripe_size,
	&sysfs_partial_stripes_expensive,
K
Kent Overstreet 已提交
482 483 484 485 486 487 488 489
	&sysfs_sequential_cutoff,
	&sysfs_clear_stats,
	&sysfs_running,
	&sysfs_state,
	&sysfs_label,
	&sysfs_readahead,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
K
Kent Overstreet 已提交
490
	&sysfs_bypass_torture_test,
K
Kent Overstreet 已提交
491
#endif
492
	&sysfs_backing_dev_name,
493
	&sysfs_backing_dev_uuid,
K
Kent Overstreet 已提交
494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
	NULL
};
KTYPE(bch_cached_dev);

/*
 * Show handler for flash-only volume attributes (data_csum, size, label).
 *
 * kobj, attr and buf are supplied by the SHOW() macro, which is defined
 * outside this file.  Returns the number of bytes written to buf, or 0
 * when attr is not handled here.
 */
SHOW(bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

	sysfs_printf(data_csum,	"%i", d->data_csum);
	sysfs_hprint(size,	u->sectors << 9);

	if (attr == &sysfs_label) {
		memcpy(buf, u->label, SB_LABEL_SIZE);
		/*
		 * Terminate directly after the copied bytes.  The label may
		 * occupy all SB_LABEL_SIZE bytes without a NUL of its own,
		 * so buf[SB_LABEL_SIZE] must be written before strcat().
		 */
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	return 0;
}

STORE(__bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

523 524 525 526
	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

K
Kent Overstreet 已提交
527 528 529 530
	sysfs_strtoul(data_csum,	d->data_csum);

	if (attr == &sysfs_size) {
		uint64_t v;
531

K
Kent Overstreet 已提交
532 533 534 535 536 537 538 539 540 541 542 543 544
		strtoi_h_or_return(buf, v);

		u->sectors = v >> 9;
		bch_uuid_write(d->c);
		set_capacity(d->disk, u->sectors);
	}

	if (attr == &sysfs_label) {
		memcpy(u->label, buf, SB_LABEL_SIZE);
		bch_uuid_write(d->c);
	}

	if (attr == &sysfs_unregister) {
545
		set_bit(BCACHE_DEV_DETACHING, &d->flags);
K
Kent Overstreet 已提交
546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563
		bcache_device_stop(d);
	}

	return size;
}
STORE_LOCKED(bch_flash_dev)

/*
 * NULL-terminated attribute table for a flash-only volume, followed by the
 * KTYPE() declaration that uses it.  data_csum is compiled out with #if 0.
 */
static struct attribute *bch_flash_dev_files[] = {
	&sysfs_unregister,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_label,
	&sysfs_size,
	NULL
};
KTYPE(bch_flash_dev);

564 565 566 567 568 569
/*
 * State for the btree walk done by bch_bset_print_stats(): the embedded
 * btree_op is what the walk callback receives, and container_of() on it
 * recovers this wrapper.
 */
struct bset_stats_op {
	struct btree_op op;	/* embedded operation handed to the walk */
	size_t nodes;		/* number of btree nodes visited */
	struct bset_stats stats;	/* filled in by bch_btree_keys_stats() */
};

J
John Sheu 已提交
570
static int bch_btree_bset_stats(struct btree_op *b_op, struct btree *b)
571 572 573 574 575 576 577 578 579
{
	struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);

	op->nodes++;
	bch_btree_keys_stats(&b->keys, &op->stats);

	return MAP_CONTINUE;
}

580
static int bch_bset_print_stats(struct cache_set *c, char *buf)
581 582 583 584 585 586 587
{
	struct bset_stats_op op;
	int ret;

	memset(&op, 0, sizeof(op));
	bch_btree_op_init(&op.op, -1);

J
John Sheu 已提交
588
	ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, bch_btree_bset_stats);
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
	if (ret < 0)
		return ret;

	return snprintf(buf, PAGE_SIZE,
			"btree nodes:		%zu\n"
			"written sets:		%zu\n"
			"unwritten sets:		%zu\n"
			"written key bytes:	%zu\n"
			"unwritten key bytes:	%zu\n"
			"floats:			%zu\n"
			"failed:			%zu\n",
			op.nodes,
			op.stats.sets_written, op.stats.sets_unwritten,
			op.stats.bytes_written, op.stats.bytes_unwritten,
			op.stats.floats, op.stats.failed);
}

606
static unsigned int bch_root_usage(struct cache_set *c)
K
Kent Overstreet 已提交
607
{
608
	unsigned int bytes = 0;
J
John Sheu 已提交
609 610 611
	struct bkey *k;
	struct btree *b;
	struct btree_iter iter;
K
Kent Overstreet 已提交
612

J
John Sheu 已提交
613
	goto lock_root;
K
Kent Overstreet 已提交
614

J
John Sheu 已提交
615 616
	do {
		rw_unlock(false, b);
K
Kent Overstreet 已提交
617
lock_root:
J
John Sheu 已提交
618 619 620
		b = c->root;
		rw_lock(false, b, b->level);
	} while (b != c->root);
K
Kent Overstreet 已提交
621

J
John Sheu 已提交
622 623
	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
		bytes += bkey_bytes(k);
K
Kent Overstreet 已提交
624

J
John Sheu 已提交
625
	rw_unlock(false, b);
K
Kent Overstreet 已提交
626

J
John Sheu 已提交
627 628
	return (bytes * 100) / btree_bytes(c);
}
K
Kent Overstreet 已提交
629

J
John Sheu 已提交
630 631 632 633
static size_t bch_cache_size(struct cache_set *c)
{
	size_t ret = 0;
	struct btree *b;
K
Kent Overstreet 已提交
634

J
John Sheu 已提交
635 636 637
	mutex_lock(&c->bucket_lock);
	list_for_each_entry(b, &c->btree_cache, list)
		ret += 1 << (b->keys.page_order + PAGE_SHIFT);
K
Kent Overstreet 已提交
638

J
John Sheu 已提交
639 640 641
	mutex_unlock(&c->bucket_lock);
	return ret;
}
K
Kent Overstreet 已提交
642

643
static unsigned int bch_cache_max_chain(struct cache_set *c)
J
John Sheu 已提交
644
{
645
	unsigned int ret = 0;
J
John Sheu 已提交
646
	struct hlist_head *h;
K
Kent Overstreet 已提交
647

J
John Sheu 已提交
648
	mutex_lock(&c->bucket_lock);
K
Kent Overstreet 已提交
649

J
John Sheu 已提交
650 651 652
	for (h = c->bucket_hash;
	     h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
	     h++) {
653
		unsigned int i = 0;
J
John Sheu 已提交
654
		struct hlist_node *p;
K
Kent Overstreet 已提交
655

J
John Sheu 已提交
656 657
		hlist_for_each(p, h)
			i++;
K
Kent Overstreet 已提交
658

J
John Sheu 已提交
659
		ret = max(ret, i);
K
Kent Overstreet 已提交
660 661
	}

J
John Sheu 已提交
662 663 664
	mutex_unlock(&c->bucket_lock);
	return ret;
}
K
Kent Overstreet 已提交
665

666
static unsigned int bch_btree_used(struct cache_set *c)
J
John Sheu 已提交
667 668 669 670
{
	return div64_u64(c->gc_stats.key_bytes * 100,
			 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
K
Kent Overstreet 已提交
671

672
static unsigned int bch_average_key_size(struct cache_set *c)
J
John Sheu 已提交
673 674 675 676 677 678 679 680
{
	return c->gc_stats.nkeys
		? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
		: 0;
}

SHOW(__bch_cache_set)
{
K
Kent Overstreet 已提交
681 682 683 684 685 686 687
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	sysfs_print(synchronous,		CACHE_SYNC(&c->sb));
	sysfs_print(journal_delay_ms,		c->journal_delay_ms);
	sysfs_hprint(bucket_size,		bucket_bytes(c));
	sysfs_hprint(block_size,		block_bytes(c));
	sysfs_print(tree_depth,			c->root->level);
J
John Sheu 已提交
688
	sysfs_print(root_usage_percent,		bch_root_usage(c));
K
Kent Overstreet 已提交
689

J
John Sheu 已提交
690 691
	sysfs_hprint(btree_cache_size,		bch_cache_size(c));
	sysfs_print(btree_cache_max_chain,	bch_cache_max_chain(c));
K
Kent Overstreet 已提交
692 693 694 695
	sysfs_print(cache_available_percent,	100 - c->gc_stats.in_use);

	sysfs_print_time_stats(&c->btree_gc_time,	btree_gc, sec, ms);
	sysfs_print_time_stats(&c->btree_split_time,	btree_split, sec, us);
696
	sysfs_print_time_stats(&c->sort.time,		btree_sort, ms, us);
K
Kent Overstreet 已提交
697 698
	sysfs_print_time_stats(&c->btree_read_time,	btree_read, ms, us);

J
John Sheu 已提交
699
	sysfs_print(btree_used_percent,	bch_btree_used(c));
K
Kent Overstreet 已提交
700
	sysfs_print(btree_nodes,	c->gc_stats.nodes);
J
John Sheu 已提交
701
	sysfs_hprint(average_key_size,	bch_average_key_size(c));
K
Kent Overstreet 已提交
702 703 704 705

	sysfs_print(cache_read_races,
		    atomic_long_read(&c->cache_read_races));

T
Tang Junhui 已提交
706 707 708
	sysfs_print(reclaim,
		    atomic_long_read(&c->reclaim));

709 710 711
	sysfs_print(reclaimed_journal_buckets,
		    atomic_long_read(&c->reclaimed_journal_buckets));

T
Tang Junhui 已提交
712 713 714
	sysfs_print(flush_write,
		    atomic_long_read(&c->flush_write));

K
Kent Overstreet 已提交
715 716 717 718 719
	sysfs_print(writeback_keys_done,
		    atomic_long_read(&c->writeback_keys_done));
	sysfs_print(writeback_keys_failed,
		    atomic_long_read(&c->writeback_keys_failed));

720 721 722 723
	if (attr == &sysfs_errors)
		return bch_snprint_string_list(buf, PAGE_SIZE, error_actions,
					       c->on_error);

K
Kent Overstreet 已提交
724 725
	/* See count_io_errors for why 88 */
	sysfs_print(io_error_halflife,	c->error_decay * 88);
C
Coly Li 已提交
726
	sysfs_print(io_error_limit,	c->error_limit);
K
Kent Overstreet 已提交
727 728 729 730 731 732 733 734

	sysfs_hprint(congested,
		     ((uint64_t) bch_get_congested(c)) << 9);
	sysfs_print(congested_read_threshold_us,
		    c->congested_read_threshold_us);
	sysfs_print(congested_write_threshold_us,
		    c->congested_write_threshold_us);

735 736 737
	sysfs_print(cutoff_writeback, bch_cutoff_writeback);
	sysfs_print(cutoff_writeback_sync, bch_cutoff_writeback_sync);

K
Kent Overstreet 已提交
738 739 740
	sysfs_print(active_journal_entries,	fifo_used(&c->journal.pin));
	sysfs_printf(verify,			"%i", c->verify);
	sysfs_printf(key_merging_disabled,	"%i", c->key_merging_disabled);
K
Kent Overstreet 已提交
741 742
	sysfs_printf(expensive_debug_checks,
		     "%i", c->expensive_debug_checks);
K
Kent Overstreet 已提交
743 744 745
	sysfs_printf(gc_always_rewrite,		"%i", c->gc_always_rewrite);
	sysfs_printf(btree_shrinker_disabled,	"%i", c->shrinker_disabled);
	sysfs_printf(copy_gc_enabled,		"%i", c->copy_gc_enabled);
746
	sysfs_printf(gc_after_writeback,	"%i", c->gc_after_writeback);
747 748
	sysfs_printf(io_disable,		"%i",
		     test_bit(CACHE_SET_IO_DISABLE, &c->flags));
K
Kent Overstreet 已提交
749 750 751 752 753 754 755 756 757 758 759

	if (attr == &sysfs_bset_tree_stats)
		return bch_bset_print_stats(c, buf);

	return 0;
}
SHOW_LOCKED(bch_cache_set)

STORE(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
760
	ssize_t v;
K
Kent Overstreet 已提交
761

762 763 764 765
	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

K
Kent Overstreet 已提交
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
	if (attr == &sysfs_unregister)
		bch_cache_set_unregister(c);

	if (attr == &sysfs_stop)
		bch_cache_set_stop(c);

	if (attr == &sysfs_synchronous) {
		bool sync = strtoul_or_return(buf);

		if (sync != CACHE_SYNC(&c->sb)) {
			SET_CACHE_SYNC(&c->sb, sync);
			bcache_write_super(c);
		}
	}

	if (attr == &sysfs_flash_vol_create) {
		int r;
		uint64_t v;
784

K
Kent Overstreet 已提交
785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
		strtoi_h_or_return(buf, v);

		r = bch_flash_dev_create(c, v);
		if (r)
			return r;
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&c->writeback_keys_done,	0);
		atomic_long_set(&c->writeback_keys_failed,	0);

		memset(&c->gc_stats, 0, sizeof(struct gc_stat));
		bch_cache_accounting_clear(&c->accounting);
	}

C
Coly Li 已提交
800 801
	if (attr == &sysfs_trigger_gc)
		force_wake_up_gc(c);
K
Kent Overstreet 已提交
802 803 804

	if (attr == &sysfs_prune_cache) {
		struct shrink_control sc;
805

K
Kent Overstreet 已提交
806 807
		sc.gfp_mask = GFP_KERNEL;
		sc.nr_to_scan = strtoul_or_return(buf);
808
		c->shrink.scan_objects(&c->shrink, &sc);
K
Kent Overstreet 已提交
809 810
	}

811 812 813 814 815 816
	sysfs_strtoul_clamp(congested_read_threshold_us,
			    c->congested_read_threshold_us,
			    0, UINT_MAX);
	sysfs_strtoul_clamp(congested_write_threshold_us,
			    c->congested_write_threshold_us,
			    0, UINT_MAX);
K
Kent Overstreet 已提交
817

818
	if (attr == &sysfs_errors) {
819
		v = sysfs_match_string(error_actions, buf);
820 821 822 823 824 825
		if (v < 0)
			return v;

		c->on_error = v;
	}

826
	sysfs_strtoul_clamp(io_error_limit, c->error_limit, 0, UINT_MAX);
K
Kent Overstreet 已提交
827 828

	/* See count_io_errors() for why 88 */
829 830 831 832 833 834 835 836 837 838 839
	if (attr == &sysfs_io_error_halflife) {
		unsigned long v = 0;
		ssize_t ret;

		ret = strtoul_safe_clamp(buf, v, 0, UINT_MAX);
		if (!ret) {
			c->error_decay = v / 88;
			return size;
		}
		return ret;
	}
K
Kent Overstreet 已提交
840

841
	if (attr == &sysfs_io_disable) {
842
		v = strtoul_or_return(buf);
843 844 845 846 847 848 849 850 851 852 853
		if (v) {
			if (test_and_set_bit(CACHE_SET_IO_DISABLE,
					     &c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already set");
		} else {
			if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
						&c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already cleared");
		}
	}

854 855 856
	sysfs_strtoul_clamp(journal_delay_ms,
			    c->journal_delay_ms,
			    0, USHRT_MAX);
857 858
	sysfs_strtoul_bool(verify,		c->verify);
	sysfs_strtoul_bool(key_merging_disabled, c->key_merging_disabled);
K
Kent Overstreet 已提交
859
	sysfs_strtoul(expensive_debug_checks,	c->expensive_debug_checks);
860 861 862
	sysfs_strtoul_bool(gc_always_rewrite,	c->gc_always_rewrite);
	sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
	sysfs_strtoul_bool(copy_gc_enabled,	c->copy_gc_enabled);
863 864 865 866 867 868
	/*
	 * write gc_after_writeback here may overwrite an already set
	 * BCH_DO_AUTO_GC, it doesn't matter because this flag will be
	 * set in next chance.
	 */
	sysfs_strtoul_clamp(gc_after_writeback, c->gc_after_writeback, 0, 1);
K
Kent Overstreet 已提交
869 870 871 872 873 874 875 876

	return size;
}
STORE_LOCKED(bch_cache_set)

SHOW(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);
877

K
Kent Overstreet 已提交
878 879 880 881 882 883
	return bch_cache_set_show(&c->kobj, attr, buf);
}

STORE(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);
884

885 886 887 888
	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

K
Kent Overstreet 已提交
889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911
	return bch_cache_set_store(&c->kobj, attr, buf, size);
}

/*
 * Release callback for the "internal" kobject of a cache set.
 * Intentionally empty: nothing is freed here.
 * NOTE(review): presumably the kobject is embedded in struct cache_set and
 * released together with it - confirm against the cache set teardown code.
 */
static void bch_cache_set_internal_release(struct kobject *k)
{
}

static struct attribute *bch_cache_set_files[] = {
	&sysfs_unregister,
	&sysfs_stop,
	&sysfs_synchronous,
	&sysfs_journal_delay_ms,
	&sysfs_flash_vol_create,

	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_tree_depth,
	&sysfs_root_usage_percent,
	&sysfs_btree_cache_size,
	&sysfs_cache_available_percent,

	&sysfs_average_key_size,

912
	&sysfs_errors,
K
Kent Overstreet 已提交
913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936
	&sysfs_io_error_limit,
	&sysfs_io_error_halflife,
	&sysfs_congested,
	&sysfs_congested_read_threshold_us,
	&sysfs_congested_write_threshold_us,
	&sysfs_clear_stats,
	NULL
};
KTYPE(bch_cache_set);

/*
 * NULL-terminated attribute table for the "internal" kobject of a cache
 * set, followed by the KTYPE() declaration that uses it.
 *
 * The sysfs_time_stats_attribute_list() lines carry no trailing comma;
 * presumably the macro expands to a comma-terminated list of entries.
 */
static struct attribute *bch_cache_set_internal_files[] = {
	&sysfs_active_journal_entries,

	/* timing statistics */
	sysfs_time_stats_attribute_list(btree_gc, sec, ms)
	sysfs_time_stats_attribute_list(btree_split, sec, us)
	sysfs_time_stats_attribute_list(btree_sort, ms, us)
	sysfs_time_stats_attribute_list(btree_read, ms, us)

	/* btree statistics */
	&sysfs_btree_nodes,
	&sysfs_btree_used_percent,
	&sysfs_btree_cache_max_chain,

	/* counters */
	&sysfs_bset_tree_stats,
	&sysfs_cache_read_races,
	&sysfs_reclaim,
	&sysfs_reclaimed_journal_buckets,
	&sysfs_flush_write,
	&sysfs_writeback_keys_done,
	&sysfs_writeback_keys_failed,

	/* triggers and tunables */
	&sysfs_trigger_gc,
	&sysfs_prune_cache,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
	&sysfs_key_merging_disabled,
	&sysfs_expensive_debug_checks,
#endif
	&sysfs_gc_always_rewrite,
	&sysfs_btree_shrinker_disabled,
	&sysfs_copy_gc_enabled,
	&sysfs_gc_after_writeback,
	&sysfs_io_disable,
	&sysfs_cutoff_writeback,
	&sysfs_cutoff_writeback_sync,
	NULL
};
KTYPE(bch_cache_set_internal);

961 962 963 964 965
/*
 * sort() comparator over uint16_t elements, ordering them from largest to
 * smallest.
 *
 * Returns *r - *l: positive when *l is the smaller value, negative when it is
 * the larger one, and zero when both are equal.
 */
static int __bch_cache_cmp(const void *l, const void *r)
{
	const uint16_t *lhs = l;
	const uint16_t *rhs = r;

	/* Both operands are promoted to int, so the difference cannot wrap. */
	return *rhs - *lhs;
}

K
Kent Overstreet 已提交
966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
SHOW(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);

	sysfs_hprint(bucket_size,	bucket_bytes(ca));
	sysfs_hprint(block_size,	block_bytes(ca));
	sysfs_print(nbuckets,		ca->sb.nbuckets);
	sysfs_print(discard,		ca->discard);
	sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9);
	sysfs_hprint(btree_written,
		     atomic_long_read(&ca->btree_sectors_written) << 9);
	sysfs_hprint(metadata_written,
		     (atomic_long_read(&ca->meta_sectors_written) +
		      atomic_long_read(&ca->btree_sectors_written)) << 9);

	sysfs_print(io_errors,
		    atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);

	if (attr == &sysfs_cache_replacement_policy)
985 986 987
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       cache_replacement_policies,
					       CACHE_REPLACEMENT(&ca->sb));
K
Kent Overstreet 已提交
988 989

	if (attr == &sysfs_priority_stats) {
K
Kent Overstreet 已提交
990 991 992
		struct bucket *b;
		size_t n = ca->sb.nbuckets, i;
		size_t unused = 0, available = 0, dirty = 0, meta = 0;
K
Kent Overstreet 已提交
993
		uint64_t sum = 0;
994 995
		/* Compute 31 quantiles */
		uint16_t q[31], *p, *cached;
K
Kent Overstreet 已提交
996 997
		ssize_t ret;

998 999
		cached = p = vmalloc(array_size(sizeof(uint16_t),
						ca->sb.nbuckets));
K
Kent Overstreet 已提交
1000 1001 1002 1003
		if (!p)
			return -ENOMEM;

		mutex_lock(&ca->set->bucket_lock);
K
Kent Overstreet 已提交
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
		for_each_bucket(b, ca) {
			if (!GC_SECTORS_USED(b))
				unused++;
			if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
				available++;
			if (GC_MARK(b) == GC_MARK_DIRTY)
				dirty++;
			if (GC_MARK(b) == GC_MARK_METADATA)
				meta++;
		}

K
Kent Overstreet 已提交
1015 1016 1017 1018
		for (i = ca->sb.first_bucket; i < n; i++)
			p[i] = ca->buckets[i].prio;
		mutex_unlock(&ca->set->bucket_lock);

1019
		sort(p, n, sizeof(uint16_t), __bch_cache_cmp, NULL);
K
Kent Overstreet 已提交
1020 1021 1022 1023 1024 1025 1026

		while (n &&
		       !cached[n - 1])
			--n;

		while (cached < p + n &&
		       *cached == BTREE_PRIO)
K
Kent Overstreet 已提交
1027
			cached++, n--;
K
Kent Overstreet 已提交
1028 1029 1030 1031 1032 1033 1034

		for (i = 0; i < n; i++)
			sum += INITIAL_PRIO - cached[i];

		if (n)
			do_div(sum, n);

1035 1036 1037
		for (i = 0; i < ARRAY_SIZE(q); i++)
			q[i] = INITIAL_PRIO - cached[n * (i + 1) /
				(ARRAY_SIZE(q) + 1)];
K
Kent Overstreet 已提交
1038 1039 1040

		vfree(p);

1041 1042
		ret = scnprintf(buf, PAGE_SIZE,
				"Unused:		%zu%%\n"
K
Kent Overstreet 已提交
1043 1044
				"Clean:		%zu%%\n"
				"Dirty:		%zu%%\n"
1045 1046 1047 1048 1049
				"Metadata:	%zu%%\n"
				"Average:	%llu\n"
				"Sectors per Q:	%zu\n"
				"Quantiles:	[",
				unused * 100 / (size_t) ca->sb.nbuckets,
K
Kent Overstreet 已提交
1050 1051 1052
				available * 100 / (size_t) ca->sb.nbuckets,
				dirty * 100 / (size_t) ca->sb.nbuckets,
				meta * 100 / (size_t) ca->sb.nbuckets, sum,
1053 1054 1055 1056 1057 1058 1059 1060 1061
				n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));

		for (i = 0; i < ARRAY_SIZE(q); i++)
			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
					 "%u ", q[i]);
		ret--;

		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");

K
Kent Overstreet 已提交
1062 1063 1064 1065 1066 1067 1068 1069 1070 1071
		return ret;
	}

	return 0;
}
SHOW_LOCKED(bch_cache)

STORE(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
1072
	ssize_t v;
K
Kent Overstreet 已提交
1073

1074 1075 1076 1077
	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

K
Kent Overstreet 已提交
1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090
	if (attr == &sysfs_discard) {
		bool v = strtoul_or_return(buf);

		if (blk_queue_discard(bdev_get_queue(ca->bdev)))
			ca->discard = v;

		if (v != CACHE_DISCARD(&ca->sb)) {
			SET_CACHE_DISCARD(&ca->sb, v);
			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_cache_replacement_policy) {
1091
		v = sysfs_match_string(cache_replacement_policies, buf);
K
Kent Overstreet 已提交
1092 1093 1094
		if (v < 0)
			return v;

1095
		if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) {
K
Kent Overstreet 已提交
1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
			mutex_lock(&ca->set->bucket_lock);
			SET_CACHE_REPLACEMENT(&ca->sb, v);
			mutex_unlock(&ca->set->bucket_lock);

			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&ca->sectors_written, 0);
		atomic_long_set(&ca->btree_sectors_written, 0);
		atomic_long_set(&ca->meta_sectors_written, 0);
		atomic_set(&ca->io_count, 0);
		atomic_set(&ca->io_errors, 0);
	}

	return size;
}
STORE_LOCKED(bch_cache)

/*
 * NULL-terminated attribute table for a cache device kobject; handed to
 * KTYPE(bch_cache) below.  The entries correspond to the attributes handled
 * by the __bch_cache show/store bodies above.
 */
static struct attribute *bch_cache_files[] = {
	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_nbuckets,
	&sysfs_priority_stats,
	&sysfs_discard,
	&sysfs_written,
	&sysfs_btree_written,
	&sysfs_metadata_written,
	&sysfs_io_errors,
	&sysfs_clear_stats,
	&sysfs_cache_replacement_policy,
	NULL	/* sentinel */
};
KTYPE(bch_cache);