提交 16749c23 编写于 作者: K Kent Overstreet

bcache: New writeback PD controller

The old writeback PD controller could get into states where it had throttled all
the way down and would take way too long to recover - it was too complicated to
really understand what it was doing.

This rewrites a good chunk of it to hopefully be simpler and make more sense,
and it also pays more attention to units, which should make the behaviour a bit
easier to understand.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
上级 6d3d1a9c
...@@ -373,14 +373,14 @@ struct cached_dev { ...@@ -373,14 +373,14 @@ struct cached_dev {
unsigned char writeback_percent; unsigned char writeback_percent;
unsigned writeback_delay; unsigned writeback_delay;
int writeback_rate_change;
int64_t writeback_rate_derivative;
uint64_t writeback_rate_target; uint64_t writeback_rate_target;
int64_t writeback_rate_proportional;
int64_t writeback_rate_derivative;
int64_t writeback_rate_change;
unsigned writeback_rate_update_seconds; unsigned writeback_rate_update_seconds;
unsigned writeback_rate_d_term; unsigned writeback_rate_d_term;
unsigned writeback_rate_p_term_inverse; unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_d_smooth;
}; };
enum alloc_watermarks { enum alloc_watermarks {
......
...@@ -83,7 +83,6 @@ rw_attribute(writeback_rate); ...@@ -83,7 +83,6 @@ rw_attribute(writeback_rate);
rw_attribute(writeback_rate_update_seconds); rw_attribute(writeback_rate_update_seconds);
rw_attribute(writeback_rate_d_term); rw_attribute(writeback_rate_d_term);
rw_attribute(writeback_rate_p_term_inverse); rw_attribute(writeback_rate_p_term_inverse);
rw_attribute(writeback_rate_d_smooth);
read_attribute(writeback_rate_debug); read_attribute(writeback_rate_debug);
read_attribute(stripe_size); read_attribute(stripe_size);
...@@ -129,31 +128,41 @@ SHOW(__bch_cached_dev) ...@@ -129,31 +128,41 @@ SHOW(__bch_cached_dev)
var_printf(writeback_running, "%i"); var_printf(writeback_running, "%i");
var_print(writeback_delay); var_print(writeback_delay);
var_print(writeback_percent); var_print(writeback_percent);
sysfs_print(writeback_rate, dc->writeback_rate.rate); sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
var_print(writeback_rate_update_seconds); var_print(writeback_rate_update_seconds);
var_print(writeback_rate_d_term); var_print(writeback_rate_d_term);
var_print(writeback_rate_p_term_inverse); var_print(writeback_rate_p_term_inverse);
var_print(writeback_rate_d_smooth);
if (attr == &sysfs_writeback_rate_debug) { if (attr == &sysfs_writeback_rate_debug) {
char rate[20];
char dirty[20]; char dirty[20];
char derivative[20];
char target[20]; char target[20];
bch_hprint(dirty, char proportional[20];
bcache_dev_sectors_dirty(&dc->disk) << 9); char derivative[20];
bch_hprint(derivative, dc->writeback_rate_derivative << 9); char change[20];
s64 next_io;
bch_hprint(rate, dc->writeback_rate.rate << 9);
bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
bch_hprint(target, dc->writeback_rate_target << 9); bch_hprint(target, dc->writeback_rate_target << 9);
bch_hprint(proportional,dc->writeback_rate_proportional << 9);
bch_hprint(derivative, dc->writeback_rate_derivative << 9);
bch_hprint(change, dc->writeback_rate_change << 9);
next_io = div64_s64(dc->writeback_rate.next - local_clock(),
NSEC_PER_MSEC);
return sprintf(buf, return sprintf(buf,
"rate:\t\t%u\n" "rate:\t\t%s/sec\n"
"change:\t\t%i\n"
"dirty:\t\t%s\n" "dirty:\t\t%s\n"
"target:\t\t%s\n"
"proportional:\t%s\n"
"derivative:\t%s\n" "derivative:\t%s\n"
"target:\t\t%s\n", "change:\t\t%s/sec\n"
dc->writeback_rate.rate, "next io:\t%llims\n",
dc->writeback_rate_change, rate, dirty, target, proportional,
dirty, derivative, target); derivative, change, next_io);
} }
sysfs_hprint(dirty_data, sysfs_hprint(dirty_data,
...@@ -189,6 +198,7 @@ STORE(__cached_dev) ...@@ -189,6 +198,7 @@ STORE(__cached_dev)
struct kobj_uevent_env *env; struct kobj_uevent_env *env;
#define d_strtoul(var) sysfs_strtoul(var, dc->var) #define d_strtoul(var) sysfs_strtoul(var, dc->var)
#define d_strtoul_nonzero(var) sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
#define d_strtoi_h(var) sysfs_hatoi(var, dc->var) #define d_strtoi_h(var) sysfs_hatoi(var, dc->var)
sysfs_strtoul(data_csum, dc->disk.data_csum); sysfs_strtoul(data_csum, dc->disk.data_csum);
...@@ -197,16 +207,15 @@ STORE(__cached_dev) ...@@ -197,16 +207,15 @@ STORE(__cached_dev)
d_strtoul(writeback_metadata); d_strtoul(writeback_metadata);
d_strtoul(writeback_running); d_strtoul(writeback_running);
d_strtoul(writeback_delay); d_strtoul(writeback_delay);
sysfs_strtoul_clamp(writeback_rate,
dc->writeback_rate.rate, 1, 1000000);
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
d_strtoul(writeback_rate_update_seconds); sysfs_strtoul_clamp(writeback_rate,
dc->writeback_rate.rate, 1, INT_MAX);
d_strtoul_nonzero(writeback_rate_update_seconds);
d_strtoul(writeback_rate_d_term); d_strtoul(writeback_rate_d_term);
d_strtoul(writeback_rate_p_term_inverse); d_strtoul_nonzero(writeback_rate_p_term_inverse);
sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
dc->writeback_rate_p_term_inverse, 1, INT_MAX);
d_strtoul(writeback_rate_d_smooth);
d_strtoi_h(sequential_cutoff); d_strtoi_h(sequential_cutoff);
d_strtoi_h(readahead); d_strtoi_h(readahead);
...@@ -313,7 +322,6 @@ static struct attribute *bch_cached_dev_files[] = { ...@@ -313,7 +322,6 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_rate_update_seconds, &sysfs_writeback_rate_update_seconds,
&sysfs_writeback_rate_d_term, &sysfs_writeback_rate_d_term,
&sysfs_writeback_rate_p_term_inverse, &sysfs_writeback_rate_p_term_inverse,
&sysfs_writeback_rate_d_smooth,
&sysfs_writeback_rate_debug, &sysfs_writeback_rate_debug,
&sysfs_dirty_data, &sysfs_dirty_data,
&sysfs_stripe_size, &sysfs_stripe_size,
......
...@@ -209,7 +209,13 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) ...@@ -209,7 +209,13 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{ {
uint64_t now = local_clock(); uint64_t now = local_clock();
d->next += div_u64(done, d->rate); d->next += div_u64(done * NSEC_PER_SEC, d->rate);
if (time_before64(now + NSEC_PER_SEC, d->next))
d->next = now + NSEC_PER_SEC;
if (time_after64(now - NSEC_PER_SEC * 2, d->next))
d->next = now - NSEC_PER_SEC * 2;
return time_after64(d->next, now) return time_after64(d->next, now)
? div_u64(d->next - now, NSEC_PER_SEC / HZ) ? div_u64(d->next - now, NSEC_PER_SEC / HZ)
......
...@@ -30,38 +30,40 @@ static void __update_writeback_rate(struct cached_dev *dc) ...@@ -30,38 +30,40 @@ static void __update_writeback_rate(struct cached_dev *dc)
/* PD controller */ /* PD controller */
int change = 0;
int64_t error;
int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
int64_t derivative = dirty - dc->disk.sectors_dirty_last; int64_t derivative = dirty - dc->disk.sectors_dirty_last;
int64_t proportional = dirty - target;
int64_t change;
dc->disk.sectors_dirty_last = dirty; dc->disk.sectors_dirty_last = dirty;
derivative *= dc->writeback_rate_d_term; /* Scale to sectors per second */
derivative = clamp(derivative, -dirty, dirty);
derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative, proportional *= dc->writeback_rate_update_seconds;
dc->writeback_rate_d_smooth, 0); proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
/* Avoid divide by zero */ derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
if (!target)
goto out;
error = div64_s64((dirty + derivative - target) << 8, target); derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
(dc->writeback_rate_d_term /
dc->writeback_rate_update_seconds) ?: 1, 0);
derivative *= dc->writeback_rate_d_term;
derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
change = div_s64((dc->writeback_rate.rate * error) >> 8, change = proportional + derivative;
dc->writeback_rate_p_term_inverse);
/* Don't increase writeback rate if the device isn't keeping up */ /* Don't increase writeback rate if the device isn't keeping up */
if (change > 0 && if (change > 0 &&
time_after64(local_clock(), time_after64(local_clock(),
dc->writeback_rate.next + 10 * NSEC_PER_MSEC)) dc->writeback_rate.next + NSEC_PER_MSEC))
change = 0; change = 0;
dc->writeback_rate.rate = dc->writeback_rate.rate =
clamp_t(int64_t, dc->writeback_rate.rate + change, clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
1, NSEC_PER_MSEC); 1, NSEC_PER_MSEC);
out:
dc->writeback_rate_proportional = proportional;
dc->writeback_rate_derivative = derivative; dc->writeback_rate_derivative = derivative;
dc->writeback_rate_change = change; dc->writeback_rate_change = change;
dc->writeback_rate_target = target; dc->writeback_rate_target = target;
...@@ -87,15 +89,11 @@ static void update_writeback_rate(struct work_struct *work) ...@@ -87,15 +89,11 @@ static void update_writeback_rate(struct work_struct *work)
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
{ {
uint64_t ret;
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
!dc->writeback_percent) !dc->writeback_percent)
return 0; return 0;
ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); return bch_next_delay(&dc->writeback_rate, sectors);
return min_t(uint64_t, ret, HZ);
} }
struct dirty_io { struct dirty_io {
...@@ -476,6 +474,8 @@ void bch_sectors_dirty_init(struct cached_dev *dc) ...@@ -476,6 +474,8 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
sectors_dirty_init_fn, 0); sectors_dirty_init_fn, 0);
dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
} }
int bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_init(struct cached_dev *dc)
...@@ -490,10 +490,9 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc) ...@@ -490,10 +490,9 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_delay = 30; dc->writeback_delay = 30;
dc->writeback_rate.rate = 1024; dc->writeback_rate.rate = 1024;
dc->writeback_rate_update_seconds = 30; dc->writeback_rate_update_seconds = 5;
dc->writeback_rate_d_term = 16; dc->writeback_rate_d_term = 30;
dc->writeback_rate_p_term_inverse = 64; dc->writeback_rate_p_term_inverse = 6000;
dc->writeback_rate_d_smooth = 8;
dc->writeback_thread = kthread_create(bch_writeback_thread, dc, dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback"); "bcache_writeback");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册