diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 99d12fce876b2db44124c8aef3616e8ee62d219b..70fbde8ca70c95fed9d3c36d9a35e0bef9cb44af 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -399,6 +399,28 @@ struct cached_dev {
 	unsigned int		offline_seconds;
 
 	char			backing_dev_name[BDEVNAME_SIZE];
+
+	/* Count front-end and writeback I/O bandwidth per second */
+	atomic_t		writeback_sector_size;
+	atomic_t		writeback_io_num;
+	atomic_t		front_io_num;
+	unsigned int		writeback_sector_size_per_sec;
+	unsigned int		writeback_io_num_per_sec;
+	unsigned int		front_io_num_per_sec;
+	struct timer_list	io_stat_timer;
+
+	unsigned int		writeback_state;
+#define WRITEBACK_DEFAULT	0
+#define WRITEBACK_QUICK		1
+#define WRITEBACK_SLOW		2
+
+	/* token bucket used to throttle write traffic */
+	spinlock_t		token_lock;
+	unsigned int		max_sector_size;
+	unsigned int		max_io_num;
+	unsigned int		write_token_sector_size;
+	unsigned int		write_token_io_num;
+	struct timer_list	token_assign_timer;
 };
 
 enum alloc_reserve {
@@ -717,6 +739,10 @@ struct cache_set {
 
 #define BUCKET_HASH_BITS	12
 	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
+	unsigned int		cutoff_writeback_sync;
+	bool			traffic_policy_start;
+	bool			force_write_through;
+	unsigned int		gc_sectors;
 };
 
 struct bbio {
@@ -732,6 +758,29 @@ struct bbio {
 	struct bio		bio;
 };
 
+struct get_bcache_status {
+	unsigned int		writeback_sector_size_per_sec;
+	unsigned int		writeback_io_num_per_sec;
+	unsigned int		front_io_num_per_sec;
+	uint64_t		dirty_rate;
+	unsigned int		available;
+};
+
+struct set_bcache_status {
+	unsigned int		write_token_sector_size;
+	unsigned int		write_token_io_num;
+	bool			traffic_policy_start;
+	bool			force_write_through;
+	bool			copy_gc_enabled;
+	bool			trigger_gc;
+	unsigned int		writeback_state;
+	unsigned int		gc_sectors;
+	unsigned int		cutoff_writeback_sync;
+};
+#define BCACHE_MAJOR		'B'
+#define BCACHE_GET_WRITE_STATUS	_IOR(BCACHE_MAJOR, 0x0, struct get_bcache_status)
+#define BCACHE_SET_WRITE_STATUS	_IOW(BCACHE_MAJOR, 0x1, struct set_bcache_status)
+
 #define BTREE_PRIO		USHRT_MAX
 #define INITIAL_PRIO		32768U
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 4d0cca145f6992a4efe7be1ff930102b4ce6c620..7ddadcc485ea66d25eb58aae1bdffcdea87b00f4 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -193,7 +193,11 @@ static inline unsigned int bset_block_offset(struct btree *b, struct bset *i)
 
 static inline void set_gc_sectors(struct cache_set *c)
 {
-	atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
+	if (c->gc_sectors == 0)
+		atomic_set(&c->sectors_to_gc,
+			   c->sb.bucket_size * c->nbuckets / 16);
+	else
+		atomic_set(&c->sectors_to_gc, c->gc_sectors);
 }
 
 void bkey_put(struct cache_set *c, struct bkey *k);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6d89e56a4a4105181f4a8d4a0d455a6cc80ede15..c05544e07722e4cf24a7af98a61b8b60033dd44e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -28,6 +28,7 @@
 struct kmem_cache *bch_search_cache;
 
 static void bch_data_insert_start(struct closure *cl);
+static void alloc_token(struct cached_dev *dc, unsigned int sectors);
 
 static unsigned int cache_mode(struct cached_dev *dc)
 {
@@ -396,7 +397,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 		goto skip;
 
 	if (mode == CACHE_MODE_NONE ||
-	    (mode == CACHE_MODE_WRITEAROUND &&
+	    ((mode == CACHE_MODE_WRITEAROUND ||
+	      c->force_write_through == true) &&
 	     op_is_write(bio_op(bio))))
 		goto skip;
 
@@ -858,6 +860,10 @@ static void cached_dev_read_done(struct closure *cl)
 	if (s->iop.bio &&
 	    (!dc->read_bypass || s->prefetch) &&
 	    !test_bit(CACHE_SET_STOPPING, &s->iop.c->flags)) {
 		BUG_ON(!s->iop.replace);
+		if ((dc->disk.c->traffic_policy_start == true) &&
+		    (dc->disk.c->force_write_through != true)) {
+			alloc_token(dc, bio_sectors(s->iop.bio));
+		}
 		closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
 	}
 
@@ -1000,6 +1006,35 @@ static void cached_dev_write_complete(struct closure *cl)
 	continue_at(cl, cached_dev_bio_complete, NULL);
 }
 
+static void alloc_token(struct cached_dev *dc, unsigned int sectors)
+{
+	int count = 0;
+
+	spin_lock_bh(&dc->token_lock);
+
+	while ((dc->write_token_sector_size < sectors) &&
+	       (dc->write_token_io_num == 0)) {
+		spin_unlock_bh(&dc->token_lock);
+		schedule_timeout_interruptible(msecs_to_jiffies(10));
+		count++;
+		if ((dc->disk.c->traffic_policy_start != true) ||
+		    (cache_mode(dc) != CACHE_MODE_WRITEBACK) ||
+		    (count > 100))
+			return;
+		spin_lock_bh(&dc->token_lock);
+	}
+
+	if (dc->write_token_sector_size >= sectors)
+		dc->write_token_sector_size -= sectors;
+	else
+		dc->write_token_sector_size = 0;
+
+	if (dc->write_token_io_num > 0)
+		dc->write_token_io_num--;
+
+	spin_unlock_bh(&dc->token_lock);
+}
+
 static void cached_dev_write(struct cached_dev *dc, struct search *s)
 {
 	struct closure *cl = &s->cl;
@@ -1247,6 +1282,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
 					      cached_dev_nodata,
 					      bcache_wq);
 		} else {
+			atomic_inc(&dc->front_io_num);
 			s->iop.bypass = check_should_bypass(dc, bio);
 
 			if (!s->iop.bypass && bio->bi_iter.bi_size && !rw) {
@@ -1258,10 +1294,17 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
 				save_circ_item(&s->smp);
 			}
 
-			if (rw)
+			if (rw) {
+				if ((s->iop.bypass == false) &&
+				    (dc->disk.c->traffic_policy_start == true) &&
+				    (cache_mode(dc) == CACHE_MODE_WRITEBACK) &&
+				    (bio_op(bio) != REQ_OP_DISCARD)) {
+					alloc_token(dc, bio_sectors(bio));
+				}
 				cached_dev_write(dc, s);
-			else
+			} else {
 				cached_dev_read(dc, s);
+			}
 		}
 	} else
 		/* I/O request sent to backing device */
@@ -1270,6 +1313,65 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
 	return BLK_QC_T_NONE;
 }
 
+static int bcache_get_write_status(struct cached_dev *dc, unsigned long arg)
+{
+	struct get_bcache_status a;
+	uint64_t cache_sectors;
+	struct cache_set *c = dc->disk.c;
+
+	if (c == NULL)
+		return -ENODEV;
+
+	a.writeback_sector_size_per_sec = dc->writeback_sector_size_per_sec;
+	a.writeback_io_num_per_sec = dc->writeback_io_num_per_sec;
+	a.front_io_num_per_sec = dc->front_io_num_per_sec;
+	cache_sectors = c->nbuckets * c->sb.bucket_size -
+		atomic_long_read(&c->flash_dev_dirty_sectors);
+	a.dirty_rate = div64_u64(bcache_dev_sectors_dirty(&dc->disk) * 100,
+		cache_sectors);
+	a.available = 100 - c->gc_stats.in_use;
+	if (copy_to_user((struct get_bcache_status *)arg, &a,
+		sizeof(struct get_bcache_status)))
+		return -EFAULT;
+	return 0;
+}
+
+static int bcache_set_write_status(struct cached_dev *dc, unsigned long arg)
+{
+	struct set_bcache_status a;
+	struct cache_set *c = dc->disk.c;
+
+	if (c == NULL)
+		return -ENODEV;
+	if (copy_from_user(&a, (struct set_bcache_status *)arg,
+		sizeof(struct set_bcache_status)))
+		return -EFAULT;
+
+	if (c->traffic_policy_start != a.traffic_policy_start)
+		pr_info("%s traffic policy %s\n", dc->disk.disk->disk_name,
+			(a.traffic_policy_start == true) ? "enable" : "disable");
+	if (c->force_write_through != a.force_write_through)
+		pr_info("%s force write through %s\n", dc->disk.disk->disk_name,
+			(a.force_write_through == true) ? "enable" : "disable");
+	if (a.trigger_gc) {
+		pr_info("trigger %s gc\n", dc->disk.disk->disk_name);
+		atomic_set(&c->sectors_to_gc, -1);
+		wake_up_gc(c);
+	}
+	if ((a.cutoff_writeback_sync >= MIN_CUTOFF_WRITEBACK_SYNC) &&
+	    (a.cutoff_writeback_sync <= MAX_CUTOFF_WRITEBACK_SYNC)) {
+		c->cutoff_writeback_sync = a.cutoff_writeback_sync;
+	}
+
+	dc->max_sector_size = a.write_token_sector_size;
+	dc->max_io_num = a.write_token_io_num;
+	c->traffic_policy_start = a.traffic_policy_start;
+	c->force_write_through = a.force_write_through;
+	c->gc_sectors = a.gc_sectors;
+	dc->writeback_state = a.writeback_state;
+	return 0;
+}
+
 static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
 			    unsigned int cmd, unsigned long arg)
 {
@@ -1278,7 +1380,14 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
 	if (dc->io_disable)
 		return -EIO;
 
-	return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
+	switch (cmd) {
+	case BCACHE_GET_WRITE_STATUS:
+		return bcache_get_write_status(dc, arg);
+	case BCACHE_SET_WRITE_STATUS:
+		return bcache_set_write_status(dc, arg);
+	default:
+		return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
+	}
 }
 
 static int cached_dev_congested(void *data, int bits)
@@ -1438,3 +1547,29 @@ int __init bch_request_init(void)
 
 	return 0;
 }
+
+static void token_assign(struct timer_list *t)
+{
+	struct cached_dev *dc = from_timer(dc, t, token_assign_timer);
+
+	dc->token_assign_timer.expires = jiffies + HZ / 8;
+	add_timer(&dc->token_assign_timer);
+
+	spin_lock(&dc->token_lock);
+	dc->write_token_sector_size = dc->max_sector_size / 8;
+	dc->write_token_io_num = dc->max_io_num / 8;
+	dc->write_token_io_num =
+		(dc->write_token_io_num == 0) ? 1 : dc->write_token_io_num;
+	spin_unlock(&dc->token_lock);
+}
+
+void bch_traffic_policy_init(struct cached_dev *dc)
+{
+	spin_lock_init(&dc->token_lock);
+	dc->write_token_sector_size = 0;
+	dc->write_token_io_num = 0;
+
+	timer_setup(&dc->token_assign_timer, token_assign, 0);
+	dc->token_assign_timer.expires = jiffies + HZ / 8;
+	add_timer(&dc->token_assign_timer);
+}
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 3667bc5390dfef7f41a6c8a57504d650cf1ef87c..f677ba87049401873a6f58aadd3b54028ff15136 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -41,6 +41,8 @@ void bch_data_insert(struct closure *cl);
 void bch_cached_dev_request_init(struct cached_dev *dc);
 void bch_flash_dev_request_init(struct bcache_device *d);
 
+void bch_traffic_policy_init(struct cached_dev *dc);
+
 extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache;
 
 struct search {
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e7f7a0f0386828a6c8eebc6b50038092dd9b4cd9..3f858de9e9602fcc0f175a84316b0d96ff5c7f41 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1210,6 +1210,8 @@ static void cached_dev_free(struct closure *cl)
 {
 	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
 
+	del_timer_sync(&dc->io_stat_timer);
+	del_timer_sync(&dc->token_assign_timer);
 	if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
 		cancel_writeback_rate_update_dwork(dc);
 
@@ -1250,6 +1252,36 @@ static void cached_dev_flush(struct closure *cl)
 	continue_at(cl, cached_dev_free, system_wq);
 }
 
+static void cached_dev_io_stat(struct timer_list *t)
+{
+	struct cached_dev *dc = from_timer(dc, t, io_stat_timer);
+
+	dc->io_stat_timer.expires = jiffies + HZ;
+	add_timer(&dc->io_stat_timer);
+
+	dc->writeback_sector_size_per_sec =
+		atomic_read(&dc->writeback_sector_size);
+	dc->writeback_io_num_per_sec = atomic_read(&dc->writeback_io_num);
+	dc->front_io_num_per_sec = atomic_read(&dc->front_io_num);
+	atomic_set(&dc->writeback_sector_size, 0);
+	atomic_set(&dc->writeback_io_num, 0);
+	atomic_set(&dc->front_io_num, 0);
+}
+
+static void cached_dev_timer_init(struct cached_dev *dc)
+{
+	dc->writeback_sector_size_per_sec = 0;
+	dc->writeback_io_num_per_sec = 0;
+	dc->front_io_num_per_sec = 0;
+	atomic_set(&dc->writeback_sector_size, 0);
+	atomic_set(&dc->writeback_io_num, 0);
+	atomic_set(&dc->front_io_num, 0);
+
+	timer_setup(&dc->io_stat_timer, cached_dev_io_stat, 0);
+	dc->io_stat_timer.expires = jiffies + HZ;
+	add_timer(&dc->io_stat_timer);
+}
+
 static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
 {
 	int ret;
@@ -1266,6 +1298,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
 	INIT_LIST_HEAD(&dc->io_lru);
 	spin_lock_init(&dc->io_lock);
 	bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
+	cached_dev_timer_init(dc);
+	bch_traffic_policy_init(dc);
 
 	dc->sequential_cutoff = 4 << 20;
 	dc->inflight_block_enable = 1;
@@ -1774,6 +1808,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	c->congested_read_threshold_us	= 2000;
 	c->congested_write_threshold_us	= 20000;
 	c->error_limit	= DEFAULT_IO_ERROR_LIMIT;
+	c->cutoff_writeback_sync = MIN_CUTOFF_WRITEBACK_SYNC;
 
 	WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
 	return c;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 706d3a245dba64627173a42c98b55292ccbb435c..4c693ac29b0e011ab89d85548d2e1a26be6a8282 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -51,6 +51,13 @@ static const char * const error_actions[] = {
 	NULL
 };
 
+static const char * const writeback_state[] = {
+	"default",
+	"quick",
+	"slow",
+	NULL
+};
+
 write_attribute(attach);
 write_attribute(detach);
 write_attribute(unregister);
@@ -96,6 +103,9 @@ read_attribute(io_errors);
 read_attribute(congested);
 rw_attribute(congested_read_threshold_us);
 rw_attribute(congested_write_threshold_us);
+rw_attribute(gc_sectors);
+rw_attribute(traffic_policy_start);
+rw_attribute(force_write_through);
 
 rw_attribute(sequential_cutoff);
 rw_attribute(read_bypass);
@@ -114,7 +124,13 @@ rw_attribute(writeback_rate_update_seconds);
 rw_attribute(writeback_rate_i_term_inverse);
 rw_attribute(writeback_rate_p_term_inverse);
 rw_attribute(writeback_rate_minimum);
+rw_attribute(writeback_state);
+read_attribute(writeback_sector_size_per_sec);
+read_attribute(writeback_io_num_per_sec);
+read_attribute(front_io_num_per_sec);
 read_attribute(writeback_rate_debug);
+read_attribute(write_token_sector_size);
+read_attribute(write_token_io_num);
 
 read_attribute(stripe_size);
 read_attribute(partial_stripes_expensive);
@@ -169,6 +185,11 @@ SHOW(__bch_cached_dev)
 					       bch_cache_modes,
 					       BDEV_CACHE_MODE(&dc->sb));
 
+	if (attr == &sysfs_writeback_state)
+		return bch_snprint_string_list(buf, PAGE_SIZE,
+					       writeback_state,
+					       dc->writeback_state);
+
 	if (attr == &sysfs_readahead_cache_policy)
 		return bch_snprint_string_list(buf, PAGE_SIZE,
 					       bch_reada_cache_policies,
@@ -186,6 +207,9 @@ SHOW(__bch_cached_dev)
 	var_printf(writeback_metadata,	"%i");
 	var_printf(writeback_running,	"%i");
 	var_print(writeback_delay);
+	var_print(writeback_sector_size_per_sec);
+	var_print(writeback_io_num_per_sec);
+	var_print(front_io_num_per_sec);
 	var_print(writeback_percent);
 	sysfs_hprint(writeback_rate,
 		     wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
@@ -248,6 +272,8 @@ SHOW(__bch_cached_dev)
 
 	sysfs_print(running,		atomic_read(&dc->running));
 	sysfs_print(state,		states[BDEV_STATE(&dc->sb)]);
+	var_print(write_token_sector_size);
+	var_print(write_token_io_num);
 
 	if (attr == &sysfs_label) {
 		memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
@@ -346,6 +372,15 @@ STORE(__cached_dev)
 		}
 	}
 
+	if (attr == &sysfs_writeback_state) {
+		v = __sysfs_match_string(writeback_state, -1, buf);
+
+		if (v < 0)
+			return v;
+
+		dc->writeback_state = v;
+	}
+
 	if (attr == &sysfs_readahead_cache_policy) {
 		v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
 		if (v < 0)
@@ -448,11 +483,14 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_data_csum,
 #endif
 	&sysfs_cache_mode,
+	&sysfs_writeback_state,
 	&sysfs_readahead_cache_policy,
 	&sysfs_stop_when_cache_set_failed,
 	&sysfs_writeback_metadata,
 	&sysfs_writeback_running,
 	&sysfs_writeback_delay,
+	&sysfs_writeback_sector_size_per_sec,
+	&sysfs_writeback_io_num_per_sec,
 	&sysfs_writeback_percent,
 	&sysfs_writeback_rate,
 	&sysfs_writeback_rate_update_seconds,
@@ -460,6 +498,9 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_writeback_rate_p_term_inverse,
 	&sysfs_writeback_rate_minimum,
 	&sysfs_writeback_rate_debug,
+	&sysfs_write_token_sector_size,
+	&sysfs_write_token_io_num,
+	&sysfs_front_io_num_per_sec,
 	&sysfs_io_errors,
 	&sysfs_io_error_limit,
 	&sysfs_io_disable,
@@ -714,6 +755,12 @@ SHOW(__bch_cache_set)
 			    c->congested_read_threshold_us);
 	sysfs_print(congested_write_threshold_us,
 		    c->congested_write_threshold_us);
+	sysfs_print(gc_sectors,
+		    c->gc_sectors);
+	sysfs_print(traffic_policy_start,
+		    c->traffic_policy_start);
+	sysfs_print(force_write_through,
+		    c->force_write_through);
 
 	sysfs_print(active_journal_entries,	fifo_used(&c->journal.pin));
 	sysfs_printf(verify,			"%i", c->verify);
@@ -800,6 +847,12 @@ STORE(__bch_cache_set)
 			c->congested_read_threshold_us);
 	sysfs_strtoul(congested_write_threshold_us,
 		      c->congested_write_threshold_us);
+	sysfs_strtoul(gc_sectors,
+		      c->gc_sectors);
+	sysfs_strtoul(traffic_policy_start,
+		      c->traffic_policy_start);
+	sysfs_strtoul(force_write_through,
+		      c->force_write_through);
 
 	if (attr == &sysfs_errors) {
 		v = __sysfs_match_string(error_actions, -1, buf);
@@ -926,6 +979,9 @@ static struct attribute *bch_cache_set_internal_files[] = {
 	&sysfs_btree_shrinker_disabled,
 	&sysfs_copy_gc_enabled,
 	&sysfs_io_disable,
+	&sysfs_gc_sectors,
+	&sysfs_traffic_policy_start,
+	&sysfs_force_write_through,
 	NULL
 };
 KTYPE(bch_cache_set_internal);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index b5fc3c6c7178e00212cb5d78d9f2acb3e4e8ab69..901ad8bae761425235926b4049031914f5f911a4 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -222,7 +222,13 @@ static unsigned int writeback_delay(struct cached_dev *dc,
 	    !dc->writeback_percent)
 		return 0;
 
-	return bch_next_delay(&dc->writeback_rate, sectors);
+	if (dc->writeback_state == WRITEBACK_DEFAULT) {
+		return bch_next_delay(&dc->writeback_rate, sectors);
+	} else if (dc->writeback_state == WRITEBACK_QUICK) {
+		return 0;
+	} else {
+		return msecs_to_jiffies(1000);
+	}
 }
 
 struct dirty_io {
@@ -287,6 +293,9 @@ static void write_dirty_finish(struct closure *cl)
 			    : &dc->disk.c->writeback_keys_done);
 	}
 
+	atomic_add(KEY_SIZE(&w->key), &dc->writeback_sector_size);
+	atomic_inc(&dc->writeback_io_num);
+
 	bch_keybuf_del(&dc->writeback_keys, w);
 	up(&dc->in_flight);
 
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index e75dc33339f6f64d7786183ee580f446365e5f92..a3151c0e966095576687e47ea1e51d3d5a90dcaf 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -3,7 +3,8 @@
 #define _BCACHE_WRITEBACK_H
 
 #define CUTOFF_WRITEBACK	40
-#define CUTOFF_WRITEBACK_SYNC	70
+#define MIN_CUTOFF_WRITEBACK_SYNC	70
+#define MAX_CUTOFF_WRITEBACK_SYNC	90
 
 #define MAX_WRITEBACKS_IN_PASS  5
 #define MAX_WRITESIZE_IN_PASS   5000	/* *512b */
@@ -57,10 +58,11 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
 				    unsigned int cache_mode, bool would_skip)
 {
 	unsigned int in_use = dc->disk.c->gc_stats.in_use;
+	unsigned int cutoff = dc->disk.c->cutoff_writeback_sync;
 
 	if (cache_mode != CACHE_MODE_WRITEBACK ||
 	    test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
-	    in_use > CUTOFF_WRITEBACK_SYNC)
+	    in_use > cutoff)
 		return false;
 
 	if (bio_op(bio) == REQ_OP_DISCARD)
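
Note on the token-bucket arithmetic: token_assign() re-arms itself every
HZ / 8 jiffies, so the bucket is refilled eight times per second, and each
tick replaces whatever tokens remain with max_sector_size / 8 and
max_io_num / 8 (floored at one I/O token) rather than adding to them, so
unused budget does not accumulate across ticks. The sustained budget is
therefore the configured maximum per second: taking write_token_sector_size
= 40960 as an example value (not from the patch), each 125 ms tick grants
5120 sectors, i.e. 2.5 MiB per tick or roughly 20 MiB/s of cached writes.
A writer that finds the bucket empty sleeps in alloc_token() in 10 ms steps
and gives up once count exceeds 100, or as soon as the policy is switched
off or the device leaves writeback mode, so a throttled request is delayed
by at most about one second before it is allowed through anyway.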
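
The patch adds an ioctl ABI but no example consumer, so a minimal userspace
sketch follows. It is not part of the patch: the two structs and the command
macros are assumed to be mirrored verbatim from bcache.h (no UAPI header is
added), /dev/bcache0 is a placeholder device node, and the token caps are
arbitrary illustration values.

/* Hypothetical userspace consumer of the new bcache traffic-policy ioctls. */
#include <fcntl.h>
#include <linux/ioctl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Mirrored from drivers/md/bcache/bcache.h; layout must match the kernel. */
struct get_bcache_status {
	unsigned int writeback_sector_size_per_sec;
	unsigned int writeback_io_num_per_sec;
	unsigned int front_io_num_per_sec;
	uint64_t dirty_rate;
	unsigned int available;
};

struct set_bcache_status {
	unsigned int write_token_sector_size;
	unsigned int write_token_io_num;
	bool traffic_policy_start;
	bool force_write_through;
	bool copy_gc_enabled;
	bool trigger_gc;
	unsigned int writeback_state;
	unsigned int gc_sectors;
	unsigned int cutoff_writeback_sync;
};

#define BCACHE_MAJOR 'B'
#define BCACHE_GET_WRITE_STATUS _IOR(BCACHE_MAJOR, 0x0, struct get_bcache_status)
#define BCACHE_SET_WRITE_STATUS _IOW(BCACHE_MAJOR, 0x1, struct set_bcache_status)

int main(void)
{
	struct get_bcache_status gst = {0};
	struct set_bcache_status sst = {0};
	int fd = open("/dev/bcache0", O_RDWR);	/* placeholder device node */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (ioctl(fd, BCACHE_GET_WRITE_STATUS, &gst) == 0)
		printf("writeback %u sectors/s in %u IOs/s, front %u IOs/s, "
		       "dirty %llu%%, available %u%%\n",
		       gst.writeback_sector_size_per_sec,
		       gst.writeback_io_num_per_sec,
		       gst.front_io_num_per_sec,
		       (unsigned long long)gst.dirty_rate,
		       gst.available);

	/* Throttle cached writes to ~20 MiB/s and 1000 IOs/s (example caps). */
	sst.traffic_policy_start = true;
	sst.write_token_sector_size = 40960;
	sst.write_token_io_num = 1000;
	sst.writeback_state = 0;	/* WRITEBACK_DEFAULT */
	/* cutoff_writeback_sync left 0: outside [70, 90], so the cutoff is kept. */
	if (ioctl(fd, BCACHE_SET_WRITE_STATUS, &sst) < 0)
		perror("BCACHE_SET_WRITE_STATUS");

	close(fd);
	return 0;
}

Two review-worthy details the sketch leans on: the payload structs embed
bool, so the userspace copies must match the kernel's member order and sizes
exactly, and bcache_set_write_status() applies gc_sectors, writeback_state
and the token caps unconditionally on every call, so a caller toggling only
one knob must still pass the values it wants preserved for the rest.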