diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 5b87e59676b86b806e79c9831e13801c8e1b1fa1..28f5294dd6cfdcd2ead0b13d96aa90eb3fd5302b 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -3,5 +3,5 @@
 obj-$(CONFIG_BCACHE)	+= bcache.o
 
 bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
-	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
+	io.o journal.o movinggc.o request.o stats.o acache.o super.o sysfs.o trace.o\
 	util.o writeback.o features.o
diff --git a/drivers/md/bcache/acache.c b/drivers/md/bcache/acache.c
new file mode 100644
index 0000000000000000000000000000000000000000..ff3e120d9619cabe2a2ca8241ec3383f6b5508c0
--- /dev/null
+++ b/drivers/md/bcache/acache.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "acache.h"
+#include "request.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define DEV_NAME "acache"
+
+int acache_dev_size = (1024 * 4096 + 4096);
+
+module_param_named(acache_size, acache_dev_size, int, 0444);
+MODULE_PARM_DESC(acache_size, "size of the ring buffer in bytes");
+
+int acache_prefetch_workers = 1000;
+
+module_param_named(prefetch_workers, acache_prefetch_workers, int, 0444);
+MODULE_PARM_DESC(prefetch_workers, "number of workers for processing prefetch requests");
+
+struct prefetch_worker {
+	struct acache_info s;
+	struct work_struct work;
+	struct list_head list;
+};
+
+struct acache_device {
+	bool initialized;
+
+	dev_t devno;
+	struct cdev cdev;
+	struct class *class;
+	struct mem_reg *mem_regionp;
+
+	struct acache_info *readbuf;
+	struct acache_info *writebuf;
+
+	struct acache_circ *acache_info_circ;
+
+	struct workqueue_struct *wq;
+	struct prefetch_worker *prefetch_workers;
+	struct list_head prefetch_workers_free;
+	spinlock_t prefetch_workers_free_list_lock;
+} adev;
+
+#define MAX_TRANSFER_SIZE (1024 * 1024)
+
+static atomic_t acache_opened_dev = ATOMIC_INIT(0);
+static struct acache_metadata metadata;
+
+
+int acache_open(struct inode *inode, struct file *filp)
+{
+	struct mem_reg *dev;
+
+	int minor = MINOR(inode->i_rdev);
+
+	if (minor >= ACACHE_NR_DEVS)
+		return -ENODEV;
+	if (atomic_xchg(&acache_opened_dev, 1))
+		return -EPERM;
+
+	dev = &adev.mem_regionp[minor];
+
+	filp->private_data = dev;
+
+	return 0;
+}
+
+int acache_release(struct inode *inode, struct file *filp)
+{
+	atomic_dec(&acache_opened_dev);
+	return 0;
+}
+
+ssize_t read_circ_slice(struct acache_circ *circ, struct acache_info *buf,
+			size_t size)
+{
+	unsigned long first, todo, flags;
+
+	spin_lock_irqsave(&circ->lock, flags);
+
+	todo = CIRC_CNT(circ->head, circ->tail, circ->size);
+	if (todo == 0) {
+		spin_unlock_irqrestore(&circ->lock, flags);
+		return 0;
+	}
+	if (todo > size / sizeof(struct acache_info))
+		todo = size / sizeof(struct acache_info);
+
+	first = CIRC_CNT_TO_END(circ->head, circ->tail, circ->size);
+	if (first > todo)
+		first = todo;
+
+	memcpy(buf, circ->data + circ->tail, first * sizeof(struct acache_info));
+	if (first < todo)
+		memcpy(buf + first, circ->data,
+		       (todo - first) * sizeof(struct acache_info));
+	circ->tail = (circ->tail + todo) & (circ->size - 1);
+
+	spin_unlock_irqrestore(&circ->lock, flags);
+	return todo * sizeof(struct acache_info);
+}
+
+static ssize_t acache_read(struct file *filp, char __user *buf,
+			   size_t size, loff_t *ppos)
+{
+	long ret, cut;
+
+	if (metadata.conntype != ACACHE_READWRITE_CONN)
+		return -EINVAL;
+
+	if (size > MAX_TRANSFER_SIZE)
+		size = MAX_TRANSFER_SIZE;
+
+	ret = read_circ_slice(adev.acache_info_circ, adev.readbuf, size);
+	if (ret <= 0)
+		return ret;
+
+	cut = copy_to_user(buf, adev.readbuf, ret);
+	return ret - cut;
+}
+
+int process_one_request(struct acache_info *item);
+static void prefetch_worker_func(struct work_struct *work)
+{
+	struct prefetch_worker *sw =
+		container_of(work, struct prefetch_worker, work);
+
+	process_one_request(&sw->s);
+	spin_lock(&adev.prefetch_workers_free_list_lock);
+	list_add_tail(&sw->list, &adev.prefetch_workers_free);
+	spin_unlock(&adev.prefetch_workers_free_list_lock);
+}
+
+static int queue_prefetch_item(struct acache_info *s)
+{
+	struct prefetch_worker *sw;
+
+	spin_lock(&adev.prefetch_workers_free_list_lock);
+	sw = list_first_entry_or_null(&adev.prefetch_workers_free,
+				      struct prefetch_worker, list);
+	if (!sw) {
+		spin_unlock(&adev.prefetch_workers_free_list_lock);
+		return -1;
+	}
+	list_del_init(&sw->list);
+	spin_unlock(&adev.prefetch_workers_free_list_lock);
+
+	memcpy(&sw->s, s, sizeof(struct acache_info));
+	INIT_WORK(&sw->work, prefetch_worker_func);
+	queue_work(adev.wq, &sw->work);
+	return 0;
+}
+
+static ssize_t acache_write(struct file *filp, const char __user *buf,
+			    size_t size, loff_t *ppos)
+{
+	long cut;
+	int i;
+
+	if (metadata.conntype != ACACHE_READWRITE_CONN)
+		return -EINVAL;
+
+	if (size > MAX_TRANSFER_SIZE)
+		size = MAX_TRANSFER_SIZE;
+
+	cut = copy_from_user(adev.writebuf, buf, size);
+	for (i = 0; i < (size - cut) / sizeof(struct acache_info); i++) {
+		if (queue_prefetch_item(adev.writebuf + i))
+			break;
+	}
+	return i * sizeof(struct acache_info);
+}
+
+static long acache_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case ACACHE_GET_METADATA:
+		return copy_to_user((struct acache_metadata __user *)arg,
+				    &metadata, sizeof(struct acache_metadata)) ? -EFAULT : 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations acache_fops = {
+	.owner = THIS_MODULE,
+	.read = acache_read,
+	.write = acache_write,
+	.open = acache_open,
+	.release = acache_release,
+	.unlocked_ioctl = acache_ioctl,
+};
+
+void save_circ_item(struct acache_info *data)
+{
+	unsigned long flags;
+	struct acache_circ *circ = adev.acache_info_circ;
+
+	spin_lock_irqsave(&circ->lock, flags);
+	if (CIRC_SPACE(circ->head, circ->tail, circ->size) >= 1) {
+		memcpy(&circ->data[circ->head], data, sizeof(struct acache_info));
+		circ->head = (circ->head + 1) & (circ->size - 1);
+	} else {
+		pr_debug("ringbuffer is full; discard new request.\n");
+	}
+	spin_unlock_irqrestore(&circ->lock, flags);
+}
+
+void init_acache_circ(struct acache_circ **circ, void *startaddr)
+{
+	*circ = (struct acache_circ *)startaddr;
+	(*circ)->head = 0;
+	(*circ)->tail = 0;
+	(*circ)->size = ACACHE_CIRC_SIZE;
+	spin_lock_init(&(*circ)->lock);
+}
+
+static void acache_free_mem(void)
+{
+	int i;
+
+	for (i = 0; i < ACACHE_NR_DEVS; i++)
+		vfree(adev.mem_regionp[i].data);
+
+	if (adev.readbuf) {
+		vfree(adev.readbuf);
+		adev.readbuf = NULL;
+	}
+	if (adev.writebuf) {
+		vfree(adev.writebuf);
+		adev.writebuf = NULL;
+	}
+
+	kfree(adev.prefetch_workers);
+	adev.prefetch_workers = NULL;
+}
+
+int acache_prefetch_init(struct acache_device *adev)
+{
+	int i;
+
+	if (acache_prefetch_workers <= 0) {
+		pr_err("acache_prefetch_workers should be greater than zero\n");
+		return -1;
+	}
+	adev->prefetch_workers = kmalloc_array(acache_prefetch_workers,
+					       sizeof(struct prefetch_worker),
+					       GFP_KERNEL);
+	if (!adev->prefetch_workers)
+		goto fail_prefetch_workers_alloc;
+
+	INIT_LIST_HEAD(&adev->prefetch_workers_free);
+	spin_lock_init(&adev->prefetch_workers_free_list_lock);
+	for (i = 0; i < acache_prefetch_workers; i++) {
+		spin_lock(&adev->prefetch_workers_free_list_lock);
+		list_add_tail(&adev->prefetch_workers[i].list,
+			      &adev->prefetch_workers_free);
+		spin_unlock(&adev->prefetch_workers_free_list_lock);
+	}
+
+	adev->wq = alloc_workqueue("acache_prefetch", WQ_MEM_RECLAIM, 0);
+	if (!adev->wq)
+		goto fail_workqueue_alloc;
+
+	return 0;
+
+fail_workqueue_alloc:
+	kfree(adev->prefetch_workers);
+	adev->prefetch_workers = NULL;
+fail_prefetch_workers_alloc:
+	if (adev->wq)
+		destroy_workqueue(adev->wq);
+	return -1;
+}
+
+int acache_dev_init(void)
+{
+	int ret;
+	int i;
+	int major;
+	struct device *dev;
+
+	major = alloc_chrdev_region(&adev.devno, 0, ACACHE_NR_DEVS, DEV_NAME);
+	if (major < 0) {
+		pr_err("failed to allocate chrdev region: %d\n", major);
+		ret = major;
+		goto fail_allocdev;
+	}
+
+	adev.class = class_create(THIS_MODULE, DEV_NAME);
+	if (IS_ERR(adev.class)) {
+		pr_err("failed to create acache class\n");
+		ret = -1;
+		goto fail_class;
+	}
+
+	if (acache_dev_size < PAGE_SIZE) {
+		pr_err("acache_dev_size should not be less than PAGE_SIZE\n");
+		ret = -1;
+		goto fail_dev_add;
+	}
+	metadata.devsize = acache_dev_size;
+	metadata.magic = ACACHE_MAGIC;
+	metadata.conntype = ACACHE_READWRITE_CONN;
+	cdev_init(&adev.cdev, &acache_fops);
+	adev.cdev.owner = THIS_MODULE;
+
+	ret = cdev_add(&adev.cdev, adev.devno, ACACHE_NR_DEVS);
+	if (ret < 0) {
+		pr_err("failed to add cdev\n");
+		goto fail_dev_add;
+	}
+
+	dev = device_create(adev.class, NULL, adev.devno, NULL, DEV_NAME);
+	if (IS_ERR(dev)) {
+		pr_err("Could not create device\n");
+		ret = -1;
+		goto fail_device;
+	}
+
+	adev.readbuf = vmalloc(MAX_TRANSFER_SIZE);
+	adev.writebuf = vmalloc(MAX_TRANSFER_SIZE);
+	if (!adev.readbuf || !adev.writebuf) {
+		ret = -ENOMEM;
+		goto fail_malloc;
+	}
+
+	adev.initialized = true;
+	adev.mem_regionp =
+		kmalloc_array(ACACHE_NR_DEVS, sizeof(struct mem_reg), GFP_KERNEL);
+	if (!adev.mem_regionp) {
+		ret = -ENOMEM;
+		goto fail_malloc;
+	}
+	memset(adev.mem_regionp, 0, sizeof(struct mem_reg) * ACACHE_NR_DEVS);
+
+	for (i = 0; i < ACACHE_NR_DEVS; i++) {
+		adev.mem_regionp[i].size = ACACHE_DEV_SIZE;
+		adev.mem_regionp[i].data = vmalloc(ACACHE_DEV_SIZE);
+		if (!adev.mem_regionp[i].data) {
+			ret = -ENOMEM;
+			goto fail_memregion_data_malloc;
+		}
+		memset(adev.mem_regionp[i].data, 0, ACACHE_DEV_SIZE);
+	}
+
+	init_acache_circ(&adev.acache_info_circ, adev.mem_regionp[0].data);
+	ret = acache_prefetch_init(&adev);
+	if (ret)
+		goto fail_prefetch_init;
+
+	return 0;
+
+fail_prefetch_init:
+fail_memregion_data_malloc:
+	acache_free_mem();
+fail_malloc:
+	device_destroy(adev.class, adev.devno);
+fail_device:
+	cdev_del(&adev.cdev);
+fail_dev_add:
+	class_destroy(adev.class);
+fail_class:
+	unregister_chrdev_region(adev.devno, ACACHE_NR_DEVS);
+fail_allocdev:
+	return ret;
+}
+
+void acache_dev_exit(void)
+{
+	if (!adev.initialized)
+		return;
+
+	if (adev.wq) {
+		flush_workqueue(adev.wq);
+		destroy_workqueue(adev.wq);
+	}
+	device_destroy(adev.class, adev.devno);
+	cdev_del(&adev.cdev);
+	acache_free_mem();
+	kfree(adev.mem_regionp);
+	unregister_chrdev_region(adev.devno, ACACHE_NR_DEVS);
+	class_destroy(adev.class);
+	kfree(adev.prefetch_workers);
+}
+
+struct cached_dev *get_cached_device_by_dev(dev_t dev)
+{
+	struct cache_set *c, *tc;
+	struct cached_dev *dc, *t;
+
+	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+		list_for_each_entry_safe(dc, t, &c->cached_devs, list)
+			if (dc->bdev->bd_dev == dev && cached_dev_get(dc))
+				return dc;
+
+	return NULL;
+}
+
+struct bio *get_bio_by_item(struct cached_dev *dc, struct acache_info *item)
+{
+	struct bio *bio;
+	uint64_t offset = item->offset + dc->sb.data_offset;
+
+	if (get_capacity(dc->bdev->bd_disk) < offset + (item->length >> 9)) {
+		pr_err("prefetch area exceeds the capacity of disk(%d:%d), end: %llx, capacity: %llx\n",
+		       MAJOR(dc->bdev->bd_dev), MINOR(dc->bdev->bd_dev),
+		       offset + (item->length >> 9),
+		       get_capacity(dc->bdev->bd_disk));
+		return NULL;
+	}
+
+	bio = bio_alloc_bioset(GFP_NOWAIT, DIV_ROUND_UP(item->length >> 9, PAGE_SECTORS), &dc->disk.bio_split);
+	if (!bio) {
+		bio = bio_alloc_bioset(GFP_NOWAIT, DIV_ROUND_UP(item->length >> 9, PAGE_SECTORS), NULL);
+		if (!bio)
+			return NULL;
+	}
+
+	bio_set_dev(bio, dc->bdev);
+	bio->bi_iter.bi_sector = item->offset + dc->sb.data_offset;
+	bio->bi_iter.bi_size = (item->length >> 9) << 9;
+
+	bch_bio_map(bio, NULL);
+	if (bch_bio_alloc_pages(bio, __GFP_NOWARN | GFP_NOIO))
+		goto out_put;
+
+	return bio;
+out_put:
+	bio_put(bio);
+	return NULL;
+}
+
+int process_one_request(struct acache_info *item)
+{
+	struct cached_dev *dc;
+	struct bio *cache_bio;
+	struct search *s;
+
+	dc = get_cached_device_by_dev(item->dev);
+	if (dc == NULL)
+		return -1;
+	cache_bio = get_bio_by_item(dc, item);
+	if (cache_bio == NULL) {
+		pr_err("acache: failed to alloc bio for prefetch\n");
+		goto put_dev;
+	}
+
+	s = search_alloc(cache_bio, &dc->disk, true);
+
+	trace_bcache_prefetch_request(&dc->disk, cache_bio);
+	cached_dev_read(dc, s);
+	return 0;
+
+put_dev:
+	cached_dev_put(dc);
+	return -1;
+}
diff --git a/drivers/md/bcache/acache.h b/drivers/md/bcache/acache.h
new file mode 100644
index 0000000000000000000000000000000000000000..dea6e8cb0a0540bdb25df607cb9c679a325661a4
--- /dev/null
+++ b/drivers/md/bcache/acache.h
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ACACHE_INTERFACE_H_
+#define _ACACHE_INTERFACE_H_
+
+#define ACACHE_NR_DEVS 1
+
+#define RING_SIZE
+
+#include "bcache.h"
+
+struct mem_reg {
+	char *data;
+	unsigned long size;
+};
+
+struct acache_info {
+	uint64_t length;
+	uint64_t offset;
+	uint64_t start_time;
+	dev_t dev;
+	int type;
+};
+
+enum acache_info_type {
+	ACACHE_INFO_READ = 0,
+	ACACHE_INFO_WRITE,
+	ACACHE_INFO_CACHE_INSERT,
+	ACACHE_INFO_LATENCY,
+};
+
+struct acache_circ {
+	spinlock_t lock;
+	int tail;
+	int head;
+	int size;
+	int item_size;
+	struct acache_info data[0];
+};
+
+struct acache_metadata {
+	uint32_t magic;
+	uint32_t conntype;
+	uint32_t devsize;
+};
+
+#define ACACHE_DEV_SIZE acache_dev_size
+#define ACACHE_MAGIC 2
+
+enum acache_conn_types {
+	ACACHE_NO_CONN = 0,
+	ACACHE_READWRITE_CONN = 2,
+};
+
+#define ACACHE_CIRC_SIZE \
+	({int i = (ACACHE_DEV_SIZE - sizeof(struct acache_circ))/sizeof(struct acache_info); \
+	  int bits = 0; \
+	  while (i > 0) {i >>= 1; bits++; } \
+	  1 << (bits - 1); })
+
+
+#define ACACHE_GET_METADATA	_IOR('a', 1, struct acache_metadata)
+
+int acache_dev_init(void);
+void acache_dev_exit(void);
+struct acache_info *fetch_circ_item(struct acache_circ *circ);
+void save_circ_item(struct acache_info *data);
+
+#endif
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fe6dce125aba226e5f7c05cad39c78b38829389d..96951e638f17a8cec9ea95d2d45aeebc4bb8198d 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2122,12 +2122,12 @@ static bool btree_insert_key(struct btree *b, struct bkey *k,
 	BUG_ON(bkey_cmp(k, &b->key) > 0);
 
 	status = bch_btree_insert_key(&b->keys, k, replace_key);
+	trace_bcache_btree_insert_key(b, k, replace_key != NULL,
+				      status);
 	if (status != BTREE_INSERT_STATUS_NO_INSERT) {
 		bch_check_keys(&b->keys, "%u for %s", status,
 			       replace_key ? "replace" : "insert");
 
-		trace_bcache_btree_insert_key(b, k, replace_key != NULL,
-					      status);
 		return true;
 	} else
 		return false;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 2143263831456217af9cd1676dda4a79722c8d7e..5a64afd56b971a8fc210122d74c29416391207b4 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -8,11 +8,13 @@
  */
 
 #include "bcache.h"
+#include "acache.h"
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
 #include "writeback.h"
 
+#include
 #include
 #include
 #include
@@ -308,10 +310,18 @@ static void bch_data_insert_start(struct closure *cl)
 void bch_data_insert(struct closure *cl)
 {
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+	struct acache_info msg;
 
 	trace_bcache_write(op->c, op->inode, op->bio,
 			   op->writeback, op->bypass);
 
+	msg.offset = op->bio->bi_iter.bi_sector;
+	msg.length = op->bio->bi_iter.bi_size;
+	msg.type = ACACHE_INFO_CACHE_INSERT;
+	msg.dev = bio_dev(op->bio);
+	msg.start_time = ktime_get_ns();
+	save_circ_item(&msg);
+
 	bch_keylist_init(&op->insert_keys);
 	bio_get(op->bio);
 	bch_data_insert_start(cl);
@@ -460,28 +470,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 
 /* Cache lookup */
 
-struct search {
-	/* Stack frame for bio_complete */
-	struct closure		cl;
-
-	struct bbio		bio;
-	struct bio		*orig_bio;
-	struct bio		*cache_miss;
-	struct bcache_device	*d;
-
-	unsigned int		insert_bio_sectors;
-	unsigned int		recoverable:1;
-	unsigned int		write:1;
-	unsigned int		read_dirty_data:1;
-	unsigned int		cache_missed:1;
-
-	struct hd_struct	*part;
-	unsigned long		start_time;
-
-	struct btree_op		op;
-	struct data_insert_op	iop;
-};
-
 static void bch_cache_read_endio(struct bio *bio)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
@@ -540,6 +528,7 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 		return MAP_CONTINUE;
 
 	/* XXX: figure out best pointer - for multiple cache devices */
+	ptr = 0;
 
 	PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO;
 
@@ -557,21 +546,25 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 	bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
 	bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);
 
-	n->bi_end_io	= bch_cache_read_endio;
-	n->bi_private	= &s->cl;
+	if (!s->prefetch) {
+		n->bi_end_io	= bch_cache_read_endio;
+		n->bi_private	= &s->cl;
 
-	/*
-	 * The bucket we're reading from might be reused while our bio
-	 * is in flight, and we could then end up reading the wrong
-	 * data.
-	 *
-	 * We guard against this by checking (in cache_read_endio()) if
-	 * the pointer is stale again; if so, we treat it as an error
-	 * and reread from the backing device (but we don't pass that
-	 * error up anywhere).
-	 */
+		/*
+		 * The bucket we're reading from might be reused while our bio
+		 * is in flight, and we could then end up reading the wrong
+		 * data.
+		 *
+		 * We guard against this by checking (in cache_read_endio()) if
+		 * the pointer is stale again; if so, we treat it as an error
+		 * and reread from the backing device (but we don't pass that
+		 * error up anywhere).
+		 */
 
-	__bch_submit_bbio(n, b->c);
+		__bch_submit_bbio(n, b->c);
+	} else {
+		bio_put(n);
+	}
 	return n == bio ? MAP_DONE : MAP_CONTINUE;
 }
@@ -674,7 +667,12 @@ static void bio_complete(struct search *s)
 		trace_bcache_request_end(s->d, s->orig_bio);
 
 		s->orig_bio->bi_status = s->iop.status;
-		bio_endio(s->orig_bio);
+		if (s->prefetch) {
+			bio_free_pages(s->orig_bio);
+			bio_put(s->orig_bio);
+		} else {
+			bio_endio(s->orig_bio);
+		}
 		s->orig_bio = NULL;
 	}
 }
@@ -699,7 +697,7 @@ static void do_bio_hook(struct search *s,
 	bio_cnt_set(bio, 3);
 }
 
-static void search_free(struct closure *cl)
+void search_free(struct closure *cl)
 {
 	struct search *s = container_of(cl, struct search, cl);
 
@@ -713,8 +711,8 @@ static void search_free(struct closure *cl)
 	mempool_free(s, &s->iop.c->search);
 }
 
-static inline struct search *search_alloc(struct bio *bio,
-					  struct bcache_device *d)
+struct search *search_alloc(struct bio *bio,
+			    struct bcache_device *d, bool prefetch)
 {
 	struct search *s;
 
@@ -733,6 +731,7 @@ static inline struct search *search_alloc(struct bio *bio,
 	s->read_dirty_data	= 0;
 	/* Count on the bcache device */
 	s->start_time		= part_start_io_acct(d->disk, &s->part, bio);
+	s->prefetch		= prefetch;
 	s->iop.c		= d->c;
 	s->iop.bio		= NULL;
 	s->iop.inode		= d->id;
@@ -836,17 +835,22 @@ static void cached_dev_read_done(struct closure *cl)
 		s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
 		bch_bio_map(s->iop.bio, NULL);
 
-		bio_copy_data(s->cache_miss, s->iop.bio);
+		if (!s->prefetch)
+			bio_copy_data(s->cache_miss, s->iop.bio);
+		else
+			trace_bcache_prefetch_cache_miss(s->iop.bio);
 
 		bio_put(s->cache_miss);
 		s->cache_miss = NULL;
+	}
 
 	if (verify(dc) && s->recoverable && !s->read_dirty_data)
 		bch_data_verify(dc, s->orig_bio);
 
 	closure_get(&dc->disk.cl);
-	bio_complete(s);
+	if (!s->prefetch)
+		bio_complete(s);
 
 	if (s->iop.bio &&
 	    !test_bit(CACHE_SET_STOPPING, &s->iop.c->flags)) {
@@ -862,10 +866,19 @@ static void cached_dev_read_done_bh(struct closure *cl)
 	struct search *s = container_of(cl, struct search, cl);
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
-	bch_mark_cache_accounting(s->iop.c, s->d,
+	if (s->prefetch)
+		pr_debug("prefetch request; do not count cache_missed");
+	else
+		bch_mark_cache_accounting(s->iop.c, s->d,
 				  !s->cache_missed, s->iop.bypass);
 	trace_bcache_read(s->orig_bio, !s->cache_missed, s->iop.bypass);
 
+	if (!s->prefetch && !s->iop.status) {
+		s->smp.type = ACACHE_INFO_LATENCY;
+		s->smp.start_time = ktime_get_ns() - s->smp.start_time;
+		save_circ_item(&s->smp);
+	}
+
 	if (s->iop.status)
 		continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
 	else if (s->iop.bio || verify(dc))
@@ -891,8 +904,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	}
 
 	if (!(bio->bi_opf & REQ_RAHEAD) &&
-	    !(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
-	    s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
+	    !(bio->bi_opf & (REQ_META|REQ_PRIO) ) &&
+	    s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA &&
+	    !s->prefetch)
 		reada = min_t(sector_t, dc->readahead >> 9,
 			      get_capacity(bio->bi_disk) - bio_end_sector(bio));
 
@@ -943,14 +957,18 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 out_put:
 	bio_put(cache_bio);
 out_submit:
-	miss->bi_end_io		= backing_request_endio;
-	miss->bi_private	= &s->cl;
-	/* I/O request sent to backing device */
-	closure_bio_submit(s->iop.c, miss, &s->cl);
+	if (!s->prefetch) {
+		miss->bi_end_io		= backing_request_endio;
+		miss->bi_private	= &s->cl;
+		/* I/O request sent to backing device */
+		closure_bio_submit(s->iop.c, miss, &s->cl);
+	} else {
+		bio_put(miss);
+	}
 	return ret;
 }
 
-static void cached_dev_read(struct cached_dev *dc, struct search *s)
+void cached_dev_read(struct cached_dev *dc, struct search *s)
 {
 	struct closure *cl = &s->cl;
 
@@ -1197,7 +1215,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
 		bio->bi_iter.bi_sector += dc->sb.data_offset;
 
 	if (cached_dev_get(dc)) {
-		s = search_alloc(bio, d);
+		s = search_alloc(bio, d, false);
 		trace_bcache_request_start(s->d, bio);
 
 		if (!bio->bi_iter.bi_size) {
@@ -1211,6 +1229,15 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
 		} else {
 			s->iop.bypass = check_should_bypass(dc, bio);
 
+			if (!s->iop.bypass && bio->bi_iter.bi_size && !rw) {
+				s->smp.offset = bio->bi_iter.bi_sector - dc->sb.data_offset;
+				s->smp.length = bio->bi_iter.bi_size;
+				s->smp.type = rw;
+				s->smp.dev = dc->bdev->bd_dev;
+				s->smp.start_time = ktime_get_ns();
+				save_circ_item(&s->smp);
+			}
+
 			if (rw)
 				cached_dev_write(dc, s);
 			else
@@ -1281,7 +1308,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	s = search_alloc(bio, d);
+	s = search_alloc(bio, d, false);
 	cl = &s->cl;
 	bio = &s->bio.bio;
 
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 82b38366a95deb8f9fabf61379df5ca3bb51ff91..21678037d215c52bbd5ea0a69a26ed8b258bccc7 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHE_REQUEST_H_
 #define _BCACHE_REQUEST_H_
+#include "btree.h"
+#include "acache.h"
 
 struct data_insert_op {
 	struct closure		cl;
@@ -44,4 +46,34 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio);
 
 extern struct kmem_cache *bch_search_cache;
 
+struct search {
+	/* Stack frame for bio_complete */
+	struct closure		cl;
+
+	struct bbio		bio;
+	struct bio		*orig_bio;
+	struct bio		*cache_miss;
+	struct bcache_device	*d;
+
+	unsigned int		insert_bio_sectors;
+	unsigned int		recoverable:1;
+	unsigned int		write:1;
+	unsigned int		read_dirty_data:1;
+	unsigned int		cache_missed:1;
+
+	struct hd_struct	*part;
+	unsigned long		start_time;
+	/* for prefetch, we do not need to copy data to the bio */
+	bool			prefetch;
+	struct list_head	list_node;
+	wait_queue_head_t	wqh;
+	struct acache_info	smp;
+
+	struct btree_op		op;
+	struct data_insert_op	iop;
+};
+
+void search_free(struct closure *cl);
+struct search *search_alloc(struct bio *bio, struct bcache_device *d, bool prefetch);
+void cached_dev_read(struct cached_dev *dc, struct search *s);
 #endif /* _BCACHE_REQUEST_H_ */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 81f1cc5b34999578b06020875cb2cca30f12e3fb..a849fc51cd8843e02d78d4a54e75a292ee3d72f7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -8,6 +8,7 @@
  */
 
 #include "bcache.h"
+#include "acache.h"
 #include "btree.h"
 #include "debug.h"
 #include "extents.h"
@@ -2846,6 +2847,7 @@ static void bcache_exit(void)
 	if (bcache_major)
 		unregister_blkdev(bcache_major, "bcache");
 
+	acache_dev_exit();
 	unregister_reboot_notifier(&reboot);
 	mutex_destroy(&bch_register_lock);
 }
@@ -2932,6 +2934,8 @@ static int __init bcache_init(void)
 	bch_debug_init();
 	closure_debug_init();
 
+	if (acache_dev_init())
+		goto err;
 	bcache_is_reboot = false;
 
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index e41c611d6d3b907f7fab5e0d1026b318d288e2fd..f7be8c6e7cffc931b3e09baea8143e332cd23317 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -75,6 +75,12 @@ DECLARE_EVENT_CLASS(btree_node,
 	TP_printk("bucket %zu", __entry->bucket)
 );
 
+/* acache.c */
+DEFINE_EVENT(bcache_request, bcache_prefetch_request,
+	TP_PROTO(struct bcache_device *d, struct bio *bio),
+	TP_ARGS(d, bio)
+);
+
 /* request.c */
 
 DEFINE_EVENT(bcache_request, bcache_request_start,
@@ -120,6 +126,11 @@ DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
 	TP_ARGS(bio)
 );
 
+DEFINE_EVENT(bcache_bio, bcache_prefetch_cache_miss,
+	TP_PROTO(struct bio *bio),
+	TP_ARGS(bio)
+);
+
 TRACE_EVENT(bcache_read,
 	TP_PROTO(struct bio *bio, bool hit, bool bypass),
 	TP_ARGS(bio, hit, bypass),
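
The patch exposes the ring buffer and the prefetch workers through the /dev/acache character device but does not include a consumer. The sketch below shows how a userspace prefetch daemon might drive the interface: it mirrors the acache_info/acache_metadata layout from acache.h, drains the ring buffer of recorded read requests, and writes back naive "prefetch the next 256 KiB" hints. The daemon, its file name, and the readahead policy are illustrative assumptions and not part of this patch; only the ioctl number, the record layout, and the sector/byte units follow the code above.

/* acache-daemon.c - hypothetical consumer for /dev/acache (not part of the patch). */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

/* Userspace mirrors of the structures in acache.h; dev_t is 32 bits in-kernel. */
struct acache_metadata {
	uint32_t magic;
	uint32_t conntype;
	uint32_t devsize;
};

struct acache_info {
	uint64_t length;	/* bytes */
	uint64_t offset;	/* sectors, relative to dc->sb.data_offset */
	uint64_t start_time;
	uint32_t dev;
	int	 type;
};

#define ACACHE_GET_METADATA	_IOR('a', 1, struct acache_metadata)
#define ACACHE_READWRITE_CONN	2
#define ACACHE_INFO_READ	0

int main(void)
{
	struct acache_metadata md;
	struct acache_info buf[256];
	int fd = open("/dev/acache", O_RDWR);

	if (fd < 0 || ioctl(fd, ACACHE_GET_METADATA, &md) != 0) {
		perror("acache");
		return 1;
	}
	if (md.conntype != ACACHE_READWRITE_CONN) {
		fprintf(stderr, "unexpected conntype %u\n", md.conntype);
		return 1;
	}

	for (;;) {
		ssize_t n = read(fd, buf, sizeof(buf));	/* drain recorded requests */
		ssize_t i;

		for (i = 0; i < n / (ssize_t)sizeof(buf[0]); i++) {
			struct acache_info hint;

			if (buf[i].type != ACACHE_INFO_READ)
				continue;
			/* Naive policy: ask for the 256 KiB following each read. */
			hint = buf[i];
			hint.offset += buf[i].length >> 9;
			hint.length = 256 * 1024;
			if (write(fd, &hint, sizeof(hint)) <= 0)
				break;	/* no free prefetch worker; retry later */
		}
		usleep(1000);
	}
	return 0;
}

Note that the kernel side silently drops records when the ring buffer is full and drops hints when no prefetch worker is free, so a consumer like this only ever sees best-effort data and must tolerate gaps.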