Commit 1355b37f authored by Jens Axboe

Merge branch 'for-3.13/post-mq-drivers' into for-linus

@@ -39,15 +39,15 @@ Module configuration options
============================
If you use the floppy driver as a module, use the following syntax:
-modprobe floppy <options>
+modprobe floppy floppy="<options>"
Example:
-modprobe floppy omnibook messages
+modprobe floppy floppy="omnibook messages"
If you need certain options enabled every time you load the floppy driver,
you can put:
-options floppy omnibook messages
+options floppy floppy="omnibook messages"
in a configuration file in /etc/modprobe.d/.
......
@@ -110,7 +110,7 @@ source "drivers/block/mtip32xx/Kconfig"
config BLK_CPQ_DA
tristate "Compaq SMART2 support"
-depends on PCI && VIRT_TO_BUS
+depends on PCI && VIRT_TO_BUS && 0
help
This is the driver for Compaq Smart Array controllers. Everyone
using these boards should say Y here. See the file
@@ -319,6 +319,16 @@ config BLK_DEV_NVME
To compile this driver as a module, choose M here: the
module will be called nvme.
config BLK_DEV_SKD
tristate "STEC S1120 Block Driver"
depends on PCI
depends on 64BIT
---help---
Saying Y or M here will enable support for the
STEC, Inc. S1120 PCIe SSD.
Use device /dev/skd$N amd /dev/skd$Np$M.
config BLK_DEV_OSD
tristate "OSD object-as-blkdev support"
depends on SCSI_OSD_ULD
......
@@ -23,6 +23,7 @@ obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_MG_DISK) += mg_disk.o
obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_BLK_DEV_SKD) += skd.o
obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
@@ -44,4 +45,5 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
nvme-y := nvme-core.o nvme-scsi.o
skd-y := skd_main.o
swim_mod-y := swim.o swim_asm.o
@@ -5183,7 +5183,7 @@ static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rebuild_lun_table(h, 1, 0);
cciss_engage_scsi(h);
h->busy_initializing = 0;
-return 1;
+return 0;
clean4:
cciss_free_cmd_pool(h);
......
@@ -1474,7 +1474,8 @@ enum determine_dev_size {
DS_ERROR = -1,
DS_UNCHANGED = 0,
DS_SHRUNK = 1,
-DS_GREW = 2
+DS_GREW = 2,
DS_GREW_FROM_ZERO = 3,
};
extern enum determine_dev_size
drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
......
@@ -2750,13 +2750,6 @@ int __init drbd_init(void)
return err;
}
err = drbd_genl_register();
if (err) {
printk(KERN_ERR "drbd: unable to register generic netlink family\n");
goto fail;
}
register_reboot_notifier(&drbd_notifier);
/*
@@ -2767,6 +2760,15 @@ int __init drbd_init(void)
drbd_proc = NULL; /* play safe for drbd_cleanup */
idr_init(&minors);
rwlock_init(&global_state_lock);
INIT_LIST_HEAD(&drbd_tconns);
err = drbd_genl_register();
if (err) {
printk(KERN_ERR "drbd: unable to register generic netlink family\n");
goto fail;
}
err = drbd_create_mempools();
if (err)
goto fail;
@@ -2778,9 +2780,6 @@ int __init drbd_init(void)
goto fail;
}
rwlock_init(&global_state_lock);
INIT_LIST_HEAD(&drbd_tconns);
retry.wq = create_singlethread_workqueue("drbd-reissue");
if (!retry.wq) {
printk(KERN_ERR "drbd: unable to create retry workqueue\n");
......
@@ -955,7 +955,7 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct res
}
if (size > la_size_sect)
-rv = DS_GREW;
+rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
if (size < la_size_sect)
rv = DS_SHRUNK;
@@ -1132,9 +1132,9 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
/* We may ignore peer limits if the peer is modern enough.
Because new from 8.3.8 onwards the peer can use multiple
BIOs for a single peer_request */
-if (mdev->state.conn >= C_CONNECTED) {
+if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
if (mdev->tconn->agreed_pro_version < 94)
-peer = min( mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
else if (mdev->tconn->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET;
......
@@ -1890,29 +1890,11 @@ static u32 seq_max(u32 a, u32 b)
return seq_greater(a, b) ? a : b;
}
static bool need_peer_seq(struct drbd_conf *mdev)
{
struct drbd_tconn *tconn = mdev->tconn;
int tp;
/*
* We only need to keep track of the last packet_seq number of our peer
* if we are in dual-primary mode and we have the resolve-conflicts flag set; see
* handle_write_conflicts().
*/
rcu_read_lock();
tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
rcu_read_unlock();
return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
}
static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
{
unsigned int newest_peer_seq;
-if (need_peer_seq(mdev)) {
+if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
spin_lock(&mdev->peer_seq_lock);
newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
mdev->peer_seq = newest_peer_seq;
@@ -1972,22 +1954,31 @@ static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_s
{
DEFINE_WAIT(wait);
long timeout;
-int ret;
+int ret = 0, tp;
-if (!need_peer_seq(mdev))
+if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
return 0;
spin_lock(&mdev->peer_seq_lock);
for (;;) {
if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
ret = 0;
break;
}
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
rcu_read_lock();
tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
rcu_read_unlock();
if (!tp)
break;
/* Only need to wait if two_primaries is enabled */
prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
spin_unlock(&mdev->peer_seq_lock);
rcu_read_lock();
@@ -2228,8 +2219,10 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
}
goto out_interrupted;
}
-} else
+} else {
update_peer_seq(mdev, peer_seq);
spin_lock_irq(&mdev->tconn->req_lock);
}
list_add(&peer_req->w.list, &mdev->active_ee);
spin_unlock_irq(&mdev->tconn->req_lock);
@@ -4132,7 +4125,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
(unsigned int)bs.buf_len);
return -EIO;
}
-look_ahead >>= bits;
+/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
if (likely(bits < 64))
look_ahead >>= bits;
else
look_ahead = 0;
have -= bits;
bits = bitstream_get_bits(&bs, &tmp, 64 - have);
......
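The recv_bm_rle_bits hunk above exists because right-shifting a 64-bit value by its full width is undefined behavior in C, so the "all bits consumed" case has to be handled explicitly. A minimal standalone sketch of the same guard follows; the helper name is illustrative and not taken from the driver:

#include <stdint.h>

/* Shift out `bits` consumed bits; a shift by the full width is treated
 * as "everything consumed" because x >> 64 is undefined for a 64-bit x. */
static inline uint64_t consume_bits(uint64_t look_ahead, unsigned int bits)
{
	if (bits < 64)
		return look_ahead >> bits;
	return 0;
}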
@@ -1306,6 +1306,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
int backing_limit;
if (bio_size && get_ldev(mdev)) {
unsigned int max_hw_sectors = queue_max_hw_sectors(q);
struct request_queue * const b =
mdev->ldev->backing_bdev->bd_disk->queue;
if (b->merge_bvec_fn) {
@@ -1313,6 +1314,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
limit = min(limit, backing_limit);
}
put_ldev(mdev);
if ((limit >> 9) > max_hw_sectors)
limit = max_hw_sectors << 9;
}
return limit;
}
......
@@ -894,13 +894,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
bio_list_init(&lo->lo_bio_list);
/*
* set queue make_request_fn, and add limits based on lower level
* device
*/
blk_queue_make_request(lo->lo_queue, loop_make_request);
lo->lo_queue->queuedata = lo;
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_flush(lo->lo_queue, REQ_FLUSH);
@@ -1618,6 +1611,8 @@ static int loop_add(struct loop_device **l, int i)
if (!lo)
goto out;
lo->lo_state = Lo_unbound;
/* allocate id, if @id >= 0, we're requesting that specific id */
if (i >= 0) {
err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
@@ -1635,6 +1630,12 @@ static int loop_add(struct loop_device **l, int i)
if (!lo->lo_queue)
goto out_free_idr;
/*
* set queue make_request_fn
*/
blk_queue_make_request(lo->lo_queue, loop_make_request);
lo->lo_queue->queuedata = lo;
disk = lo->lo_disk = alloc_disk(1 << part_shift);
if (!disk)
goto out_free_queue;
......
@@ -936,7 +936,7 @@ static int mg_probe(struct platform_device *plat_dev)
goto probe_err_3b;
}
err = request_irq(host->irq, mg_irq,
-IRQF_DISABLED | IRQF_TRIGGER_RISING,
+IRQF_TRIGGER_RISING,
MG_DEV_NAME, host);
if (err) {
printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n",
......
This diff has been collapsed.
@@ -140,6 +140,7 @@ enum {
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
MTIP_PF_ISSUE_CMDS_BIT = 5,
MTIP_PF_REBUILD_BIT = 6,
MTIP_PF_SR_CLEANUP_BIT = 7,
MTIP_PF_SVC_THD_STOP_BIT = 8,
/* below are bit numbers in 'dd_flag' defined in driver_data */
@@ -147,15 +148,18 @@ enum {
MTIP_DDF_REMOVE_PENDING_BIT = 1,
MTIP_DDF_OVER_TEMP_BIT = 2,
MTIP_DDF_WRITE_PROTECT_BIT = 3,
-MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
+MTIP_DDF_REMOVE_DONE_BIT = 4,
(1 << MTIP_DDF_SEC_LOCK_BIT) |
(1 << MTIP_DDF_OVER_TEMP_BIT) |
(1 << MTIP_DDF_WRITE_PROTECT_BIT)),
MTIP_DDF_CLEANUP_BIT = 5,
MTIP_DDF_RESUME_BIT = 6,
MTIP_DDF_INIT_DONE_BIT = 7,
MTIP_DDF_REBUILD_FAILED_BIT = 8,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
(1 << MTIP_DDF_SEC_LOCK_BIT) |
(1 << MTIP_DDF_OVER_TEMP_BIT) |
(1 << MTIP_DDF_WRITE_PROTECT_BIT) |
(1 << MTIP_DDF_REBUILD_FAILED_BIT)),
};
struct smart_attr {
@@ -499,6 +503,8 @@ struct driver_data {
bool trim_supp; /* flag indicating trim support */
bool sr;
int numa_node; /* NUMA support */
char workq_name[32];
@@ -511,6 +517,8 @@ struct driver_data {
int isr_binding;
struct block_device *bdev;
int unal_qdepth; /* qdepth of unaligned IO queue */
struct list_head online_list; /* linkage for online list */
......
@@ -473,45 +473,31 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
{
if (!pkt_debugfs_root)
return;
pd->dfs_f_info = NULL;
pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
-if (IS_ERR(pd->dfs_d_root)) {
+if (!pd->dfs_d_root)
pd->dfs_d_root = NULL;
return;
}
pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
pd->dfs_d_root, pd, &debug_fops);
if (IS_ERR(pd->dfs_f_info)) {
pd->dfs_f_info = NULL;
return;
}
}
static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
{
if (!pkt_debugfs_root)
return;
-if (pd->dfs_f_info)
+debugfs_remove(pd->dfs_f_info);
-debugfs_remove(pd->dfs_f_info);
+debugfs_remove(pd->dfs_d_root);
pd->dfs_f_info = NULL;
if (pd->dfs_d_root)
debugfs_remove(pd->dfs_d_root);
pd->dfs_d_root = NULL;
}
static void pkt_debugfs_init(void)
{
pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
if (IS_ERR(pkt_debugfs_root)) {
pkt_debugfs_root = NULL;
return;
}
}
static void pkt_debugfs_cleanup(void)
{
if (!pkt_debugfs_root)
return;
debugfs_remove(pkt_debugfs_root);
pkt_debugfs_root = NULL;
}
......
@@ -654,7 +654,8 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
for (i = 0; i < card->n_targets; i++) {
spin_lock_bh(&card->ctrl[i].queue_lock);
cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
-&card->ctrl[i].queue);
+&card->ctrl[i].queue,
COMPLETE_DMA);
spin_unlock_bh(&card->ctrl[i].queue_lock);
cnt += rsxx_dma_cancel(&card->ctrl[i]);
@@ -748,10 +749,6 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
card->eeh_state = 0;
st = rsxx_eeh_remap_dmas(card);
if (st)
goto failed_remap_dmas;
spin_lock_irqsave(&card->irq_lock, flags);
if (card->n_targets & RSXX_MAX_TARGETS)
rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G);
@@ -778,7 +775,6 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
return PCI_ERS_RESULT_RECOVERED;
failed_hw_buffers_init:
failed_remap_dmas:
for (i = 0; i < card->n_targets; i++) {
if (card->ctrl[i].status.buf)
pci_free_consistent(card->dev,
......
@@ -295,13 +295,15 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
return -ENOMEM;
}
-blk_size = card->config.data.block_size;
+if (card->config_valid) {
blk_size = card->config.data.block_size;
blk_queue_dma_alignment(card->queue, blk_size - 1);
blk_queue_logical_block_size(card->queue, blk_size);
}
blk_queue_make_request(card->queue, rsxx_make_request);
blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
blk_queue_dma_alignment(card->queue, blk_size - 1);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_logical_block_size(card->queue, blk_size);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
......
@@ -221,6 +221,21 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
}
/*----------------- RSXX DMA Handling -------------------*/
static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma)
{
if (dma->cmd != HW_CMD_BLK_DISCARD) {
if (!pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
pci_unmap_page(ctrl->card->dev, dma->dma_addr,
get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
}
}
kmem_cache_free(rsxx_dma_pool, dma);
}
static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
struct rsxx_dma *dma,
unsigned int status)
@@ -232,21 +247,14 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
if (status & DMA_CANCELLED)
ctrl->stats.dma_cancelled++;
if (dma->dma_addr)
pci_unmap_page(ctrl->card->dev, dma->dma_addr,
get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
if (dma->cb)
dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0);
-kmem_cache_free(rsxx_dma_pool, dma);
+rsxx_free_dma(ctrl, dma);
}
int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
-struct list_head *q)
+struct list_head *q, unsigned int done)
{
struct rsxx_dma *dma;
struct rsxx_dma *tmp;
@@ -254,7 +262,10 @@ int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
list_for_each_entry_safe(dma, tmp, q, list) {
list_del(&dma->list);
-rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+if (done & COMPLETE_DMA)
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
else
rsxx_free_dma(ctrl, dma);
cnt++;
}
@@ -370,7 +381,7 @@ static void dma_engine_stalled(unsigned long data)
/* Clean up the DMA queue */
spin_lock(&ctrl->queue_lock);
-cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
spin_unlock(&ctrl->queue_lock);
cnt += rsxx_dma_cancel(ctrl);
@@ -388,6 +399,7 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
int tag;
int cmds_pending = 0;
struct hw_cmd *hw_cmd_buf;
int dir;
hw_cmd_buf = ctrl->cmd.buf;
@@ -424,6 +436,31 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
continue;
}
if (dma->cmd != HW_CMD_BLK_DISCARD) {
if (dma->cmd == HW_CMD_BLK_WRITE)
dir = PCI_DMA_TODEVICE;
else
dir = PCI_DMA_FROMDEVICE;
/*
* The function pci_map_page is placed here because we
* can only, by design, issue up to 255 commands to the
* hardware at one time per DMA channel. So the maximum
* amount of mapped memory would be 255 * 4 channels *
* 4096 Bytes which is less than 2GB, the limit of a x8
* Non-HWWD PCIe slot. This way the pci_map_page
* function should never fail because of a lack of
* mappable memory.
*/
dma->dma_addr = pci_map_page(ctrl->card->dev, dma->page,
dma->pg_off, dma->sub_page.cnt << 9, dir);
if (pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
push_tracker(ctrl->trackers, tag);
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
continue;
}
}
set_tracker_dma(ctrl->trackers, tag, dma);
hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd;
hw_cmd_buf[ctrl->cmd.idx].tag = tag;
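For scale, the numbers quoted in the mapping comment above work out to 255 commands × 4 channels × 4096 bytes ≈ 4.2 MB of concurrently mapped memory, far below the 2 GB mappable window the comment cites for a x8 non-HWWD PCIe slot.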
@@ -620,14 +657,6 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
if (!dma)
return -ENOMEM;
dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len,
dir ? PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
if (!dma->dma_addr) {
kmem_cache_free(rsxx_dma_pool, dma);
return -ENOMEM;
}
dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
dma->laddr = laddr;
dma->sub_page.off = (dma_off >> 9);
@@ -736,11 +765,9 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
return 0;
bvec_err:
-for (i = 0; i < card->n_targets; i++) {
+for (i = 0; i < card->n_targets; i++)
-spin_lock_bh(&card->ctrl[i].queue_lock);
+rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
-rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
+FREE_DMA);
spin_unlock_bh(&card->ctrl[i].queue_lock);
}
return st;
}
@@ -990,7 +1017,7 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
/* Clean up the DMA queue */
spin_lock_bh(&ctrl->queue_lock);
-rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
spin_unlock_bh(&ctrl->queue_lock);
rsxx_dma_cancel(ctrl);
@@ -1032,6 +1059,14 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
else
card->ctrl[i].stats.reads_issued--;
if (dma->cmd != HW_CMD_BLK_DISCARD) {
pci_unmap_page(card->dev, dma->dma_addr,
get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
}
list_add_tail(&dma->list, &issued_dmas[i]);
push_tracker(card->ctrl[i].trackers, j);
cnt++;
@@ -1043,15 +1078,6 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
card->ctrl[i].stats.sw_q_depth += cnt;
card->ctrl[i].e_cnt = 0;
list_for_each_entry(dma, &card->ctrl[i].queue, list) {
if (dma->dma_addr)
pci_unmap_page(card->dev, dma->dma_addr,
get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
}
spin_unlock_bh(&card->ctrl[i].queue_lock);
}
@@ -1060,31 +1086,6 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
return 0;
}
int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
{
struct rsxx_dma *dma;
int i;
for (i = 0; i < card->n_targets; i++) {
spin_lock_bh(&card->ctrl[i].queue_lock);
list_for_each_entry(dma, &card->ctrl[i].queue, list) {
dma->dma_addr = pci_map_page(card->dev, dma->page,
dma->pg_off, get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
if (!dma->dma_addr) {
spin_unlock_bh(&card->ctrl[i].queue_lock);
kmem_cache_free(rsxx_dma_pool, dma);
return -ENOMEM;
}
}
spin_unlock_bh(&card->ctrl[i].queue_lock);
}
return 0;
}
int rsxx_dma_init(void)
{
rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN);
......
@@ -52,7 +52,7 @@ struct proc_cmd;
#define RS70_PCI_REV_SUPPORTED 4
#define DRIVER_NAME "rsxx"
-#define DRIVER_VERSION "4.0"
+#define DRIVER_VERSION "4.0.3.2516"
/* Block size is 4096 */
#define RSXX_HW_BLK_SHIFT 12
@@ -345,6 +345,11 @@ enum rsxx_creg_stat {
CREG_STAT_TAG_MASK = 0x0000ff00,
};
enum rsxx_dma_finish {
FREE_DMA = 0x0,
COMPLETE_DMA = 0x1,
};
static inline unsigned int CREG_DATA(int N)
{
return CREG_DATA0 + (N << 2);
@@ -379,7 +384,9 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
int rsxx_dma_setup(struct rsxx_cardinfo *card);
void rsxx_dma_destroy(struct rsxx_cardinfo *card);
int rsxx_dma_init(void);
-int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
struct list_head *q,
unsigned int done);
int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
void rsxx_dma_cleanup(void);
void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
......
This diff has been collapsed.
/* Copyright 2012 STEC, Inc.
*
* This file is licensed under the terms of the 3-clause
* BSD License (http://opensource.org/licenses/BSD-3-Clause)
* or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
* at your option. Both licenses are also available in the LICENSE file
* distributed with this project. This file may not be copied, modified,
* or distributed except in accordance with those terms.
*/
#ifndef SKD_S1120_H
#define SKD_S1120_H
#pragma pack(push, s1120_h, 1)
/*
* Q-channel, 64-bit r/w
*/
#define FIT_Q_COMMAND 0x400u
#define FIT_QCMD_QID_MASK (0x3 << 1)
#define FIT_QCMD_QID0 (0x0 << 1)
#define FIT_QCMD_QID_NORMAL FIT_QCMD_QID0
#define FIT_QCMD_QID1 (0x1 << 1)
#define FIT_QCMD_QID2 (0x2 << 1)
#define FIT_QCMD_QID3 (0x3 << 1)
#define FIT_QCMD_FLUSH_QUEUE (0ull) /* add QID */
#define FIT_QCMD_MSGSIZE_MASK (0x3 << 4)
#define FIT_QCMD_MSGSIZE_64 (0x0 << 4)
#define FIT_QCMD_MSGSIZE_128 (0x1 << 4)
#define FIT_QCMD_MSGSIZE_256 (0x2 << 4)
#define FIT_QCMD_MSGSIZE_512 (0x3 << 4)
#define FIT_QCMD_BASE_ADDRESS_MASK (0xFFFFFFFFFFFFFFC0ull)
/*
* Control, 32-bit r/w
*/
#define FIT_CONTROL 0x500u
#define FIT_CR_HARD_RESET (1u << 0u)
#define FIT_CR_SOFT_RESET (1u << 1u)
#define FIT_CR_DIS_TIMESTAMPS (1u << 6u)
#define FIT_CR_ENABLE_INTERRUPTS (1u << 7u)
/*
* Status, 32-bit, r/o
*/
#define FIT_STATUS 0x510u
#define FIT_SR_DRIVE_STATE_MASK 0x000000FFu
#define FIT_SR_SIGNATURE (0xFF << 8)
#define FIT_SR_PIO_DMA (1 << 16)
#define FIT_SR_DRIVE_OFFLINE 0x00
#define FIT_SR_DRIVE_INIT 0x01
/* #define FIT_SR_DRIVE_READY 0x02 */
#define FIT_SR_DRIVE_ONLINE 0x03
#define FIT_SR_DRIVE_BUSY 0x04
#define FIT_SR_DRIVE_FAULT 0x05
#define FIT_SR_DRIVE_DEGRADED 0x06
#define FIT_SR_PCIE_LINK_DOWN 0x07
#define FIT_SR_DRIVE_SOFT_RESET 0x08
#define FIT_SR_DRIVE_INIT_FAULT 0x09
#define FIT_SR_DRIVE_BUSY_SANITIZE 0x0A
#define FIT_SR_DRIVE_BUSY_ERASE 0x0B
#define FIT_SR_DRIVE_FW_BOOTING 0x0C
#define FIT_SR_DRIVE_NEED_FW_DOWNLOAD 0xFE
#define FIT_SR_DEVICE_MISSING 0xFF
#define FIT_SR__RESERVED 0xFFFFFF00u
/*
* FIT_STATUS - Status register data definition
*/
#define FIT_SR_STATE_MASK (0xFF << 0)
#define FIT_SR_SIGNATURE (0xFF << 8)
#define FIT_SR_PIO_DMA (1 << 16)
/*
* Interrupt status, 32-bit r/w1c (w1c ==> write 1 to clear)
*/
#define FIT_INT_STATUS_HOST 0x520u
#define FIT_ISH_FW_STATE_CHANGE (1u << 0u)
#define FIT_ISH_COMPLETION_POSTED (1u << 1u)
#define FIT_ISH_MSG_FROM_DEV (1u << 2u)
#define FIT_ISH_UNDEFINED_3 (1u << 3u)
#define FIT_ISH_UNDEFINED_4 (1u << 4u)
#define FIT_ISH_Q0_FULL (1u << 5u)
#define FIT_ISH_Q1_FULL (1u << 6u)
#define FIT_ISH_Q2_FULL (1u << 7u)
#define FIT_ISH_Q3_FULL (1u << 8u)
#define FIT_ISH_QCMD_FIFO_OVERRUN (1u << 9u)
#define FIT_ISH_BAD_EXP_ROM_READ (1u << 10u)
#define FIT_INT_DEF_MASK \
(FIT_ISH_FW_STATE_CHANGE | \
FIT_ISH_COMPLETION_POSTED | \
FIT_ISH_MSG_FROM_DEV | \
FIT_ISH_Q0_FULL | \
FIT_ISH_Q1_FULL | \
FIT_ISH_Q2_FULL | \
FIT_ISH_Q3_FULL | \
FIT_ISH_QCMD_FIFO_OVERRUN | \
FIT_ISH_BAD_EXP_ROM_READ)
#define FIT_INT_QUEUE_FULL \
(FIT_ISH_Q0_FULL | \
FIT_ISH_Q1_FULL | \
FIT_ISH_Q2_FULL | \
FIT_ISH_Q3_FULL)
#define MSI_MSG_NWL_ERROR_0 0x00000000
#define MSI_MSG_NWL_ERROR_1 0x00000001
#define MSI_MSG_NWL_ERROR_2 0x00000002
#define MSI_MSG_NWL_ERROR_3 0x00000003
#define MSI_MSG_STATE_CHANGE 0x00000004
#define MSI_MSG_COMPLETION_POSTED 0x00000005
#define MSI_MSG_MSG_FROM_DEV 0x00000006
#define MSI_MSG_RESERVED_0 0x00000007
#define MSI_MSG_RESERVED_1 0x00000008
#define MSI_MSG_QUEUE_0_FULL 0x00000009
#define MSI_MSG_QUEUE_1_FULL 0x0000000A
#define MSI_MSG_QUEUE_2_FULL 0x0000000B
#define MSI_MSG_QUEUE_3_FULL 0x0000000C
#define FIT_INT_RESERVED_MASK \
(FIT_ISH_UNDEFINED_3 | \
FIT_ISH_UNDEFINED_4)
/*
* Interrupt mask, 32-bit r/w
* Bit definitions are the same as FIT_INT_STATUS_HOST
*/
#define FIT_INT_MASK_HOST 0x528u
/*
* Message to device, 32-bit r/w
*/
#define FIT_MSG_TO_DEVICE 0x540u
/*
* Message from device, 32-bit, r/o
*/
#define FIT_MSG_FROM_DEVICE 0x548u
/*
* 32-bit messages to/from device, composition/extraction macros
*/
#define FIT_MXD_CONS(TYPE, PARAM, DATA) \
((((TYPE) & 0xFFu) << 24u) | \
(((PARAM) & 0xFFu) << 16u) | \
(((DATA) & 0xFFFFu) << 0u))
#define FIT_MXD_TYPE(MXD) (((MXD) >> 24u) & 0xFFu)
#define FIT_MXD_PARAM(MXD) (((MXD) >> 16u) & 0xFFu)
#define FIT_MXD_DATA(MXD) (((MXD) >> 0u) & 0xFFFFu)
/*
* Types of messages to/from device
*/
#define FIT_MTD_FITFW_INIT 0x01u
#define FIT_MTD_GET_CMDQ_DEPTH 0x02u
#define FIT_MTD_SET_COMPQ_DEPTH 0x03u
#define FIT_MTD_SET_COMPQ_ADDR 0x04u
#define FIT_MTD_ARM_QUEUE 0x05u
#define FIT_MTD_CMD_LOG_HOST_ID 0x07u
#define FIT_MTD_CMD_LOG_TIME_STAMP_LO 0x08u
#define FIT_MTD_CMD_LOG_TIME_STAMP_HI 0x09u
#define FIT_MFD_SMART_EXCEEDED 0x10u
#define FIT_MFD_POWER_DOWN 0x11u
#define FIT_MFD_OFFLINE 0x12u
#define FIT_MFD_ONLINE 0x13u
#define FIT_MFD_FW_RESTARTING 0x14u
#define FIT_MFD_PM_ACTIVE 0x15u
#define FIT_MFD_PM_STANDBY 0x16u
#define FIT_MFD_PM_SLEEP 0x17u
#define FIT_MFD_CMD_PROGRESS 0x18u
#define FIT_MTD_DEBUG 0xFEu
#define FIT_MFD_DEBUG 0xFFu
#define FIT_MFD_MASK (0xFFu)
#define FIT_MFD_DATA_MASK (0xFFu)
#define FIT_MFD_MSG(x) (((x) >> 24) & FIT_MFD_MASK)
#define FIT_MFD_DATA(x) ((x) & FIT_MFD_MASK)
/*
* Extra arg to FIT_MSG_TO_DEVICE, 64-bit r/w
* Used to set completion queue address (FIT_MTD_SET_COMPQ_ADDR)
* (was Response buffer in docs)
*/
#define FIT_MSG_TO_DEVICE_ARG 0x580u
/*
* Hardware (ASIC) version, 32-bit r/o
*/
#define FIT_HW_VERSION 0x588u
/*
* Scatter/gather list descriptor.
* 32-bytes and must be aligned on a 32-byte boundary.
* All fields are in little endian order.
*/
struct fit_sg_descriptor {
uint32_t control;
uint32_t byte_count;
uint64_t host_side_addr;
uint64_t dev_side_addr;
uint64_t next_desc_ptr;
};
#define FIT_SGD_CONTROL_NOT_LAST 0x000u
#define FIT_SGD_CONTROL_LAST 0x40Eu
/*
* Header at the beginning of a FIT message. The header
* is followed by SSDI requests each 64 bytes.
* A FIT message can be up to 512 bytes long and must start
* on a 64-byte boundary.
*/
struct fit_msg_hdr {
uint8_t protocol_id;
uint8_t num_protocol_cmds_coalesced;
uint8_t _reserved[62];
};
#define FIT_PROTOCOL_ID_FIT 1
#define FIT_PROTOCOL_ID_SSDI 2
#define FIT_PROTOCOL_ID_SOFIT 3
#define FIT_PROTOCOL_MINOR_VER(mtd_val) ((mtd_val >> 16) & 0xF)
#define FIT_PROTOCOL_MAJOR_VER(mtd_val) ((mtd_val >> 20) & 0xF)
/*
* Format of a completion entry. The completion queue is circular
* and must have at least as many entries as the maximum number
* of commands that may be issued to the device.
*
* There are no head/tail pointers. The cycle value is used to
* infer the presence of new completion records.
* Initially the cycle in all entries is 0, the index is 0, and
* the cycle value to expect is 1. When completions are added
* their cycle values are set to 1. When the index wraps the
* cycle value to expect is incremented.
*
* Command_context is opaque and taken verbatim from the SSDI command.
* All other fields are big endian.
*/
#define FIT_PROTOCOL_VERSION_0 0
/*
* Protocol major version 1 completion entry.
* The major protocol version is found in bits
* 20-23 of the FIT_MTD_FITFW_INIT response.
*/
struct fit_completion_entry_v1 {
uint32_t num_returned_bytes;
uint16_t tag;
uint8_t status; /* SCSI status */
uint8_t cycle;
};
#define FIT_PROTOCOL_VERSION_1 1
#define FIT_PROTOCOL_VERSION_CURRENT FIT_PROTOCOL_VERSION_1
struct fit_comp_error_info {
uint8_t type:7; /* 00: Bits0-6 indicates the type of sense data. */
uint8_t valid:1; /* 00: Bit 7 := 1 ==> info field is valid. */
uint8_t reserved0; /* 01: Obsolete field */
uint8_t key:4; /* 02: Bits0-3 indicate the sense key. */
uint8_t reserved2:1; /* 02: Reserved bit. */
uint8_t bad_length:1; /* 02: Incorrect Length Indicator */
uint8_t end_medium:1; /* 02: End of Medium */
uint8_t file_mark:1; /* 02: Filemark */
uint8_t info[4]; /* 03: */
uint8_t reserved1; /* 07: Additional Sense Length */
uint8_t cmd_spec[4]; /* 08: Command Specific Information */
uint8_t code; /* 0C: Additional Sense Code */
uint8_t qual; /* 0D: Additional Sense Code Qualifier */
uint8_t fruc; /* 0E: Field Replaceable Unit Code */
uint8_t sks_high:7; /* 0F: Sense Key Specific (MSB) */
uint8_t sks_valid:1; /* 0F: Sense Key Specific Valid */
uint16_t sks_low; /* 10: Sense Key Specific (LSW) */
uint16_t reserved3; /* 12: Part of additional sense bytes (unused) */
uint16_t uec; /* 14: Additional Sense Bytes */
uint64_t per; /* 16: Additional Sense Bytes */
uint8_t reserved4[2]; /* 1E: Additional Sense Bytes (unused) */
};
/* Task management constants */
#define SOFT_TASK_SIMPLE 0x00
#define SOFT_TASK_HEAD_OF_QUEUE 0x01
#define SOFT_TASK_ORDERED 0x02
/* Version zero has the last 32 bits reserved,
* Version one has the last 32 bits sg_list_len_bytes;
*/
struct skd_command_header {
uint64_t sg_list_dma_address;
uint16_t tag;
uint8_t attribute;
uint8_t add_cdb_len; /* In 32 bit words */
uint32_t sg_list_len_bytes;
};
struct skd_scsi_request {
struct skd_command_header hdr;
unsigned char cdb[16];
/* unsigned char _reserved[16]; */
};
struct driver_inquiry_data {
uint8_t peripheral_device_type:5;
uint8_t qualifier:3;
uint8_t page_code;
uint16_t page_length;
uint16_t pcie_bus_number;
uint8_t pcie_device_number;
uint8_t pcie_function_number;
uint8_t pcie_link_speed;
uint8_t pcie_link_lanes;
uint16_t pcie_vendor_id;
uint16_t pcie_device_id;
uint16_t pcie_subsystem_vendor_id;
uint16_t pcie_subsystem_device_id;
uint8_t reserved1[2];
uint8_t reserved2[3];
uint8_t driver_version_length;
uint8_t driver_version[0x14];
};
#pragma pack(pop, s1120_h)
#endif /* SKD_S1120_H */
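The comment block above struct fit_completion_entry_v1 describes a completion ring with no head or tail pointers, where a per-entry cycle value signals newly posted completions. The loop below is a minimal consumer sketch based only on that description; the ring size, the compq_* names, and the omission of big-endian conversion are illustrative assumptions, not code from the skd driver:

#include <stdint.h>
/* relies on struct fit_completion_entry_v1 from skd_s1120.h above */

#define COMPQ_ENTRIES 256 /* assumed ring size; must cover the max outstanding commands */

struct compq_state {
	struct fit_completion_entry_v1 *ring; /* COMPQ_ENTRIES entries, zeroed at init */
	uint32_t idx;   /* next entry to inspect, starts at 0 */
	uint8_t cycle;  /* cycle value to expect, starts at 1 */
};

/* Drain newly posted completions; returns how many were consumed.
 * Fields other than `cycle` are big endian per the header comment;
 * byte-order conversion is omitted here for brevity. */
static int compq_drain(struct compq_state *q)
{
	int n = 0;

	while (q->ring[q->idx].cycle == q->cycle) {
		/* process q->ring[q->idx]: `tag` identifies the command, `status` is the SCSI status */
		n++;
		if (++q->idx == COMPQ_ENTRIES) {
			q->idx = 0;
			q->cycle++; /* after a wrap, expect the next cycle value */
		}
	}
	return n;
}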
@@ -887,6 +887,8 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
unsigned long secure;
struct phys_req preq;
xen_blkif_get(blkif);
preq.sector_number = req->u.discard.sector_number;
preq.nr_sects = req->u.discard.nr_sectors;
@@ -899,7 +901,6 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
}
blkif->st_ds_req++;
xen_blkif_get(blkif);
secure = (blkif->vbd.discard_secure &&
(req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
BLKDEV_DISCARD_SECURE : 0;
......
@@ -121,7 +121,8 @@ struct blkfront_info
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
-struct list_head persistent_gnts;
+struct list_head grants;
struct list_head indirect_pages;
unsigned int persistent_gnts_c;
unsigned long shadow_free;
unsigned int feature_flush;
@@ -200,15 +201,17 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
if (!gnt_list_entry)
goto out_of_memory;
-granted_page = alloc_page(GFP_NOIO);
-if (!granted_page) {
-kfree(gnt_list_entry);
-goto out_of_memory;
+if (info->feature_persistent) {
+granted_page = alloc_page(GFP_NOIO);
+if (!granted_page) {
+kfree(gnt_list_entry);
goto out_of_memory;
}
gnt_list_entry->pfn = page_to_pfn(granted_page);
}
gnt_list_entry->pfn = page_to_pfn(granted_page);
gnt_list_entry->gref = GRANT_INVALID_REF;
-list_add(&gnt_list_entry->node, &info->persistent_gnts);
+list_add(&gnt_list_entry->node, &info->grants);
i++;
}
@@ -216,9 +219,10 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
out_of_memory:
list_for_each_entry_safe(gnt_list_entry, n,
-&info->persistent_gnts, node) {
+&info->grants, node) {
list_del(&gnt_list_entry->node);
-__free_page(pfn_to_page(gnt_list_entry->pfn));
+if (info->feature_persistent)
__free_page(pfn_to_page(gnt_list_entry->pfn));
kfree(gnt_list_entry);
i--;
}
@@ -227,13 +231,14 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
}
static struct grant *get_grant(grant_ref_t *gref_head,
unsigned long pfn,
struct blkfront_info *info)
{
struct grant *gnt_list_entry;
unsigned long buffer_mfn;
-BUG_ON(list_empty(&info->persistent_gnts));
+BUG_ON(list_empty(&info->grants));
-gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant,
+gnt_list_entry = list_first_entry(&info->grants, struct grant,
node);
list_del(&gnt_list_entry->node);
@@ -245,6 +250,10 @@ static struct grant *get_grant(grant_ref_t *gref_head,
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
if (!info->feature_persistent) {
BUG_ON(!pfn);
gnt_list_entry->pfn = pfn;
}
buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
info->xbdev->otherend_id,
@@ -400,10 +409,13 @@ static int blkif_queue_request(struct request *req)
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
-max_grefs = info->max_indirect_segments ?
-info->max_indirect_segments +
-INDIRECT_GREFS(info->max_indirect_segments) :
-BLKIF_MAX_SEGMENTS_PER_REQUEST;
+max_grefs = req->nr_phys_segments;
+if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+/*
+* If we are using indirect segments we need to account
* for the indirect grefs used in the request.
*/
max_grefs += INDIRECT_GREFS(req->nr_phys_segments);
/* Check if we have enough grants to allocate a requests */
if (info->persistent_gnts_c < max_grefs) {
@@ -477,22 +489,34 @@ static int blkif_queue_request(struct request *req)
if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
(i % SEGS_PER_INDIRECT_FRAME == 0)) {
unsigned long pfn;
if (segments)
kunmap_atomic(segments);
n = i / SEGS_PER_INDIRECT_FRAME;
-gnt_list_entry = get_grant(&gref_head, info);
+if (!info->feature_persistent) {
struct page *indirect_page;
/* Fetch a pre-allocated page to use for indirect grefs */
BUG_ON(list_empty(&info->indirect_pages));
indirect_page = list_first_entry(&info->indirect_pages,
struct page, lru);
list_del(&indirect_page->lru);
pfn = page_to_pfn(indirect_page);
}
gnt_list_entry = get_grant(&gref_head, pfn, info);
info->shadow[id].indirect_grants[n] = gnt_list_entry;
segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
}
-gnt_list_entry = get_grant(&gref_head, info);
+gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info);
ref = gnt_list_entry->gref;
info->shadow[id].grants_used[i] = gnt_list_entry;
-if (rq_data_dir(req)) {
+if (rq_data_dir(req) && info->feature_persistent) {
char *bvec_data;
void *shared_data;
@@ -904,21 +928,36 @@ static void blkif_free(struct blkfront_info *info, int suspend)
blk_stop_queue(info->rq);
/* Remove all persistent grants */
-if (!list_empty(&info->persistent_gnts)) {
+if (!list_empty(&info->grants)) {
list_for_each_entry_safe(persistent_gnt, n,
-&info->persistent_gnts, node) {
+&info->grants, node) {
list_del(&persistent_gnt->node);
if (persistent_gnt->gref != GRANT_INVALID_REF) {
gnttab_end_foreign_access(persistent_gnt->gref,
0, 0UL);
info->persistent_gnts_c--;
}
-__free_page(pfn_to_page(persistent_gnt->pfn));
+if (info->feature_persistent)
__free_page(pfn_to_page(persistent_gnt->pfn));
kfree(persistent_gnt);
}
}
BUG_ON(info->persistent_gnts_c != 0);
/*
* Remove indirect pages, this only happens when using indirect
* descriptors but not persistent grants
*/
if (!list_empty(&info->indirect_pages)) {
struct page *indirect_page, *n;
BUG_ON(info->feature_persistent);
list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
}
}
for (i = 0; i < BLK_RING_SIZE; i++) {
/*
* Clear persistent grants present in requests already
@@ -933,7 +972,8 @@ static void blkif_free(struct blkfront_info *info, int suspend)
for (j = 0; j < segs; j++) {
persistent_gnt = info->shadow[i].grants_used[j];
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
-__free_page(pfn_to_page(persistent_gnt->pfn));
+if (info->feature_persistent)
__free_page(pfn_to_page(persistent_gnt->pfn));
kfree(persistent_gnt);
}
@@ -992,7 +1032,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
nseg = s->req.operation == BLKIF_OP_INDIRECT ?
s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
-if (bret->operation == BLKIF_OP_READ) {
+if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
/*
* Copy the data received from the backend into the bvec.
* Since bv_offset can be different than 0, and bv_len different
@@ -1013,13 +1053,51 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < nseg; i++) {
-list_add(&s->grants_used[i]->node, &info->persistent_gnts);
+if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
-info->persistent_gnts_c++;
+/*
* If the grant is still mapped by the backend (the
* backend has chosen to make this grant persistent)
* we add it at the head of the list, so it will be
* reused first.
*/
if (!info->feature_persistent)
pr_alert_ratelimited("backed has not unmapped grant: %u\n",
s->grants_used[i]->gref);
list_add(&s->grants_used[i]->node, &info->grants);
info->persistent_gnts_c++;
} else {
/*
* If the grant is not mapped by the backend we end the
* foreign access and add it to the tail of the list,
* so it will not be picked again unless we run out of
* persistent grants.
*/
gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
s->grants_used[i]->gref = GRANT_INVALID_REF;
list_add_tail(&s->grants_used[i]->node, &info->grants);
}
}
if (s->req.operation == BLKIF_OP_INDIRECT) {
for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
-list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
+if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
-info->persistent_gnts_c++;
+if (!info->feature_persistent)
pr_alert_ratelimited("backed has not unmapped grant: %u\n",
s->indirect_grants[i]->gref);
list_add(&s->indirect_grants[i]->node, &info->grants);
info->persistent_gnts_c++;
} else {
struct page *indirect_page;
gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
/*
* Add the used indirect page back to the list of
* available pages for indirect grefs.
*/
indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
list_add(&indirect_page->lru, &info->indirect_pages);
s->indirect_grants[i]->gref = GRANT_INVALID_REF;
list_add_tail(&s->indirect_grants[i]->node, &info->grants);
}
}
}
}
@@ -1313,7 +1391,8 @@ static int blkfront_probe(struct xenbus_device *dev,
spin_lock_init(&info->io_lock);
info->xbdev = dev;
info->vdevice = vdevice;
-INIT_LIST_HEAD(&info->persistent_gnts);
+INIT_LIST_HEAD(&info->grants);
INIT_LIST_HEAD(&info->indirect_pages);
info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
@@ -1609,6 +1688,23 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
if (err)
goto out_of_memory;
if (!info->feature_persistent && info->max_indirect_segments) {
/*
* We are using indirect descriptors but not persistent
* grants, we need to allocate a set of pages that can be
* used for mapping indirect grefs
*/
int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE;
BUG_ON(!list_empty(&info->indirect_pages));
for (i = 0; i < num; i++) {
struct page *indirect_page = alloc_page(GFP_NOIO);
if (!indirect_page)
goto out_of_memory;
list_add(&indirect_page->lru, &info->indirect_pages);
}
}
for (i = 0; i < BLK_RING_SIZE; i++) {
info->shadow[i].grants_used = kzalloc(
sizeof(info->shadow[i].grants_used[0]) * segs,
@@ -1639,6 +1735,13 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
kfree(info->shadow[i].indirect_grants);
info->shadow[i].indirect_grants = NULL;
}
if (!list_empty(&info->indirect_pages)) {
struct page *indirect_page, *n;
list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
}
}
return -ENOMEM;
}
......
@@ -13,15 +13,8 @@ config BCACHE_DEBUG
---help---
Don't select this option unless you're a developer
-Enables extra debugging tools (primarily a fuzz tester)
+Enables extra debugging tools, allows expensive runtime checks to be
turned on.
config BCACHE_EDEBUG
bool "Extended runtime checks"
depends on BCACHE
---help---
Don't select this option unless you're a developer
Enables extra runtime checks which significantly affect performance
config BCACHE_CLOSURES_DEBUG
bool "Debug closures"
......
@@ -63,13 +63,12 @@
#include "bcache.h"
#include "btree.h"
#include <linux/blkdev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <trace/events/bcache.h>
#define MAX_IN_FLIGHT_DISCARDS 8U
/* Bucket heap / gen */
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
@@ -121,75 +120,6 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
mutex_unlock(&c->bucket_lock);
}
/* Discard/TRIM */
struct discard {
struct list_head list;
struct work_struct work;
struct cache *ca;
long bucket;
struct bio bio;
struct bio_vec bv;
};
static void discard_finish(struct work_struct *w)
{
struct discard *d = container_of(w, struct discard, work);
struct cache *ca = d->ca;
char buf[BDEVNAME_SIZE];
if (!test_bit(BIO_UPTODATE, &d->bio.bi_flags)) {
pr_notice("discard error on %s, disabling",
bdevname(ca->bdev, buf));
d->ca->discard = 0;
}
mutex_lock(&ca->set->bucket_lock);
fifo_push(&ca->free, d->bucket);
list_add(&d->list, &ca->discards);
atomic_dec(&ca->discards_in_flight);
mutex_unlock(&ca->set->bucket_lock);
closure_wake_up(&ca->set->bucket_wait);
wake_up_process(ca->alloc_thread);
closure_put(&ca->set->cl);
}
static void discard_endio(struct bio *bio, int error)
{
struct discard *d = container_of(bio, struct discard, bio);
schedule_work(&d->work);
}
static void do_discard(struct cache *ca, long bucket)
{
struct discard *d = list_first_entry(&ca->discards,
struct discard, list);
list_del(&d->list);
d->bucket = bucket;
atomic_inc(&ca->discards_in_flight);
closure_get(&ca->set->cl);
bio_init(&d->bio);
d->bio.bi_sector = bucket_to_sector(ca->set, d->bucket);
d->bio.bi_bdev = ca->bdev;
d->bio.bi_rw = REQ_WRITE|REQ_DISCARD;
d->bio.bi_max_vecs = 1;
d->bio.bi_io_vec = d->bio.bi_inline_vecs;
d->bio.bi_size = bucket_bytes(ca);
d->bio.bi_end_io = discard_endio;
bio_set_prio(&d->bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
submit_bio(0, &d->bio);
}
/* Allocation */
static inline bool can_inc_bucket_gen(struct bucket *b)
@@ -280,7 +210,7 @@ static void invalidate_buckets_lru(struct cache *ca)
* multiple times when it can't do anything
*/
ca->invalidate_needs_gc = 1;
-bch_queue_gc(ca->set);
+wake_up_gc(ca->set);
return;
}
@@ -305,7 +235,7 @@ static void invalidate_buckets_fifo(struct cache *ca)
if (++checked >= ca->sb.nbuckets) {
ca->invalidate_needs_gc = 1;
-bch_queue_gc(ca->set);
+wake_up_gc(ca->set);
return;
}
}
@@ -330,7 +260,7 @@ static void invalidate_buckets_random(struct cache *ca)
if (++checked >= ca->sb.nbuckets / 2) {
ca->invalidate_needs_gc = 1;
-bch_queue_gc(ca->set);
+wake_up_gc(ca->set);
return;
}
}
@@ -398,16 +328,18 @@ static int bch_allocator_thread(void *arg)
else
break;
allocator_wait(ca, (int) fifo_free(&ca->free) >
atomic_read(&ca->discards_in_flight));
if (ca->discard) { if (ca->discard) {
allocator_wait(ca, !list_empty(&ca->discards)); mutex_unlock(&ca->set->bucket_lock);
do_discard(ca, bucket); blkdev_issue_discard(ca->bdev,
} else { bucket_to_sector(ca->set, bucket),
fifo_push(&ca->free, bucket); ca->sb.block_size, GFP_KERNEL, 0);
closure_wake_up(&ca->set->bucket_wait); mutex_lock(&ca->set->bucket_lock);
} }
allocator_wait(ca, !fifo_full(&ca->free));
fifo_push(&ca->free, bucket);
wake_up(&ca->set->bucket_wait);
} }
/* /*
...@@ -433,16 +365,40 @@ static int bch_allocator_thread(void *arg) ...@@ -433,16 +365,40 @@ static int bch_allocator_thread(void *arg)
} }
} }
long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
{ {
long r = -1; DEFINE_WAIT(w);
again: struct bucket *b;
long r;
/* fastpath */
if (fifo_used(&ca->free) > ca->watermark[watermark]) {
fifo_pop(&ca->free, r);
goto out;
}
if (!wait)
return -1;
while (1) {
if (fifo_used(&ca->free) > ca->watermark[watermark]) {
fifo_pop(&ca->free, r);
break;
}
prepare_to_wait(&ca->set->bucket_wait, &w,
TASK_UNINTERRUPTIBLE);
mutex_unlock(&ca->set->bucket_lock);
schedule();
mutex_lock(&ca->set->bucket_lock);
}
finish_wait(&ca->set->bucket_wait, &w);
out:
wake_up_process(ca->alloc_thread); wake_up_process(ca->alloc_thread);
if (fifo_used(&ca->free) > ca->watermark[watermark] && if (expensive_debug_checks(ca->set)) {
fifo_pop(&ca->free, r)) {
struct bucket *b = ca->buckets + r;
#ifdef CONFIG_BCACHE_EDEBUG
size_t iter; size_t iter;
long i; long i;
...@@ -455,36 +411,23 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) ...@@ -455,36 +411,23 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
BUG_ON(i == r); BUG_ON(i == r);
fifo_for_each(i, &ca->unused, iter) fifo_for_each(i, &ca->unused, iter)
BUG_ON(i == r); BUG_ON(i == r);
#endif
BUG_ON(atomic_read(&b->pin) != 1);
SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
if (watermark <= WATERMARK_METADATA) {
SET_GC_MARK(b, GC_MARK_METADATA);
b->prio = BTREE_PRIO;
} else {
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
b->prio = INITIAL_PRIO;
}
return r;
} }
trace_bcache_alloc_fail(ca); b = ca->buckets + r;
if (cl) { BUG_ON(atomic_read(&b->pin) != 1);
closure_wait(&ca->set->bucket_wait, cl);
if (closure_blocking(cl)) { SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
mutex_unlock(&ca->set->bucket_lock);
closure_sync(cl); if (watermark <= WATERMARK_METADATA) {
mutex_lock(&ca->set->bucket_lock); SET_GC_MARK(b, GC_MARK_METADATA);
goto again; b->prio = BTREE_PRIO;
} } else {
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
b->prio = INITIAL_PRIO;
} }
return -1; return r;
} }
void bch_bucket_free(struct cache_set *c, struct bkey *k) void bch_bucket_free(struct cache_set *c, struct bkey *k)
...@@ -501,7 +444,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k) ...@@ -501,7 +444,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
} }
int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
struct bkey *k, int n, struct closure *cl) struct bkey *k, int n, bool wait)
{ {
int i; int i;
...@@ -514,7 +457,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, ...@@ -514,7 +457,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
struct cache *ca = c->cache_by_alloc[i]; struct cache *ca = c->cache_by_alloc[i];
long b = bch_bucket_alloc(ca, watermark, cl); long b = bch_bucket_alloc(ca, watermark, wait);
if (b == -1) if (b == -1)
goto err; goto err;
...@@ -529,22 +472,202 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, ...@@ -529,22 +472,202 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
return 0; return 0;
err: err:
bch_bucket_free(c, k); bch_bucket_free(c, k);
__bkey_put(c, k); bkey_put(c, k);
return -1; return -1;
} }
int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
struct bkey *k, int n, struct closure *cl) struct bkey *k, int n, bool wait)
{ {
int ret; int ret;
mutex_lock(&c->bucket_lock); mutex_lock(&c->bucket_lock);
ret = __bch_bucket_alloc_set(c, watermark, k, n, cl); ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
return ret; return ret;
} }
/* Sector allocator */
struct open_bucket {
struct list_head list;
unsigned last_write_point;
unsigned sectors_free;
BKEY_PADDED(key);
};
/*
* We keep multiple buckets open for writes, and try to segregate different
* write streams for better cache utilization: first we look for a bucket where
* the last write to it was sequential with the current write, and failing that
* we look for a bucket that was last used by the same task.
*
* The idea is that if you've got multiple tasks pulling data into the cache at the
* same time, you'll get better cache utilization if you try to segregate their
* data and preserve locality.
*
* For example, say you're starting Firefox at the same time you're copying a
* bunch of files. Firefox will likely end up being fairly hot and stay in the
* cache awhile, but the data you copied might not be; if you wrote all that
* data to the same buckets it'd get invalidated at the same time.
*
* Both of those tasks will be doing fairly random IO so we can't rely on
* detecting sequential IO to segregate their data, but going off of the task
* should be a sane heuristic.
*/
static struct open_bucket *pick_data_bucket(struct cache_set *c,
const struct bkey *search,
unsigned write_point,
struct bkey *alloc)
{
struct open_bucket *ret, *ret_task = NULL;
list_for_each_entry_reverse(ret, &c->data_buckets, list)
if (!bkey_cmp(&ret->key, search))
goto found;
else if (ret->last_write_point == write_point)
ret_task = ret;
ret = ret_task ?: list_first_entry(&c->data_buckets,
struct open_bucket, list);
found:
if (!ret->sectors_free && KEY_PTRS(alloc)) {
ret->sectors_free = c->sb.bucket_size;
bkey_copy(&ret->key, alloc);
bkey_init(alloc);
}
if (!ret->sectors_free)
ret = NULL;
return ret;
}
/*
* Allocates some space in the cache to write to, sets k to point to the newly
* allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
* end of the newly allocated space).
*
* May allocate fewer sectors than @sectors, KEY_SIZE(k) indicates how many
* sectors were actually allocated.
*
* If wait is true, will not fail.
*/
bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
unsigned write_point, unsigned write_prio, bool wait)
{
struct open_bucket *b;
BKEY_PADDED(key) alloc;
unsigned i;
/*
* We might have to allocate a new bucket, which we can't do with a
* spinlock held. So if we have to allocate, we drop the lock, allocate
* and then retry. KEY_PTRS() indicates whether alloc points to
* allocated bucket(s).
*/
bkey_init(&alloc.key);
spin_lock(&c->data_bucket_lock);
while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
unsigned watermark = write_prio
? WATERMARK_MOVINGGC
: WATERMARK_NONE;
spin_unlock(&c->data_bucket_lock);
if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait))
return false;
spin_lock(&c->data_bucket_lock);
}
/*
* If we had to allocate, we might race and not need to allocate the
* second time we call pick_data_bucket(). If we allocated a bucket but
* didn't use it, drop the refcount bch_bucket_alloc_set() took:
*/
if (KEY_PTRS(&alloc.key))
bkey_put(c, &alloc.key);
for (i = 0; i < KEY_PTRS(&b->key); i++)
EBUG_ON(ptr_stale(c, &b->key, i));
/* Set up the pointer to the space we're allocating: */
for (i = 0; i < KEY_PTRS(&b->key); i++)
k->ptr[i] = b->key.ptr[i];
sectors = min(sectors, b->sectors_free);
SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
SET_KEY_SIZE(k, sectors);
SET_KEY_PTRS(k, KEY_PTRS(&b->key));
/*
* Move b to the end of the lru, and keep track of what this bucket was
* last used for:
*/
list_move_tail(&b->list, &c->data_buckets);
bkey_copy_key(&b->key, k);
b->last_write_point = write_point;
b->sectors_free -= sectors;
for (i = 0; i < KEY_PTRS(&b->key); i++) {
SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
atomic_long_add(sectors,
&PTR_CACHE(c, &b->key, i)->sectors_written);
}
if (b->sectors_free < c->sb.block_size)
b->sectors_free = 0;
/*
* k takes refcounts on the buckets it points to until it's inserted
* into the btree, but if we're done with this bucket we just transfer
* get_data_bucket()'s refcount.
*/
if (b->sectors_free)
for (i = 0; i < KEY_PTRS(&b->key); i++)
atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
spin_unlock(&c->data_bucket_lock);
return true;
}
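The comments above spell out the contract: the key passed in names the target inode and the starting sector, and on return KEY_SIZE()/KEY_OFFSET() describe what was actually granted out of an open bucket chosen per write point. A minimal caller sketch — example_alloc_for_write() is an invented name, the key accessors are assumed to come from linux/bcache.h, and wait is assumed true only for writes that must not fail (writeback):

/*
 * Sketch only: ask for up to @sectors of cache space for a write to
 * (@inode, @start_sector).  On success KEY_SIZE(&tmp.key) says how many
 * sectors were actually granted (possibly fewer than requested) and
 * KEY_OFFSET() now points at the end of the allocated range.
 */
static bool example_alloc_for_write(struct cache_set *c, unsigned inode,
				    uint64_t start_sector, unsigned sectors,
				    unsigned write_point, bool wait)
{
	BKEY_PADDED(key) tmp;

	bkey_init(&tmp.key);
	SET_KEY_INODE(&tmp.key, inode);
	SET_KEY_OFFSET(&tmp.key, start_sector);

	if (!bch_alloc_sectors(c, &tmp.key, sectors, write_point,
			       0 /* write_prio: normal write, WATERMARK_NONE */,
			       wait))
		return false;		/* cache full and wait == false */

	/*
	 * tmp.key now carries KEY_PTRS() pointers into open bucket(s); the
	 * caller would insert it into the btree and write KEY_SIZE(&tmp.key)
	 * sectors of data to those pointers.
	 */
	return true;
}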
/* Init */ /* Init */
void bch_open_buckets_free(struct cache_set *c)
{
struct open_bucket *b;
while (!list_empty(&c->data_buckets)) {
b = list_first_entry(&c->data_buckets,
struct open_bucket, list);
list_del(&b->list);
kfree(b);
}
}
int bch_open_buckets_alloc(struct cache_set *c)
{
int i;
spin_lock_init(&c->data_bucket_lock);
for (i = 0; i < 6; i++) {
struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
if (!b)
return -ENOMEM;
list_add(&b->list, &c->data_buckets);
}
return 0;
}
int bch_cache_allocator_start(struct cache *ca) int bch_cache_allocator_start(struct cache *ca)
{ {
struct task_struct *k = kthread_run(bch_allocator_thread, struct task_struct *k = kthread_run(bch_allocator_thread,
...@@ -556,22 +679,8 @@ int bch_cache_allocator_start(struct cache *ca) ...@@ -556,22 +679,8 @@ int bch_cache_allocator_start(struct cache *ca)
return 0; return 0;
} }
void bch_cache_allocator_exit(struct cache *ca)
{
struct discard *d;
while (!list_empty(&ca->discards)) {
d = list_first_entry(&ca->discards, struct discard, list);
cancel_work_sync(&d->work);
list_del(&d->list);
kfree(d);
}
}
int bch_cache_allocator_init(struct cache *ca) int bch_cache_allocator_init(struct cache *ca)
{ {
unsigned i;
/* /*
* Reserve: * Reserve:
* Prio/gen writes first * Prio/gen writes first
...@@ -589,15 +698,5 @@ int bch_cache_allocator_init(struct cache *ca) ...@@ -589,15 +698,5 @@ int bch_cache_allocator_init(struct cache *ca)
ca->watermark[WATERMARK_NONE] = ca->free.size / 2 + ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
ca->watermark[WATERMARK_MOVINGGC]; ca->watermark[WATERMARK_MOVINGGC];
for (i = 0; i < MAX_IN_FLIGHT_DISCARDS; i++) {
struct discard *d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
d->ca = ca;
INIT_WORK(&d->work, discard_finish);
list_add(&d->list, &ca->discards);
}
return 0; return 0;
} }
...@@ -177,6 +177,7 @@ ...@@ -177,6 +177,7 @@
#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
#include <linux/bcache.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/list.h> #include <linux/list.h>
...@@ -210,168 +211,6 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); ...@@ -210,168 +211,6 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_METADATA 2 #define GC_MARK_METADATA 2
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14); BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14);
struct bkey {
uint64_t high;
uint64_t low;
uint64_t ptr[];
};
/* Enough for a key with 6 pointers */
#define BKEY_PAD 8
#define BKEY_PADDED(key) \
union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; }
/* Version 0: Cache device
* Version 1: Backing device
* Version 2: Seed pointer into btree node checksum
* Version 3: Cache device with new UUID format
* Version 4: Backing device with data offset
*/
#define BCACHE_SB_VERSION_CDEV 0
#define BCACHE_SB_VERSION_BDEV 1
#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
#define BCACHE_SB_MAX_VERSION 4
#define SB_SECTOR 8
#define SB_SIZE 4096
#define SB_LABEL_SIZE 32
#define SB_JOURNAL_BUCKETS 256U
/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
#define MAX_CACHES_PER_SET 8
#define BDEV_DATA_START_DEFAULT 16 /* sectors */
struct cache_sb {
uint64_t csum;
uint64_t offset; /* sector where this sb was written */
uint64_t version;
uint8_t magic[16];
uint8_t uuid[16];
union {
uint8_t set_uuid[16];
uint64_t set_magic;
};
uint8_t label[SB_LABEL_SIZE];
uint64_t flags;
uint64_t seq;
uint64_t pad[8];
union {
struct {
/* Cache devices */
uint64_t nbuckets; /* device size */
uint16_t block_size; /* sectors */
uint16_t bucket_size; /* sectors */
uint16_t nr_in_set;
uint16_t nr_this_dev;
};
struct {
/* Backing devices */
uint64_t data_offset;
/*
* block_size from the cache device section is still used by
* backing devices, so don't add anything here until we fix
* things to not need it for backing devices anymore
*/
};
};
uint32_t last_mount; /* time_t */
uint16_t first_bucket;
union {
uint16_t njournal_buckets;
uint16_t keys;
};
uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */
};
BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
#define CACHE_REPLACEMENT_LRU 0U
#define CACHE_REPLACEMENT_FIFO 1U
#define CACHE_REPLACEMENT_RANDOM 2U
BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
#define CACHE_MODE_WRITETHROUGH 0U
#define CACHE_MODE_WRITEBACK 1U
#define CACHE_MODE_WRITEAROUND 2U
#define CACHE_MODE_NONE 3U
BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
#define BDEV_STATE_NONE 0U
#define BDEV_STATE_CLEAN 1U
#define BDEV_STATE_DIRTY 2U
#define BDEV_STATE_STALE 3U
/* Version 1: Seed pointer into btree node checksum
*/
#define BCACHE_BSET_VERSION 1
/*
* This is the on disk format for btree nodes - a btree node on disk is a list
* of these; within each set the keys are sorted
*/
struct bset {
uint64_t csum;
uint64_t magic;
uint64_t seq;
uint32_t version;
uint32_t keys;
union {
struct bkey start[0];
uint64_t d[0];
};
};
/*
* On disk format for priorities and gens - see super.c near prio_write() for
* more.
*/
struct prio_set {
uint64_t csum;
uint64_t magic;
uint64_t seq;
uint32_t version;
uint32_t pad;
uint64_t next_bucket;
struct bucket_disk {
uint16_t prio;
uint8_t gen;
} __attribute((packed)) data[];
};
struct uuid_entry {
union {
struct {
uint8_t uuid[16];
uint8_t label[32];
uint32_t first_reg;
uint32_t last_reg;
uint32_t invalidated;
uint32_t flags;
/* Size of flash only volumes */
uint64_t sectors;
};
uint8_t pad[128];
};
};
BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
#include "journal.h" #include "journal.h"
#include "stats.h" #include "stats.h"
struct search; struct search;
...@@ -384,8 +223,6 @@ struct keybuf_key { ...@@ -384,8 +223,6 @@ struct keybuf_key {
void *private; void *private;
}; };
typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
struct keybuf { struct keybuf {
struct bkey last_scanned; struct bkey last_scanned;
spinlock_t lock; spinlock_t lock;
...@@ -400,7 +237,7 @@ struct keybuf { ...@@ -400,7 +237,7 @@ struct keybuf {
struct rb_root keys; struct rb_root keys;
#define KEYBUF_NR 100 #define KEYBUF_NR 500
DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
}; };
...@@ -429,16 +266,15 @@ struct bcache_device { ...@@ -429,16 +266,15 @@ struct bcache_device {
struct gendisk *disk; struct gendisk *disk;
/* If nonzero, we're closing */ unsigned long flags;
atomic_t closing; #define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1
/* If nonzero, we're detaching/unregistering from cache set */ #define BCACHE_DEV_UNLINK_DONE 2
atomic_t detaching;
int flush_done;
uint64_t nr_stripes; unsigned nr_stripes;
unsigned stripe_size_bits; unsigned stripe_size;
atomic_t *stripe_sectors_dirty; atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
unsigned long sectors_dirty_last; unsigned long sectors_dirty_last;
long sectors_dirty_derivative; long sectors_dirty_derivative;
...@@ -509,7 +345,7 @@ struct cached_dev { ...@@ -509,7 +345,7 @@ struct cached_dev {
/* Limit number of writeback bios in flight */ /* Limit number of writeback bios in flight */
struct semaphore in_flight; struct semaphore in_flight;
struct closure_with_timer writeback; struct task_struct *writeback_thread;
struct keybuf writeback_keys; struct keybuf writeback_keys;
...@@ -527,8 +363,8 @@ struct cached_dev { ...@@ -527,8 +363,8 @@ struct cached_dev {
unsigned sequential_cutoff; unsigned sequential_cutoff;
unsigned readahead; unsigned readahead;
unsigned sequential_merge:1;
unsigned verify:1; unsigned verify:1;
unsigned bypass_torture_test:1;
unsigned partial_stripes_expensive:1; unsigned partial_stripes_expensive:1;
unsigned writeback_metadata:1; unsigned writeback_metadata:1;
...@@ -620,15 +456,6 @@ struct cache { ...@@ -620,15 +456,6 @@ struct cache {
bool discard; /* Get rid of? */ bool discard; /* Get rid of? */
/*
* We preallocate structs for issuing discards to buckets, and keep them
* on this list when they're not in use; do_discard() issues discards
* whenever there's work to do and is called by free_some_buckets() and
* when a discard finishes.
*/
atomic_t discards_in_flight;
struct list_head discards;
struct journal_device journal; struct journal_device journal;
/* The rest of this all shows up in sysfs */ /* The rest of this all shows up in sysfs */
...@@ -649,7 +476,6 @@ struct gc_stat { ...@@ -649,7 +476,6 @@ struct gc_stat {
size_t nkeys; size_t nkeys;
uint64_t data; /* sectors */ uint64_t data; /* sectors */
uint64_t dirty; /* sectors */
unsigned in_use; /* percent */ unsigned in_use; /* percent */
}; };
...@@ -744,8 +570,8 @@ struct cache_set { ...@@ -744,8 +570,8 @@ struct cache_set {
* basically a lock for this that we can wait on asynchronously. The * basically a lock for this that we can wait on asynchronously. The
* btree_root() macro releases the lock when it returns. * btree_root() macro releases the lock when it returns.
*/ */
struct closure *try_harder; struct task_struct *try_harder;
struct closure_waitlist try_wait; wait_queue_head_t try_wait;
uint64_t try_harder_start; uint64_t try_harder_start;
/* /*
...@@ -759,7 +585,7 @@ struct cache_set { ...@@ -759,7 +585,7 @@ struct cache_set {
* written. * written.
*/ */
atomic_t prio_blocked; atomic_t prio_blocked;
struct closure_waitlist bucket_wait; wait_queue_head_t bucket_wait;
/* /*
* For any bio we don't skip we subtract the number of sectors from * For any bio we don't skip we subtract the number of sectors from
...@@ -782,7 +608,7 @@ struct cache_set { ...@@ -782,7 +608,7 @@ struct cache_set {
struct gc_stat gc_stats; struct gc_stat gc_stats;
size_t nbuckets; size_t nbuckets;
struct closure_with_waitlist gc; struct task_struct *gc_thread;
/* Where in the btree gc currently is */ /* Where in the btree gc currently is */
struct bkey gc_done; struct bkey gc_done;
...@@ -795,11 +621,10 @@ struct cache_set { ...@@ -795,11 +621,10 @@ struct cache_set {
/* Counts how many sectors bio_insert has added to the cache */ /* Counts how many sectors bio_insert has added to the cache */
atomic_t sectors_to_gc; atomic_t sectors_to_gc;
struct closure moving_gc; wait_queue_head_t moving_gc_wait;
struct closure_waitlist moving_gc_wait;
struct keybuf moving_gc_keys; struct keybuf moving_gc_keys;
/* Number of moving GC bios in flight */ /* Number of moving GC bios in flight */
atomic_t in_flight; struct semaphore moving_in_flight;
struct btree *root; struct btree *root;
...@@ -841,22 +666,27 @@ struct cache_set { ...@@ -841,22 +666,27 @@ struct cache_set {
unsigned congested_read_threshold_us; unsigned congested_read_threshold_us;
unsigned congested_write_threshold_us; unsigned congested_write_threshold_us;
spinlock_t sort_time_lock;
struct time_stats sort_time; struct time_stats sort_time;
struct time_stats btree_gc_time; struct time_stats btree_gc_time;
struct time_stats btree_split_time; struct time_stats btree_split_time;
spinlock_t btree_read_time_lock;
struct time_stats btree_read_time; struct time_stats btree_read_time;
struct time_stats try_harder_time; struct time_stats try_harder_time;
atomic_long_t cache_read_races; atomic_long_t cache_read_races;
atomic_long_t writeback_keys_done; atomic_long_t writeback_keys_done;
atomic_long_t writeback_keys_failed; atomic_long_t writeback_keys_failed;
enum {
ON_ERROR_UNREGISTER,
ON_ERROR_PANIC,
} on_error;
unsigned error_limit; unsigned error_limit;
unsigned error_decay; unsigned error_decay;
unsigned short journal_delay_ms; unsigned short journal_delay_ms;
unsigned verify:1; unsigned verify:1;
unsigned key_merging_disabled:1; unsigned key_merging_disabled:1;
unsigned expensive_debug_checks:1;
unsigned gc_always_rewrite:1; unsigned gc_always_rewrite:1;
unsigned shrinker_disabled:1; unsigned shrinker_disabled:1;
unsigned copy_gc_enabled:1; unsigned copy_gc_enabled:1;
...@@ -865,21 +695,6 @@ struct cache_set { ...@@ -865,21 +695,6 @@ struct cache_set {
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
}; };
static inline bool key_merging_disabled(struct cache_set *c)
{
#ifdef CONFIG_BCACHE_DEBUG
return c->key_merging_disabled;
#else
return 0;
#endif
}
static inline bool SB_IS_BDEV(const struct cache_sb *sb)
{
return sb->version == BCACHE_SB_VERSION_BDEV
|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
}
struct bbio { struct bbio {
unsigned submit_time_us; unsigned submit_time_us;
union { union {
...@@ -933,59 +748,6 @@ static inline unsigned local_clock_us(void) ...@@ -933,59 +748,6 @@ static inline unsigned local_clock_us(void)
#define prio_buckets(c) \ #define prio_buckets(c) \
DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))
#define JSET_MAGIC 0x245235c1a3625032ULL
#define PSET_MAGIC 0x6750e15f87337f91ULL
#define BSET_MAGIC 0x90135c78b99e07f5ULL
#define jset_magic(c) ((c)->sb.set_magic ^ JSET_MAGIC)
#define pset_magic(c) ((c)->sb.set_magic ^ PSET_MAGIC)
#define bset_magic(c) ((c)->sb.set_magic ^ BSET_MAGIC)
/* Bkey fields: all units are in sectors */
#define KEY_FIELD(name, field, offset, size) \
BITMASK(name, struct bkey, field, offset, size)
#define PTR_FIELD(name, offset, size) \
static inline uint64_t name(const struct bkey *k, unsigned i) \
{ return (k->ptr[i] >> offset) & ~(((uint64_t) ~0) << size); } \
\
static inline void SET_##name(struct bkey *k, unsigned i, uint64_t v)\
{ \
k->ptr[i] &= ~(~((uint64_t) ~0 << size) << offset); \
k->ptr[i] |= v << offset; \
}
KEY_FIELD(KEY_PTRS, high, 60, 3)
KEY_FIELD(HEADER_SIZE, high, 58, 2)
KEY_FIELD(KEY_CSUM, high, 56, 2)
KEY_FIELD(KEY_PINNED, high, 55, 1)
KEY_FIELD(KEY_DIRTY, high, 36, 1)
KEY_FIELD(KEY_SIZE, high, 20, 16)
KEY_FIELD(KEY_INODE, high, 0, 20)
/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */
static inline uint64_t KEY_OFFSET(const struct bkey *k)
{
return k->low;
}
static inline void SET_KEY_OFFSET(struct bkey *k, uint64_t v)
{
k->low = v;
}
PTR_FIELD(PTR_DEV, 51, 12)
PTR_FIELD(PTR_OFFSET, 8, 43)
PTR_FIELD(PTR_GEN, 0, 8)
#define PTR_CHECK_DEV ((1 << 12) - 1)
#define PTR(gen, offset, dev) \
((((uint64_t) dev) << 51) | ((uint64_t) offset) << 8 | gen)
static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{ {
return s >> c->bucket_bits; return s >> c->bucket_bits;
...@@ -1024,27 +786,11 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c, ...@@ -1024,27 +786,11 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
/* Btree key macros */ /* Btree key macros */
/*
* The high bit being set is a relic from when we used it to do binary
* searches - it told you where a key started. It's not used anymore,
* and can probably be safely dropped.
*/
#define KEY(dev, sector, len) \
((struct bkey) { \
.high = (1ULL << 63) | ((uint64_t) (len) << 20) | (dev), \
.low = (sector) \
})
static inline void bkey_init(struct bkey *k) static inline void bkey_init(struct bkey *k)
{ {
*k = KEY(0, 0, 0); *k = ZERO_KEY;
} }
#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k))
#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0)
#define MAX_KEY KEY(~(~0 << 20), ((uint64_t) ~0) >> 1, 0)
#define ZERO_KEY KEY(0, 0, 0)
/* /*
* This is used for various on disk data structures - cache_sb, prio_set, bset, * This is used for various on disk data structures - cache_sb, prio_set, bset,
* jset: The checksum is _always_ the first 8 bytes of these structs * jset: The checksum is _always_ the first 8 bytes of these structs
...@@ -1094,14 +840,6 @@ do { \ ...@@ -1094,14 +840,6 @@ do { \
for (b = (ca)->buckets + (ca)->sb.first_bucket; \ for (b = (ca)->buckets + (ca)->sb.first_bucket; \
b < (ca)->buckets + (ca)->sb.nbuckets; b++) b < (ca)->buckets + (ca)->sb.nbuckets; b++)
static inline void __bkey_put(struct cache_set *c, struct bkey *k)
{
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++)
atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
}
static inline void cached_dev_put(struct cached_dev *dc) static inline void cached_dev_put(struct cached_dev *dc)
{ {
if (atomic_dec_and_test(&dc->count)) if (atomic_dec_and_test(&dc->count))
...@@ -1173,13 +911,15 @@ uint8_t bch_inc_gen(struct cache *, struct bucket *); ...@@ -1173,13 +911,15 @@ uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int); void bch_rescale_priorities(struct cache_set *, int);
bool bch_bucket_add_unused(struct cache *, struct bucket *); bool bch_bucket_add_unused(struct cache *, struct bucket *);
long bch_bucket_alloc(struct cache *, unsigned, struct closure *); long bch_bucket_alloc(struct cache *, unsigned, bool);
void bch_bucket_free(struct cache_set *, struct bkey *); void bch_bucket_free(struct cache_set *, struct bkey *);
int __bch_bucket_alloc_set(struct cache_set *, unsigned, int __bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, struct closure *); struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned, int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, struct closure *); struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
__printf(2, 3) __printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...); bool bch_cache_set_error(struct cache_set *, const char *, ...);
...@@ -1187,7 +927,7 @@ bool bch_cache_set_error(struct cache_set *, const char *, ...); ...@@ -1187,7 +927,7 @@ bool bch_cache_set_error(struct cache_set *, const char *, ...);
void bch_prio_write(struct cache *); void bch_prio_write(struct cache *);
void bch_write_bdev_super(struct cached_dev *, struct closure *); void bch_write_bdev_super(struct cached_dev *, struct closure *);
extern struct workqueue_struct *bcache_wq, *bch_gc_wq; extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[]; extern const char * const bch_cache_modes[];
extern struct mutex bch_register_lock; extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets; extern struct list_head bch_cache_sets;
...@@ -1220,15 +960,14 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *); ...@@ -1220,15 +960,14 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *); void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *);
void bch_moving_init_cache_set(struct cache_set *); void bch_moving_init_cache_set(struct cache_set *);
int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca); int bch_cache_allocator_start(struct cache *ca);
void bch_cache_allocator_exit(struct cache *ca);
int bch_cache_allocator_init(struct cache *ca); int bch_cache_allocator_init(struct cache *ca);
void bch_debug_exit(void); void bch_debug_exit(void);
int bch_debug_init(struct kobject *); int bch_debug_init(struct kobject *);
void bch_writeback_exit(void);
int bch_writeback_init(void);
void bch_request_exit(void); void bch_request_exit(void);
int bch_request_init(void); int bch_request_init(void);
void bch_btree_exit(void); void bch_btree_exit(void);
......
...@@ -14,22 +14,12 @@ ...@@ -14,22 +14,12 @@
/* Keylists */ /* Keylists */
void bch_keylist_copy(struct keylist *dest, struct keylist *src)
{
*dest = *src;
if (src->list == src->d) {
size_t n = (uint64_t *) src->top - src->d;
dest->top = (struct bkey *) &dest->d[n];
dest->list = dest->d;
}
}
int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c) int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c)
{ {
unsigned oldsize = (uint64_t *) l->top - l->list; size_t oldsize = bch_keylist_nkeys(l);
unsigned newsize = oldsize + 2 + nptrs; size_t newsize = oldsize + 2 + nptrs;
uint64_t *new; uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p;
uint64_t *new_keys;
/* The journalling code doesn't handle the case where the keys to insert /* The journalling code doesn't handle the case where the keys to insert
* are bigger than an empty write: If we just return -ENOMEM here, * are bigger than an empty write: If we just return -ENOMEM here,
...@@ -45,24 +35,23 @@ int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c) ...@@ -45,24 +35,23 @@ int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c)
roundup_pow_of_two(oldsize) == newsize) roundup_pow_of_two(oldsize) == newsize)
return 0; return 0;
new = krealloc(l->list == l->d ? NULL : l->list, new_keys = krealloc(old_keys, sizeof(uint64_t) * newsize, GFP_NOIO);
sizeof(uint64_t) * newsize, GFP_NOIO);
if (!new) if (!new_keys)
return -ENOMEM; return -ENOMEM;
if (l->list == l->d) if (!old_keys)
memcpy(new, l->list, sizeof(uint64_t) * KEYLIST_INLINE); memcpy(new_keys, l->inline_keys, sizeof(uint64_t) * oldsize);
l->list = new; l->keys_p = new_keys;
l->top = (struct bkey *) (&l->list[oldsize]); l->top_p = new_keys + oldsize;
return 0; return 0;
} }
struct bkey *bch_keylist_pop(struct keylist *l) struct bkey *bch_keylist_pop(struct keylist *l)
{ {
struct bkey *k = l->bottom; struct bkey *k = l->keys;
if (k == l->top) if (k == l->top)
return NULL; return NULL;
...@@ -73,21 +62,20 @@ struct bkey *bch_keylist_pop(struct keylist *l) ...@@ -73,21 +62,20 @@ struct bkey *bch_keylist_pop(struct keylist *l)
return l->top = k; return l->top = k;
} }
/* Pointer validation */ void bch_keylist_pop_front(struct keylist *l)
bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
{ {
unsigned i; l->top_p -= bkey_u64s(l->keys);
char buf[80];
if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))) memmove(l->keys,
goto bad; bkey_next(l->keys),
bch_keylist_bytes(l));
}
if (!level && KEY_SIZE(k) > KEY_OFFSET(k)) /* Pointer validation */
goto bad;
if (!KEY_SIZE(k)) static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
return true; {
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) { if (ptr_available(c, k, i)) {
...@@ -98,13 +86,83 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) ...@@ -98,13 +86,83 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
if (KEY_SIZE(k) + r > c->sb.bucket_size || if (KEY_SIZE(k) + r > c->sb.bucket_size ||
bucket < ca->sb.first_bucket || bucket < ca->sb.first_bucket ||
bucket >= ca->sb.nbuckets) bucket >= ca->sb.nbuckets)
goto bad; return true;
} }
return false;
}
bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
char buf[80];
if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
goto bad;
if (__ptr_invalid(c, k))
goto bad;
return false;
bad:
bch_bkey_to_text(buf, sizeof(buf), k);
cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
return true;
}
bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
char buf[80];
if (!KEY_SIZE(k))
return true;
if (KEY_SIZE(k) > KEY_OFFSET(k))
goto bad;
if (__ptr_invalid(c, k))
goto bad;
return false; return false;
bad: bad:
bch_bkey_to_text(buf, sizeof(buf), k); bch_bkey_to_text(buf, sizeof(buf), k);
cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k)); cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
return true;
}
static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k,
unsigned ptr)
{
struct bucket *g = PTR_BUCKET(b->c, k, ptr);
char buf[80];
if (mutex_trylock(&b->c->bucket_lock)) {
if (b->level) {
if (KEY_DIRTY(k) ||
g->prio != BTREE_PRIO ||
(b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_METADATA))
goto err;
} else {
if (g->prio == BTREE_PRIO)
goto err;
if (KEY_DIRTY(k) &&
b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_DIRTY)
goto err;
}
mutex_unlock(&b->c->bucket_lock);
}
return false;
err:
mutex_unlock(&b->c->bucket_lock);
bch_bkey_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
return true; return true;
} }
...@@ -118,64 +176,29 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) ...@@ -118,64 +176,29 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k)
bch_ptr_invalid(b, k)) bch_ptr_invalid(b, k))
return true; return true;
if (KEY_PTRS(k) && PTR_DEV(k, 0) == PTR_CHECK_DEV) for (i = 0; i < KEY_PTRS(k); i++) {
return true; if (!ptr_available(b->c, k, i))
return true;
for (i = 0; i < KEY_PTRS(k); i++) g = PTR_BUCKET(b->c, k, i);
if (ptr_available(b->c, k, i)) { stale = ptr_stale(b->c, k, i);
g = PTR_BUCKET(b->c, k, i);
stale = ptr_stale(b->c, k, i);
btree_bug_on(stale > 96, b, btree_bug_on(stale > 96, b,
"key too stale: %i, need_gc %u", "key too stale: %i, need_gc %u",
stale, b->c->need_gc); stale, b->c->need_gc);
btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k), btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
b, "stale dirty pointer"); b, "stale dirty pointer");
if (stale) if (stale)
return true; return true;
#ifdef CONFIG_BCACHE_EDEBUG if (expensive_debug_checks(b->c) &&
if (!mutex_trylock(&b->c->bucket_lock)) ptr_bad_expensive_checks(b, k, i))
continue; return true;
}
if (b->level) {
if (KEY_DIRTY(k) ||
g->prio != BTREE_PRIO ||
(b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_METADATA))
goto bug;
} else {
if (g->prio == BTREE_PRIO)
goto bug;
if (KEY_DIRTY(k) &&
b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_DIRTY)
goto bug;
}
mutex_unlock(&b->c->bucket_lock);
#endif
}
return false; return false;
#ifdef CONFIG_BCACHE_EDEBUG
bug:
mutex_unlock(&b->c->bucket_lock);
{
char buf[80];
bch_bkey_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
}
return true;
#endif
} }
/* Key/pointer manipulation */ /* Key/pointer manipulation */
...@@ -458,16 +481,8 @@ static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline) ...@@ -458,16 +481,8 @@ static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift) static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
{ {
#ifdef CONFIG_X86_64
asm("shrd %[shift],%[high],%[low]"
: [low] "+Rm" (low)
: [high] "R" (high),
[shift] "ci" (shift)
: "cc");
#else
low >>= shift; low >>= shift;
low |= (high << 1) << (63U - shift); low |= (high << 1) << (63U - shift);
#endif
return low; return low;
} }
...@@ -686,7 +701,7 @@ void bch_bset_init_next(struct btree *b) ...@@ -686,7 +701,7 @@ void bch_bset_init_next(struct btree *b)
} else } else
get_random_bytes(&i->seq, sizeof(uint64_t)); get_random_bytes(&i->seq, sizeof(uint64_t));
i->magic = bset_magic(b->c); i->magic = bset_magic(&b->c->sb);
i->version = 0; i->version = 0;
i->keys = 0; i->keys = 0;
...@@ -824,16 +839,16 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, ...@@ -824,16 +839,16 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
} else } else
i = bset_search_write_set(b, t, search); i = bset_search_write_set(b, t, search);
#ifdef CONFIG_BCACHE_EDEBUG if (expensive_debug_checks(b->c)) {
BUG_ON(bset_written(b, t) && BUG_ON(bset_written(b, t) &&
i.l != t->data->start && i.l != t->data->start &&
bkey_cmp(tree_to_prev_bkey(t, bkey_cmp(tree_to_prev_bkey(t,
inorder_to_tree(bkey_to_cacheline(t, i.l), t)), inorder_to_tree(bkey_to_cacheline(t, i.l), t)),
search) > 0); search) > 0);
BUG_ON(i.r != end(t->data) && BUG_ON(i.r != end(t->data) &&
bkey_cmp(i.r, search) <= 0); bkey_cmp(i.r, search) <= 0);
#endif }
while (likely(i.l != i.r) && while (likely(i.l != i.r) &&
bkey_cmp(i.l, search) <= 0) bkey_cmp(i.l, search) <= 0)
...@@ -844,6 +859,13 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, ...@@ -844,6 +859,13 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
/* Btree iterator */ /* Btree iterator */
/*
* Returns true if l > r - unless l == r, in which case returns true if l is
* older than r.
*
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
* equal in different sets, we have to process them newest to oldest.
*/
static inline bool btree_iter_cmp(struct btree_iter_set l, static inline bool btree_iter_cmp(struct btree_iter_set l,
struct btree_iter_set r) struct btree_iter_set r)
{ {
...@@ -867,12 +889,16 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, ...@@ -867,12 +889,16 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
} }
struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter, struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter,
struct bkey *search, struct bset_tree *start) struct bkey *search, struct bset_tree *start)
{ {
struct bkey *ret = NULL; struct bkey *ret = NULL;
iter->size = ARRAY_SIZE(iter->data); iter->size = ARRAY_SIZE(iter->data);
iter->used = 0; iter->used = 0;
#ifdef CONFIG_BCACHE_DEBUG
iter->b = b;
#endif
for (; start <= &b->sets[b->nsets]; start++) { for (; start <= &b->sets[b->nsets]; start++) {
ret = bch_bset_search(b, start, search); ret = bch_bset_search(b, start, search);
bch_btree_iter_push(iter, ret, end(start->data)); bch_btree_iter_push(iter, ret, end(start->data));
...@@ -887,6 +913,8 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter) ...@@ -887,6 +913,8 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
struct bkey *ret = NULL; struct bkey *ret = NULL;
if (!btree_iter_end(iter)) { if (!btree_iter_end(iter)) {
bch_btree_iter_next_check(iter);
ret = iter->data->k; ret = iter->data->k;
iter->data->k = bkey_next(iter->data->k); iter->data->k = bkey_next(iter->data->k);
...@@ -916,14 +944,6 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, ...@@ -916,14 +944,6 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
return ret; return ret;
} }
struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
{
struct btree_iter iter;
bch_btree_iter_init(b, &iter, search);
return bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
}
/* Mergesort */ /* Mergesort */
static void sort_key_next(struct btree_iter *iter, static void sort_key_next(struct btree_iter *iter,
...@@ -998,7 +1018,6 @@ static void btree_mergesort(struct btree *b, struct bset *out, ...@@ -998,7 +1018,6 @@ static void btree_mergesort(struct btree *b, struct bset *out,
out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
pr_debug("sorted %i keys", out->keys); pr_debug("sorted %i keys", out->keys);
bch_check_key_order(b, out);
} }
static void __btree_sort(struct btree *b, struct btree_iter *iter, static void __btree_sort(struct btree *b, struct btree_iter *iter,
...@@ -1029,7 +1048,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, ...@@ -1029,7 +1048,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
* memcpy() * memcpy()
*/ */
out->magic = bset_magic(b->c); out->magic = bset_magic(&b->c->sb);
out->seq = b->sets[0].data->seq; out->seq = b->sets[0].data->seq;
out->version = b->sets[0].data->version; out->version = b->sets[0].data->version;
swap(out, b->sets[0].data); swap(out, b->sets[0].data);
...@@ -1050,24 +1069,21 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, ...@@ -1050,24 +1069,21 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
if (b->written) if (b->written)
bset_build_written_tree(b); bset_build_written_tree(b);
if (!start) { if (!start)
spin_lock(&b->c->sort_time_lock);
bch_time_stats_update(&b->c->sort_time, start_time); bch_time_stats_update(&b->c->sort_time, start_time);
spin_unlock(&b->c->sort_time_lock);
}
} }
void bch_btree_sort_partial(struct btree *b, unsigned start) void bch_btree_sort_partial(struct btree *b, unsigned start)
{ {
size_t oldsize = 0, order = b->page_order, keys = 0; size_t order = b->page_order, keys = 0;
struct btree_iter iter; struct btree_iter iter;
int oldsize = bch_count_data(b);
__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);
BUG_ON(b->sets[b->nsets].data == write_block(b) && BUG_ON(b->sets[b->nsets].data == write_block(b) &&
(b->sets[b->nsets].size || b->nsets)); (b->sets[b->nsets].size || b->nsets));
if (b->written)
oldsize = bch_count_data(b);
if (start) { if (start) {
unsigned i; unsigned i;
...@@ -1083,7 +1099,7 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) ...@@ -1083,7 +1099,7 @@ void bch_btree_sort_partial(struct btree *b, unsigned start)
__btree_sort(b, &iter, start, order, false); __btree_sort(b, &iter, start, order, false);
EBUG_ON(b->written && bch_count_data(b) != oldsize); EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
} }
void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter)
...@@ -1101,9 +1117,7 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) ...@@ -1101,9 +1117,7 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)
btree_mergesort(b, new->sets->data, &iter, false, true); btree_mergesort(b, new->sets->data, &iter, false, true);
spin_lock(&b->c->sort_time_lock);
bch_time_stats_update(&b->c->sort_time, start_time); bch_time_stats_update(&b->c->sort_time, start_time);
spin_unlock(&b->c->sort_time_lock);
bkey_copy_key(&new->key, &b->key); bkey_copy_key(&new->key, &b->key);
new->sets->size = 0; new->sets->size = 0;
...@@ -1148,16 +1162,16 @@ void bch_btree_sort_lazy(struct btree *b) ...@@ -1148,16 +1162,16 @@ void bch_btree_sort_lazy(struct btree *b)
/* Sysfs stuff */ /* Sysfs stuff */
struct bset_stats { struct bset_stats {
struct btree_op op;
size_t nodes; size_t nodes;
size_t sets_written, sets_unwritten; size_t sets_written, sets_unwritten;
size_t bytes_written, bytes_unwritten; size_t bytes_written, bytes_unwritten;
size_t floats, failed; size_t floats, failed;
}; };
static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, static int btree_bset_stats(struct btree_op *op, struct btree *b)
struct bset_stats *stats)
{ {
struct bkey *k; struct bset_stats *stats = container_of(op, struct bset_stats, op);
unsigned i; unsigned i;
stats->nodes++; stats->nodes++;
...@@ -1182,30 +1196,19 @@ static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, ...@@ -1182,30 +1196,19 @@ static int bch_btree_bset_stats(struct btree *b, struct btree_op *op,
} }
} }
if (b->level) { return MAP_CONTINUE;
struct btree_iter iter;
for_each_key_filter(b, k, &iter, bch_ptr_bad) {
int ret = btree(bset_stats, k, b, op, stats);
if (ret)
return ret;
}
}
return 0;
} }
int bch_bset_print_stats(struct cache_set *c, char *buf) int bch_bset_print_stats(struct cache_set *c, char *buf)
{ {
struct btree_op op;
struct bset_stats t; struct bset_stats t;
int ret; int ret;
bch_btree_op_init_stack(&op);
memset(&t, 0, sizeof(struct bset_stats)); memset(&t, 0, sizeof(struct bset_stats));
bch_btree_op_init(&t.op, -1);
ret = btree_root(bset_stats, c, &op, &t); ret = bch_btree_map_nodes(&t.op, c, &ZERO_KEY, btree_bset_stats);
if (ret) if (ret < 0)
return ret; return ret;
return snprintf(buf, PAGE_SIZE, return snprintf(buf, PAGE_SIZE,
......
...@@ -148,6 +148,9 @@ ...@@ -148,6 +148,9 @@
struct btree_iter { struct btree_iter {
size_t size, used; size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
struct btree *b;
#endif
struct btree_iter_set { struct btree_iter_set {
struct bkey *k, *end; struct bkey *k, *end;
} data[MAX_BSETS]; } data[MAX_BSETS];
...@@ -193,54 +196,26 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l, ...@@ -193,54 +196,26 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l,
: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
} }
static inline size_t bkey_u64s(const struct bkey *k)
{
BUG_ON(KEY_CSUM(k) > 1);
return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ? 1 : 0);
}
static inline size_t bkey_bytes(const struct bkey *k)
{
return bkey_u64s(k) * sizeof(uint64_t);
}
static inline void bkey_copy(struct bkey *dest, const struct bkey *src)
{
memcpy(dest, src, bkey_bytes(src));
}
static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
{
if (!src)
src = &KEY(0, 0, 0);
SET_KEY_INODE(dest, KEY_INODE(src));
SET_KEY_OFFSET(dest, KEY_OFFSET(src));
}
static inline struct bkey *bkey_next(const struct bkey *k)
{
uint64_t *d = (void *) k;
return (struct bkey *) (d + bkey_u64s(k));
}
/* Keylists */ /* Keylists */
struct keylist { struct keylist {
struct bkey *top;
union { union {
uint64_t *list; struct bkey *keys;
struct bkey *bottom; uint64_t *keys_p;
};
union {
struct bkey *top;
uint64_t *top_p;
}; };
/* Enough room for btree_split's keys without realloc */ /* Enough room for btree_split's keys without realloc */
#define KEYLIST_INLINE 16 #define KEYLIST_INLINE 16
uint64_t d[KEYLIST_INLINE]; uint64_t inline_keys[KEYLIST_INLINE];
}; };
static inline void bch_keylist_init(struct keylist *l) static inline void bch_keylist_init(struct keylist *l)
{ {
l->top = (void *) (l->list = l->d); l->top_p = l->keys_p = l->inline_keys;
} }
static inline void bch_keylist_push(struct keylist *l) static inline void bch_keylist_push(struct keylist *l)
...@@ -256,17 +231,32 @@ static inline void bch_keylist_add(struct keylist *l, struct bkey *k) ...@@ -256,17 +231,32 @@ static inline void bch_keylist_add(struct keylist *l, struct bkey *k)
static inline bool bch_keylist_empty(struct keylist *l) static inline bool bch_keylist_empty(struct keylist *l)
{ {
return l->top == (void *) l->list; return l->top == l->keys;
}
static inline void bch_keylist_reset(struct keylist *l)
{
l->top = l->keys;
} }
static inline void bch_keylist_free(struct keylist *l) static inline void bch_keylist_free(struct keylist *l)
{ {
if (l->list != l->d) if (l->keys_p != l->inline_keys)
kfree(l->list); kfree(l->keys_p);
}
static inline size_t bch_keylist_nkeys(struct keylist *l)
{
return l->top_p - l->keys_p;
}
static inline size_t bch_keylist_bytes(struct keylist *l)
{
return bch_keylist_nkeys(l) * sizeof(uint64_t);
} }
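As a quick illustration of the reworked struct — keys/top now alias keys_p/top_p, and the list is drained from the front with the helper declared below — here is a hedged sketch; example_queue_one() is invented, and bch_keylist_add() is assumed to copy the key at top and push, as elsewhere in this header:

static void example_queue_one(struct cache_set *c, struct bkey *insert)
{
	struct keylist keys;

	bch_keylist_init(&keys);	/* keys_p/top_p start on inline_keys[] */

	/* make room for one more key carrying KEY_PTRS(insert) pointers */
	if (bch_keylist_realloc(&keys, KEY_PTRS(insert), c))
		return;

	bch_keylist_add(&keys, insert);

	pr_debug("queued %zu u64s (%zu bytes)",
		 bch_keylist_nkeys(&keys), bch_keylist_bytes(&keys));

	while (!bch_keylist_empty(&keys)) {
		struct bkey *k = keys.keys;	/* oldest key, FIFO order */

		/* ... journal/insert k here ... */
		bch_keylist_pop_front(&keys);
	}

	bch_keylist_free(&keys);
}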
void bch_keylist_copy(struct keylist *, struct keylist *);
struct bkey *bch_keylist_pop(struct keylist *); struct bkey *bch_keylist_pop(struct keylist *);
void bch_keylist_pop_front(struct keylist *);
int bch_keylist_realloc(struct keylist *, int, struct cache_set *); int bch_keylist_realloc(struct keylist *, int, struct cache_set *);
void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
...@@ -287,7 +277,9 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) ...@@ -287,7 +277,9 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
} }
const char *bch_ptr_status(struct cache_set *, const struct bkey *); const char *bch_ptr_status(struct cache_set *, const struct bkey *);
bool __bch_ptr_invalid(struct cache_set *, int level, const struct bkey *); bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_ptr_bad(struct btree *, const struct bkey *); bool bch_ptr_bad(struct btree *, const struct bkey *);
static inline uint8_t gen_after(uint8_t a, uint8_t b) static inline uint8_t gen_after(uint8_t a, uint8_t b)
...@@ -311,7 +303,6 @@ static inline bool ptr_available(struct cache_set *c, const struct bkey *k, ...@@ -311,7 +303,6 @@ static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *); typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
struct bkey *bch_next_recurse_key(struct btree *, struct bkey *);
struct bkey *bch_btree_iter_next(struct btree_iter *); struct bkey *bch_btree_iter_next(struct btree_iter *);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *, struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
struct btree *, ptr_filter_fn); struct btree *, ptr_filter_fn);
...@@ -361,12 +352,30 @@ void bch_bset_fix_lookup_table(struct btree *, struct bkey *); ...@@ -361,12 +352,30 @@ void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
struct bkey *__bch_bset_search(struct btree *, struct bset_tree *, struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
const struct bkey *); const struct bkey *);
/*
* Returns the first key that is strictly greater than search
*/
static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t, static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
const struct bkey *search) const struct bkey *search)
{ {
return search ? __bch_bset_search(b, t, search) : t->data->start; return search ? __bch_bset_search(b, t, search) : t->data->start;
} }
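Because the search helpers return the first key strictly greater than the search key, the usual lookup for "first live key ending after a given sector" primes an iterator with a zero-size key, along the lines of the removed bch_next_recurse_key(). A minimal sketch — example_first_key_after() is an invented name, and the caller is assumed to hold the node's lock:

static struct bkey *example_first_key_after(struct btree *b,
					    unsigned inode, uint64_t offset)
{
	struct btree_iter iter;
	struct bkey search = KEY(inode, offset, 0);

	/* positions each bset's cursor at the first key > search */
	bch_btree_iter_init(b, &iter, &search);

	/* skip stale/invalid pointers; NULL if nothing follows */
	return bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
}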
#define PRECEDING_KEY(_k) \
({ \
struct bkey *_ret = NULL; \
\
if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
\
if (!_ret->low) \
_ret->high--; \
_ret->low--; \
} \
\
_ret; \
})
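PRECEDING_KEY(_k) builds the largest key that still sorts strictly before _k (or NULL for ZERO_KEY), so a search primed with it will not skip keys sitting exactly at _k's position. One plausible use, sketched under the assumption that the caller wants extents overlapping k rather than strictly after it, with KEY()/START_KEY() taken from linux/bcache.h:

static struct bkey *example_first_overlapping(struct btree *b, struct bkey *k)
{
	struct btree_iter iter;

	/*
	 * Start the walk just before KEY_START(k): searches return keys
	 * strictly greater than the search key, so an extent that begins
	 * exactly at k's start offset would otherwise be skipped.
	 */
	bch_btree_iter_init(b, &iter, PRECEDING_KEY(&START_KEY(k)));

	return bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
}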
bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
void bch_btree_sort_lazy(struct btree *); void bch_btree_sort_lazy(struct btree *);
void bch_btree_sort_into(struct btree *, struct btree *); void bch_btree_sort_into(struct btree *, struct btree *);
......
This diff has been collapsed.
...@@ -125,6 +125,7 @@ struct btree { ...@@ -125,6 +125,7 @@ struct btree {
unsigned long seq; unsigned long seq;
struct rw_semaphore lock; struct rw_semaphore lock;
struct cache_set *c; struct cache_set *c;
struct btree *parent;
unsigned long flags; unsigned long flags;
uint16_t written; /* would be nice to kill */ uint16_t written; /* would be nice to kill */
...@@ -200,12 +201,7 @@ static inline bool bkey_written(struct btree *b, struct bkey *k) ...@@ -200,12 +201,7 @@ static inline bool bkey_written(struct btree *b, struct bkey *k)
static inline void set_gc_sectors(struct cache_set *c) static inline void set_gc_sectors(struct cache_set *c)
{ {
atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 8); atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
}
static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
{
return __bch_ptr_invalid(b->c, b->level, k);
} }
static inline struct bkey *bch_btree_iter_init(struct btree *b, static inline struct bkey *bch_btree_iter_init(struct btree *b,
...@@ -215,6 +211,16 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b, ...@@ -215,6 +211,16 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b,
return __bch_btree_iter_init(b, iter, search, b->sets); return __bch_btree_iter_init(b, iter, search, b->sets);
} }
static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
{
if (b->level)
return bch_btree_ptr_invalid(b->c, k);
else
return bch_extent_ptr_invalid(b->c, k);
}
void bkey_put(struct cache_set *c, struct bkey *k);
/* Looping macros */ /* Looping macros */
#define for_each_cached_btree(b, c, iter) \ #define for_each_cached_btree(b, c, iter) \
...@@ -234,51 +240,17 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b, ...@@ -234,51 +240,17 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b,
/* Recursing down the btree */ /* Recursing down the btree */
struct btree_op { struct btree_op {
struct closure cl;
struct cache_set *c;
/* Journal entry we have a refcount on */
atomic_t *journal;
/* Bio to be inserted into the cache */
struct bio *cache_bio;
unsigned inode;
uint16_t write_prio;
/* Btree level at which we start taking write locks */ /* Btree level at which we start taking write locks */
short lock; short lock;
/* Btree insertion type */
enum {
BTREE_INSERT,
BTREE_REPLACE
} type:8;
unsigned csum:1;
unsigned skip:1;
unsigned flush_journal:1;
unsigned insert_data_done:1;
unsigned lookup_done:1;
unsigned insert_collision:1; unsigned insert_collision:1;
/* Anything after this point won't get zeroed in do_bio_hook() */
/* Keys to be inserted */
struct keylist keys;
BKEY_PADDED(replace);
}; };
enum { static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
BTREE_INSERT_STATUS_INSERT, {
BTREE_INSERT_STATUS_BACK_MERGE, memset(op, 0, sizeof(struct btree_op));
BTREE_INSERT_STATUS_OVERWROTE, op->lock = write_lock_level;
BTREE_INSERT_STATUS_FRONT_MERGE, }
};
void bch_btree_op_init_stack(struct btree_op *);
static inline void rw_lock(bool w, struct btree *b, int level) static inline void rw_lock(bool w, struct btree *b, int level)
{ {
...@@ -290,108 +262,71 @@ static inline void rw_lock(bool w, struct btree *b, int level) ...@@ -290,108 +262,71 @@ static inline void rw_lock(bool w, struct btree *b, int level)
static inline void rw_unlock(bool w, struct btree *b) static inline void rw_unlock(bool w, struct btree *b)
{ {
#ifdef CONFIG_BCACHE_EDEBUG
unsigned i;
if (w && b->key.ptr[0])
for (i = 0; i <= b->nsets; i++)
bch_check_key_order(b, b->sets[i].data);
#endif
if (w) if (w)
b->seq++; b->seq++;
(w ? up_write : up_read)(&b->lock); (w ? up_write : up_read)(&b->lock);
} }
#define insert_lock(s, b) ((b)->level <= (s)->lock) void bch_btree_node_read(struct btree *);
void bch_btree_node_write(struct btree *, struct closure *);
/* void bch_btree_set_root(struct btree *);
* These macros are for recursing down the btree - they handle the details of struct btree *bch_btree_node_alloc(struct cache_set *, int, bool);
* locking and looking up nodes in the cache for you. They're best treated as struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
* mere syntax when reading code that uses them.
*
* op->lock determines whether we take a read or a write lock at a given depth.
* If you've got a read lock and find that you need a write lock (i.e. you're
* going to have to split), set op->lock and return -EINTR; btree_root() will
* call you again and you'll have the correct lock.
*/
/** int bch_btree_insert_check_key(struct btree *, struct btree_op *,
* btree - recurse down the btree on a specified key struct bkey *);
* @fn: function to call, which will be passed the child node int bch_btree_insert(struct cache_set *, struct keylist *,
* @key: key to recurse on atomic_t *, struct bkey *);
* @b: parent btree node
* @op: pointer to struct btree_op int bch_gc_thread_start(struct cache_set *);
*/ size_t bch_btree_gc_finish(struct cache_set *);
#define btree(fn, key, b, op, ...) \ void bch_moving_gc(struct cache_set *);
({ \ int bch_btree_check(struct cache_set *);
int _r, l = (b)->level - 1; \ uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
bool _w = l <= (op)->lock; \
struct btree *_b = bch_btree_node_get((b)->c, key, l, op); \
if (!IS_ERR(_b)) { \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
rw_unlock(_w, _b); \
} else \
_r = PTR_ERR(_b); \
_r; \
})
/**
* btree_root - call a function on the root of the btree
* @fn: function to call, which will be passed the child node
* @c: cache set
* @op: pointer to struct btree_op
*/
#define btree_root(fn, c, op, ...) \
({ \
int _r = -EINTR; \
do { \
struct btree *_b = (c)->root; \
bool _w = insert_lock(op, _b); \
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
rw_unlock(_w, _b); \
bch_cannibalize_unlock(c, &(op)->cl); \
} while (_r == -EINTR); \
\
_r; \
})
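The comment block removed above documents the -EINTR retry protocol behind the btree()/btree_root() macros: op->lock names the level at which write locks start, and a callee that discovers it needs a stronger lock raises op->lock and returns -EINTR so the root macro can call it again with the correct lock. The following stand-alone sketch (ordinary user-space C with invented toy_* names, not bcache code) models only that retry loop:

#include <errno.h>
#include <stdio.h>

struct toy_op {
        int lock;               /* level at or below which we take write locks */
};

struct toy_node {
        int level;
        int needs_split;        /* stand-in for "this insert must split the node" */
};

/* Models the per-node work done under btree(): if we discover we hold the
 * wrong lock type, record the level we need write-locked and bail out. */
static int toy_insert(struct toy_node *n, struct toy_op *op)
{
        int write_locked = n->level <= op->lock;

        if (n->needs_split && !write_locked) {
                op->lock = n->level;
                return -EINTR;
        }

        printf("level %d: inserted under a %s lock\n",
               n->level, write_locked ? "write" : "read");
        return 0;
}

/* Models btree_root(): retry from the top until the callee stops asking
 * for a stronger lock. */
int main(void)
{
        struct toy_op op = { .lock = -1 };      /* read locks everywhere */
        struct toy_node root = { .level = 1, .needs_split = 1 };
        int r;

        do {
                r = toy_insert(&root, &op);
        } while (r == -EINTR);

        return r;
}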
static inline bool should_split(struct btree *b) static inline void wake_up_gc(struct cache_set *c)
{ {
struct bset *i = write_block(b); if (c->gc_thread)
return b->written >= btree_blocks(b) || wake_up_process(c->gc_thread);
(i->seq == b->sets[0].data->seq &&
b->written + __set_blocks(i, i->keys + 15, b->c)
> btree_blocks(b));
} }
void bch_btree_node_read(struct btree *); #define MAP_DONE 0
void bch_btree_node_write(struct btree *, struct closure *); #define MAP_CONTINUE 1
void bch_cannibalize_unlock(struct cache_set *, struct closure *); #define MAP_ALL_NODES 0
void bch_btree_set_root(struct btree *); #define MAP_LEAF_NODES 1
struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
int, struct btree_op *);
bool bch_btree_insert_check_key(struct btree *, struct btree_op *, #define MAP_END_KEY 1
struct bio *);
int bch_btree_insert(struct btree_op *, struct cache_set *);
int bch_btree_search_recurse(struct btree *, struct btree_op *); typedef int (btree_map_nodes_fn)(struct btree_op *, struct btree *);
int __bch_btree_map_nodes(struct btree_op *, struct cache_set *,
struct bkey *, btree_map_nodes_fn *, int);
void bch_queue_gc(struct cache_set *); static inline int bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
size_t bch_btree_gc_finish(struct cache_set *); struct bkey *from, btree_map_nodes_fn *fn)
void bch_moving_gc(struct closure *); {
int bch_btree_check(struct cache_set *, struct btree_op *); return __bch_btree_map_nodes(op, c, from, fn, MAP_ALL_NODES);
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); }
static inline int bch_btree_map_leaf_nodes(struct btree_op *op,
struct cache_set *c,
struct bkey *from,
btree_map_nodes_fn *fn)
{
return __bch_btree_map_nodes(op, c, from, fn, MAP_LEAF_NODES);
}
typedef int (btree_map_keys_fn)(struct btree_op *, struct btree *,
struct bkey *);
int bch_btree_map_keys(struct btree_op *, struct cache_set *,
struct bkey *, btree_map_keys_fn *, int);
typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
void bch_keybuf_init(struct keybuf *); void bch_keybuf_init(struct keybuf *);
void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *, void bch_refill_keybuf(struct cache_set *, struct keybuf *,
keybuf_pred_fn *); struct bkey *, keybuf_pred_fn *);
bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
struct bkey *); struct bkey *);
void bch_keybuf_del(struct keybuf *, struct keybuf_key *); void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
......
...@@ -11,17 +11,6 @@ ...@@ -11,17 +11,6 @@
#include "closure.h" #include "closure.h"
void closure_queue(struct closure *cl)
{
struct workqueue_struct *wq = cl->wq;
if (wq) {
INIT_WORK(&cl->work, cl->work.func);
BUG_ON(!queue_work(wq, &cl->work));
} else
cl->fn(cl);
}
EXPORT_SYMBOL_GPL(closure_queue);
#define CL_FIELD(type, field) \ #define CL_FIELD(type, field) \
case TYPE_ ## type: \ case TYPE_ ## type: \
return &container_of(cl, struct type, cl)->field return &container_of(cl, struct type, cl)->field
...@@ -30,17 +19,6 @@ static struct closure_waitlist *closure_waitlist(struct closure *cl) ...@@ -30,17 +19,6 @@ static struct closure_waitlist *closure_waitlist(struct closure *cl)
{ {
switch (cl->type) { switch (cl->type) {
CL_FIELD(closure_with_waitlist, wait); CL_FIELD(closure_with_waitlist, wait);
CL_FIELD(closure_with_waitlist_and_timer, wait);
default:
return NULL;
}
}
static struct timer_list *closure_timer(struct closure *cl)
{
switch (cl->type) {
CL_FIELD(closure_with_timer, timer);
CL_FIELD(closure_with_waitlist_and_timer, timer);
default: default:
return NULL; return NULL;
} }
...@@ -51,7 +29,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) ...@@ -51,7 +29,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
int r = flags & CLOSURE_REMAINING_MASK; int r = flags & CLOSURE_REMAINING_MASK;
BUG_ON(flags & CLOSURE_GUARD_MASK); BUG_ON(flags & CLOSURE_GUARD_MASK);
BUG_ON(!r && (flags & ~(CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING))); BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
/* Must deliver precisely one wakeup */ /* Must deliver precisely one wakeup */
if (r == 1 && (flags & CLOSURE_SLEEPING)) if (r == 1 && (flags & CLOSURE_SLEEPING))
...@@ -59,7 +37,6 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) ...@@ -59,7 +37,6 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
if (!r) { if (!r) {
if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
/* CLOSURE_BLOCKING might be set - clear it */
atomic_set(&cl->remaining, atomic_set(&cl->remaining,
CLOSURE_REMAINING_INITIALIZER); CLOSURE_REMAINING_INITIALIZER);
closure_queue(cl); closure_queue(cl);
...@@ -90,13 +67,13 @@ void closure_sub(struct closure *cl, int v) ...@@ -90,13 +67,13 @@ void closure_sub(struct closure *cl, int v)
{ {
closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
} }
EXPORT_SYMBOL_GPL(closure_sub); EXPORT_SYMBOL(closure_sub);
void closure_put(struct closure *cl) void closure_put(struct closure *cl)
{ {
closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
} }
EXPORT_SYMBOL_GPL(closure_put); EXPORT_SYMBOL(closure_put);
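closure_put_after_sub() above keeps the reference count and the state flags in a single atomic word: when the count portion reaches zero it either runs the destructor or requeues the continuation, and (per the comment) delivers exactly one wakeup to a sleeping owner. A minimal stand-alone model of that count-plus-flags word, with invented toy_* names rather than the kernel types:

#include <stdio.h>

#define TOY_REMAINING_MASK 0x00ff       /* low bits: reference count */
#define TOY_DESTRUCTOR     0x0100       /* flag: run destructor when count hits zero */

static void toy_put(unsigned *remaining)
{
        unsigned v = --(*remaining);
        unsigned refs = v & TOY_REMAINING_MASK;

        if (!refs) {
                if (v & TOY_DESTRUCTOR)
                        printf("last ref dropped: run the destructor\n");
                else
                        printf("last ref dropped: requeue the continuation\n");
        }
}

int main(void)
{
        unsigned remaining = TOY_DESTRUCTOR | 2;        /* two references held */

        toy_put(&remaining);    /* one ref left: nothing happens */
        toy_put(&remaining);    /* count reaches zero: destructor path */
        return 0;
}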
static void set_waiting(struct closure *cl, unsigned long f) static void set_waiting(struct closure *cl, unsigned long f)
{ {
...@@ -133,7 +110,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list) ...@@ -133,7 +110,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
closure_sub(cl, CLOSURE_WAITING + 1); closure_sub(cl, CLOSURE_WAITING + 1);
} }
} }
EXPORT_SYMBOL_GPL(__closure_wake_up); EXPORT_SYMBOL(__closure_wake_up);
bool closure_wait(struct closure_waitlist *list, struct closure *cl) bool closure_wait(struct closure_waitlist *list, struct closure *cl)
{ {
...@@ -146,7 +123,7 @@ bool closure_wait(struct closure_waitlist *list, struct closure *cl) ...@@ -146,7 +123,7 @@ bool closure_wait(struct closure_waitlist *list, struct closure *cl)
return true; return true;
} }
EXPORT_SYMBOL_GPL(closure_wait); EXPORT_SYMBOL(closure_wait);
/** /**
 * closure_sync() - sleep until a closure has nothing left to wait on  * closure_sync() - sleep until a closure has nothing left to wait on
...@@ -169,7 +146,7 @@ void closure_sync(struct closure *cl) ...@@ -169,7 +146,7 @@ void closure_sync(struct closure *cl)
__closure_end_sleep(cl); __closure_end_sleep(cl);
} }
EXPORT_SYMBOL_GPL(closure_sync); EXPORT_SYMBOL(closure_sync);
/** /**
* closure_trylock() - try to acquire the closure, without waiting * closure_trylock() - try to acquire the closure, without waiting
...@@ -183,17 +160,17 @@ bool closure_trylock(struct closure *cl, struct closure *parent) ...@@ -183,17 +160,17 @@ bool closure_trylock(struct closure *cl, struct closure *parent)
CLOSURE_REMAINING_INITIALIZER) != -1) CLOSURE_REMAINING_INITIALIZER) != -1)
return false; return false;
closure_set_ret_ip(cl);
smp_mb(); smp_mb();
cl->parent = parent; cl->parent = parent;
if (parent) if (parent)
closure_get(parent); closure_get(parent);
closure_set_ret_ip(cl);
closure_debug_create(cl); closure_debug_create(cl);
return true; return true;
} }
EXPORT_SYMBOL_GPL(closure_trylock); EXPORT_SYMBOL(closure_trylock);
void __closure_lock(struct closure *cl, struct closure *parent, void __closure_lock(struct closure *cl, struct closure *parent,
struct closure_waitlist *wait_list) struct closure_waitlist *wait_list)
...@@ -205,57 +182,11 @@ void __closure_lock(struct closure *cl, struct closure *parent, ...@@ -205,57 +182,11 @@ void __closure_lock(struct closure *cl, struct closure *parent,
if (closure_trylock(cl, parent)) if (closure_trylock(cl, parent))
return; return;
closure_wait_event_sync(wait_list, &wait, closure_wait_event(wait_list, &wait,
atomic_read(&cl->remaining) == -1); atomic_read(&cl->remaining) == -1);
} }
} }
EXPORT_SYMBOL_GPL(__closure_lock); EXPORT_SYMBOL(__closure_lock);
static void closure_delay_timer_fn(unsigned long data)
{
struct closure *cl = (struct closure *) data;
closure_sub(cl, CLOSURE_TIMER + 1);
}
void do_closure_timer_init(struct closure *cl)
{
struct timer_list *timer = closure_timer(cl);
init_timer(timer);
timer->data = (unsigned long) cl;
timer->function = closure_delay_timer_fn;
}
EXPORT_SYMBOL_GPL(do_closure_timer_init);
bool __closure_delay(struct closure *cl, unsigned long delay,
struct timer_list *timer)
{
if (atomic_read(&cl->remaining) & CLOSURE_TIMER)
return false;
BUG_ON(timer_pending(timer));
timer->expires = jiffies + delay;
atomic_add(CLOSURE_TIMER + 1, &cl->remaining);
add_timer(timer);
return true;
}
EXPORT_SYMBOL_GPL(__closure_delay);
void __closure_flush(struct closure *cl, struct timer_list *timer)
{
if (del_timer(timer))
closure_sub(cl, CLOSURE_TIMER + 1);
}
EXPORT_SYMBOL_GPL(__closure_flush);
void __closure_flush_sync(struct closure *cl, struct timer_list *timer)
{
if (del_timer_sync(timer))
closure_sub(cl, CLOSURE_TIMER + 1);
}
EXPORT_SYMBOL_GPL(__closure_flush_sync);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
...@@ -273,7 +204,7 @@ void closure_debug_create(struct closure *cl) ...@@ -273,7 +204,7 @@ void closure_debug_create(struct closure *cl)
list_add(&cl->all, &closure_list); list_add(&cl->all, &closure_list);
spin_unlock_irqrestore(&closure_list_lock, flags); spin_unlock_irqrestore(&closure_list_lock, flags);
} }
EXPORT_SYMBOL_GPL(closure_debug_create); EXPORT_SYMBOL(closure_debug_create);
void closure_debug_destroy(struct closure *cl) void closure_debug_destroy(struct closure *cl)
{ {
...@@ -286,7 +217,7 @@ void closure_debug_destroy(struct closure *cl) ...@@ -286,7 +217,7 @@ void closure_debug_destroy(struct closure *cl)
list_del(&cl->all); list_del(&cl->all);
spin_unlock_irqrestore(&closure_list_lock, flags); spin_unlock_irqrestore(&closure_list_lock, flags);
} }
EXPORT_SYMBOL_GPL(closure_debug_destroy); EXPORT_SYMBOL(closure_debug_destroy);
static struct dentry *debug; static struct dentry *debug;
...@@ -304,14 +235,12 @@ static int debug_seq_show(struct seq_file *f, void *data) ...@@ -304,14 +235,12 @@ static int debug_seq_show(struct seq_file *f, void *data)
cl, (void *) cl->ip, cl->fn, cl->parent, cl, (void *) cl->ip, cl->fn, cl->parent,
r & CLOSURE_REMAINING_MASK); r & CLOSURE_REMAINING_MASK);
seq_printf(f, "%s%s%s%s%s%s\n", seq_printf(f, "%s%s%s%s\n",
test_bit(WORK_STRUCT_PENDING, test_bit(WORK_STRUCT_PENDING,
work_data_bits(&cl->work)) ? "Q" : "", work_data_bits(&cl->work)) ? "Q" : "",
r & CLOSURE_RUNNING ? "R" : "", r & CLOSURE_RUNNING ? "R" : "",
r & CLOSURE_BLOCKING ? "B" : "",
r & CLOSURE_STACK ? "S" : "", r & CLOSURE_STACK ? "S" : "",
r & CLOSURE_SLEEPING ? "Sl" : "", r & CLOSURE_SLEEPING ? "Sl" : "");
r & CLOSURE_TIMER ? "T" : "");
if (r & CLOSURE_WAITING) if (r & CLOSURE_WAITING)
seq_printf(f, " W %pF\n", seq_printf(f, " W %pF\n",
......
...@@ -155,21 +155,6 @@ ...@@ -155,21 +155,6 @@
* delayed_work embeds a work item and a timer_list. The important thing is, use * delayed_work embeds a work item and a timer_list. The important thing is, use
* it exactly like you would a regular closure and closure_put() will magically * it exactly like you would a regular closure and closure_put() will magically
* handle everything for you. * handle everything for you.
*
* We've got closures that embed timers, too. They're called, appropriately
* enough:
* struct closure_with_timer;
*
* This gives you access to closure_delay(). It takes a refcount for a specified
* number of jiffies - you could then call closure_sync() (for a slightly
* convoluted version of msleep()) or continue_at() - which gives you the same
* effect as using a delayed work item, except you can reuse the work_struct
* already embedded in struct closure.
*
* Lastly, there's struct closure_with_waitlist_and_timer. It does what you
* probably expect, if you happen to need the features of both. (You don't
* really want to know how all this is implemented, but if I've done my job
* right you shouldn't have to care).
*/ */
struct closure; struct closure;
...@@ -182,16 +167,11 @@ struct closure_waitlist { ...@@ -182,16 +167,11 @@ struct closure_waitlist {
enum closure_type { enum closure_type {
TYPE_closure = 0, TYPE_closure = 0,
TYPE_closure_with_waitlist = 1, TYPE_closure_with_waitlist = 1,
TYPE_closure_with_timer = 2, MAX_CLOSURE_TYPE = 1,
TYPE_closure_with_waitlist_and_timer = 3,
MAX_CLOSURE_TYPE = 3,
}; };
enum closure_state { enum closure_state {
/* /*
* CLOSURE_BLOCKING: Causes closure_wait_event() to block, instead of
* waiting asynchronously
*
* CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
* the thread that owns the closure, and cleared by the thread that's * the thread that owns the closure, and cleared by the thread that's
* waking up the closure. * waking up the closure.
...@@ -200,10 +180,6 @@ enum closure_state { ...@@ -200,10 +180,6 @@ enum closure_state {
* - indicates that cl->task is valid and closure_put() may wake it up. * - indicates that cl->task is valid and closure_put() may wake it up.
* Only set or cleared by the thread that owns the closure. * Only set or cleared by the thread that owns the closure.
* *
 * CLOSURE_TIMER: Analogous to CLOSURE_WAITING, indicates that a closure
* has an outstanding timer. Must be set by the thread that owns the
* closure, and cleared by the timer function when the timer goes off.
*
* The rest are for debugging and don't affect behaviour: * The rest are for debugging and don't affect behaviour:
* *
* CLOSURE_RUNNING: Set when a closure is running (i.e. by * CLOSURE_RUNNING: Set when a closure is running (i.e. by
...@@ -218,19 +194,17 @@ enum closure_state { ...@@ -218,19 +194,17 @@ enum closure_state {
* closure with this flag set * closure with this flag set
*/ */
CLOSURE_BITS_START = (1 << 19), CLOSURE_BITS_START = (1 << 23),
CLOSURE_DESTRUCTOR = (1 << 19), CLOSURE_DESTRUCTOR = (1 << 23),
CLOSURE_BLOCKING = (1 << 21), CLOSURE_WAITING = (1 << 25),
CLOSURE_WAITING = (1 << 23), CLOSURE_SLEEPING = (1 << 27),
CLOSURE_SLEEPING = (1 << 25),
CLOSURE_TIMER = (1 << 27),
CLOSURE_RUNNING = (1 << 29), CLOSURE_RUNNING = (1 << 29),
CLOSURE_STACK = (1 << 31), CLOSURE_STACK = (1 << 31),
}; };
#define CLOSURE_GUARD_MASK \ #define CLOSURE_GUARD_MASK \
((CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING|CLOSURE_WAITING| \ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING| \
CLOSURE_SLEEPING|CLOSURE_TIMER|CLOSURE_RUNNING|CLOSURE_STACK) << 1) CLOSURE_RUNNING|CLOSURE_STACK) << 1)
#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) #define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) #define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
...@@ -268,17 +242,6 @@ struct closure_with_waitlist { ...@@ -268,17 +242,6 @@ struct closure_with_waitlist {
struct closure_waitlist wait; struct closure_waitlist wait;
}; };
struct closure_with_timer {
struct closure cl;
struct timer_list timer;
};
struct closure_with_waitlist_and_timer {
struct closure cl;
struct closure_waitlist wait;
struct timer_list timer;
};
extern unsigned invalid_closure_type(void); extern unsigned invalid_closure_type(void);
#define __CLOSURE_TYPE(cl, _t) \ #define __CLOSURE_TYPE(cl, _t) \
...@@ -289,14 +252,11 @@ extern unsigned invalid_closure_type(void); ...@@ -289,14 +252,11 @@ extern unsigned invalid_closure_type(void);
( \ ( \
__CLOSURE_TYPE(cl, closure) \ __CLOSURE_TYPE(cl, closure) \
__CLOSURE_TYPE(cl, closure_with_waitlist) \ __CLOSURE_TYPE(cl, closure_with_waitlist) \
__CLOSURE_TYPE(cl, closure_with_timer) \
__CLOSURE_TYPE(cl, closure_with_waitlist_and_timer) \
invalid_closure_type() \ invalid_closure_type() \
) )
void closure_sub(struct closure *cl, int v); void closure_sub(struct closure *cl, int v);
void closure_put(struct closure *cl); void closure_put(struct closure *cl);
void closure_queue(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list); void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl); bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void closure_sync(struct closure *cl); void closure_sync(struct closure *cl);
...@@ -305,12 +265,6 @@ bool closure_trylock(struct closure *cl, struct closure *parent); ...@@ -305,12 +265,6 @@ bool closure_trylock(struct closure *cl, struct closure *parent);
void __closure_lock(struct closure *cl, struct closure *parent, void __closure_lock(struct closure *cl, struct closure *parent,
struct closure_waitlist *wait_list); struct closure_waitlist *wait_list);
void do_closure_timer_init(struct closure *cl);
bool __closure_delay(struct closure *cl, unsigned long delay,
struct timer_list *timer);
void __closure_flush(struct closure *cl, struct timer_list *timer);
void __closure_flush_sync(struct closure *cl, struct timer_list *timer);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
void closure_debug_init(void); void closure_debug_init(void);
...@@ -354,11 +308,6 @@ static inline void closure_set_stopped(struct closure *cl) ...@@ -354,11 +308,6 @@ static inline void closure_set_stopped(struct closure *cl)
atomic_sub(CLOSURE_RUNNING, &cl->remaining); atomic_sub(CLOSURE_RUNNING, &cl->remaining);
} }
static inline bool closure_is_stopped(struct closure *cl)
{
return !(atomic_read(&cl->remaining) & CLOSURE_RUNNING);
}
static inline bool closure_is_unlocked(struct closure *cl) static inline bool closure_is_unlocked(struct closure *cl)
{ {
return atomic_read(&cl->remaining) == -1; return atomic_read(&cl->remaining) == -1;
...@@ -367,14 +316,6 @@ static inline bool closure_is_unlocked(struct closure *cl) ...@@ -367,14 +316,6 @@ static inline bool closure_is_unlocked(struct closure *cl)
static inline void do_closure_init(struct closure *cl, struct closure *parent, static inline void do_closure_init(struct closure *cl, struct closure *parent,
bool running) bool running)
{ {
switch (cl->type) {
case TYPE_closure_with_timer:
case TYPE_closure_with_waitlist_and_timer:
do_closure_timer_init(cl);
default:
break;
}
cl->parent = parent; cl->parent = parent;
if (parent) if (parent)
closure_get(parent); closure_get(parent);
...@@ -429,8 +370,7 @@ do { \ ...@@ -429,8 +370,7 @@ do { \
static inline void closure_init_stack(struct closure *cl) static inline void closure_init_stack(struct closure *cl)
{ {
memset(cl, 0, sizeof(struct closure)); memset(cl, 0, sizeof(struct closure));
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER| atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
CLOSURE_BLOCKING|CLOSURE_STACK);
} }
/** /**
...@@ -461,24 +401,6 @@ do { \ ...@@ -461,24 +401,6 @@ do { \
#define closure_lock(cl, parent) \ #define closure_lock(cl, parent) \
__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait) __closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
/**
* closure_delay() - delay some number of jiffies
* @cl: the closure that will sleep
* @delay: the delay in jiffies
*
* Takes a refcount on @cl which will be released after @delay jiffies; this may
* be used to have a function run after a delay with continue_at(), or
* closure_sync() may be used for a convoluted version of msleep().
*/
#define closure_delay(cl, delay) \
__closure_delay(__to_internal_closure(cl), delay, &(cl)->timer)
#define closure_flush(cl) \
__closure_flush(__to_internal_closure(cl), &(cl)->timer)
#define closure_flush_sync(cl) \
__closure_flush_sync(__to_internal_closure(cl), &(cl)->timer)
static inline void __closure_end_sleep(struct closure *cl) static inline void __closure_end_sleep(struct closure *cl)
{ {
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
...@@ -497,40 +419,6 @@ static inline void __closure_start_sleep(struct closure *cl) ...@@ -497,40 +419,6 @@ static inline void __closure_start_sleep(struct closure *cl)
atomic_add(CLOSURE_SLEEPING, &cl->remaining); atomic_add(CLOSURE_SLEEPING, &cl->remaining);
} }
/**
* closure_blocking() - returns true if the closure is in blocking mode.
*
* If a closure is in blocking mode, closure_wait_event() will sleep until the
* condition is true instead of waiting asynchronously.
*/
static inline bool closure_blocking(struct closure *cl)
{
return atomic_read(&cl->remaining) & CLOSURE_BLOCKING;
}
/**
* set_closure_blocking() - put a closure in blocking mode.
*
* If a closure is in blocking mode, closure_wait_event() will sleep until the
* condition is true instead of waiting asynchronously.
*
* Not thread safe - can only be called by the thread running the closure.
*/
static inline void set_closure_blocking(struct closure *cl)
{
if (!closure_blocking(cl))
atomic_add(CLOSURE_BLOCKING, &cl->remaining);
}
/*
* Not thread safe - can only be called by the thread running the closure.
*/
static inline void clear_closure_blocking(struct closure *cl)
{
if (closure_blocking(cl))
atomic_sub(CLOSURE_BLOCKING, &cl->remaining);
}
/** /**
* closure_wake_up() - wake up all closures on a wait list. * closure_wake_up() - wake up all closures on a wait list.
*/ */
...@@ -561,63 +449,36 @@ static inline void closure_wake_up(struct closure_waitlist *list) ...@@ -561,63 +449,36 @@ static inline void closure_wake_up(struct closure_waitlist *list)
* refcount on our closure. If this was a stack allocated closure, that would be * refcount on our closure. If this was a stack allocated closure, that would be
* bad. * bad.
*/ */
#define __closure_wait_event(list, cl, condition, _block) \ #define closure_wait_event(list, cl, condition) \
({ \ ({ \
bool block = _block; \
typeof(condition) ret; \ typeof(condition) ret; \
\ \
while (1) { \ while (1) { \
ret = (condition); \ ret = (condition); \
if (ret) { \ if (ret) { \
__closure_wake_up(list); \ __closure_wake_up(list); \
if (block) \ closure_sync(cl); \
closure_sync(cl); \
\
break; \ break; \
} \ } \
\ \
if (block) \ __closure_start_sleep(cl); \
__closure_start_sleep(cl); \
\
if (!closure_wait(list, cl)) { \
if (!block) \
break; \
\ \
if (!closure_wait(list, cl)) \
schedule(); \ schedule(); \
} \
} \ } \
\ \
ret; \ ret; \
}) })
/** static inline void closure_queue(struct closure *cl)
* closure_wait_event() - wait on a condition, synchronously or asynchronously. {
* @list: the wait list to wait on struct workqueue_struct *wq = cl->wq;
* @cl: the closure that is doing the waiting if (wq) {
* @condition: a C expression for the event to wait for INIT_WORK(&cl->work, cl->work.func);
* BUG_ON(!queue_work(wq, &cl->work));
* If the closure is in blocking mode, sleeps until the @condition evaluates to } else
* true - exactly like wait_event(). cl->fn(cl);
* }
* If the closure is not in blocking mode, waits asynchronously; if the
* condition is currently false the @cl is put onto @list and returns. @list
* owns a refcount on @cl; closure_sync() or continue_at() may be used later to
* wait for another thread to wake up @list, which drops the refcount on @cl.
*
* Returns the value of @condition; @cl will be on @list iff @condition was
* false.
*
* closure_wake_up(@list) must be called after changing any variable that could
* cause @condition to become true.
*/
#define closure_wait_event(list, cl, condition) \
__closure_wait_event(list, cl, condition, closure_blocking(cl))
#define closure_wait_event_async(list, cl, condition) \
__closure_wait_event(list, cl, condition, false)
#define closure_wait_event_sync(list, cl, condition) \
__closure_wait_event(list, cl, condition, true)
static inline void set_closure_fn(struct closure *cl, closure_fn *fn, static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
struct workqueue_struct *wq) struct workqueue_struct *wq)
...@@ -642,7 +503,7 @@ do { \ ...@@ -642,7 +503,7 @@ do { \
#define continue_at_nobarrier(_cl, _fn, _wq) \ #define continue_at_nobarrier(_cl, _fn, _wq) \
do { \ do { \
set_closure_fn(_cl, _fn, _wq); \ set_closure_fn(_cl, _fn, _wq); \
closure_queue(cl); \ closure_queue(_cl); \
return; \ return; \
} while (0) } while (0)
......
...@@ -8,7 +8,6 @@ ...@@ -8,7 +8,6 @@
#include "bcache.h" #include "bcache.h"
#include "btree.h" #include "btree.h"
#include "debug.h" #include "debug.h"
#include "request.h"
#include <linux/console.h> #include <linux/console.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
...@@ -77,29 +76,17 @@ int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k) ...@@ -77,29 +76,17 @@ int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
return out - buf; return out - buf;
} }
int bch_btree_to_text(char *buf, size_t size, const struct btree *b) #ifdef CONFIG_BCACHE_DEBUG
{
return scnprintf(buf, size, "%zu level %i/%i",
PTR_BUCKET_NR(b->c, &b->key, 0),
b->level, b->c->root ? b->c->root->level : -1);
}
#if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
static bool skipped_backwards(struct btree *b, struct bkey *k)
{
return bkey_cmp(k, (!b->level)
? &START_KEY(bkey_next(k))
: bkey_next(k)) > 0;
}
static void dump_bset(struct btree *b, struct bset *i) static void dump_bset(struct btree *b, struct bset *i)
{ {
struct bkey *k; struct bkey *k, *next;
unsigned j; unsigned j;
char buf[80]; char buf[80];
for (k = i->start; k < end(i); k = bkey_next(k)) { for (k = i->start; k < end(i); k = next) {
next = bkey_next(k);
bch_bkey_to_text(buf, sizeof(buf), k); bch_bkey_to_text(buf, sizeof(buf), k);
printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b), printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
(uint64_t *) k - i->d, i->keys, buf); (uint64_t *) k - i->d, i->keys, buf);
...@@ -115,15 +102,21 @@ static void dump_bset(struct btree *b, struct bset *i) ...@@ -115,15 +102,21 @@ static void dump_bset(struct btree *b, struct bset *i)
printk(" %s\n", bch_ptr_status(b->c, k)); printk(" %s\n", bch_ptr_status(b->c, k));
if (bkey_next(k) < end(i) && if (next < end(i) &&
skipped_backwards(b, k)) bkey_cmp(k, !b->level ? &START_KEY(next) : next) > 0)
printk(KERN_ERR "Key skipped backwards\n"); printk(KERN_ERR "Key skipped backwards\n");
} }
} }
#endif static void bch_dump_bucket(struct btree *b)
{
unsigned i;
#ifdef CONFIG_BCACHE_DEBUG console_lock();
for (i = 0; i <= b->nsets; i++)
dump_bset(b, b->sets[i].data);
console_unlock();
}
void bch_btree_verify(struct btree *b, struct bset *new) void bch_btree_verify(struct btree *b, struct bset *new)
{ {
...@@ -176,66 +169,44 @@ void bch_btree_verify(struct btree *b, struct bset *new) ...@@ -176,66 +169,44 @@ void bch_btree_verify(struct btree *b, struct bset *new)
mutex_unlock(&b->c->verify_lock); mutex_unlock(&b->c->verify_lock);
} }
static void data_verify_endio(struct bio *bio, int error) void bch_data_verify(struct cached_dev *dc, struct bio *bio)
{
struct closure *cl = bio->bi_private;
closure_put(cl);
}
void bch_data_verify(struct search *s)
{ {
char name[BDEVNAME_SIZE]; char name[BDEVNAME_SIZE];
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct closure *cl = &s->cl;
struct bio *check; struct bio *check;
struct bio_vec *bv; struct bio_vec *bv;
int i; int i;
if (!s->unaligned_bvec) check = bio_clone(bio, GFP_NOIO);
bio_for_each_segment(bv, s->orig_bio, i)
bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
check = bio_clone(s->orig_bio, GFP_NOIO);
if (!check) if (!check)
return; return;
if (bio_alloc_pages(check, GFP_NOIO)) if (bio_alloc_pages(check, GFP_NOIO))
goto out_put; goto out_put;
check->bi_rw = READ_SYNC; submit_bio_wait(READ_SYNC, check);
check->bi_private = cl;
check->bi_end_io = data_verify_endio;
closure_bio_submit(check, cl, &dc->disk);
closure_sync(cl);
bio_for_each_segment(bv, s->orig_bio, i) { bio_for_each_segment(bv, bio, i) {
void *p1 = kmap(bv->bv_page); void *p1 = kmap_atomic(bv->bv_page);
void *p2 = kmap(check->bi_io_vec[i].bv_page); void *p2 = page_address(check->bi_io_vec[i].bv_page);
if (memcmp(p1 + bv->bv_offset, cache_set_err_on(memcmp(p1 + bv->bv_offset,
p2 + bv->bv_offset, p2 + bv->bv_offset,
bv->bv_len)) bv->bv_len),
printk(KERN_ERR dc->disk.c,
"bcache (%s): verify failed at sector %llu\n", "verify failed at dev %s sector %llu",
bdevname(dc->bdev, name), bdevname(dc->bdev, name),
(uint64_t) s->orig_bio->bi_sector); (uint64_t) bio->bi_sector);
kunmap(bv->bv_page); kunmap_atomic(p1);
kunmap(check->bi_io_vec[i].bv_page);
} }
__bio_for_each_segment(bv, check, i, 0) bio_for_each_segment_all(bv, check, i)
__free_page(bv->bv_page); __free_page(bv->bv_page);
out_put: out_put:
bio_put(check); bio_put(check);
} }
#endif int __bch_count_data(struct btree *b)
#ifdef CONFIG_BCACHE_EDEBUG
unsigned bch_count_data(struct btree *b)
{ {
unsigned ret = 0; unsigned ret = 0;
struct btree_iter iter; struct btree_iter iter;
...@@ -247,72 +218,60 @@ unsigned bch_count_data(struct btree *b) ...@@ -247,72 +218,60 @@ unsigned bch_count_data(struct btree *b)
return ret; return ret;
} }
static void vdump_bucket_and_panic(struct btree *b, const char *fmt, void __bch_check_keys(struct btree *b, const char *fmt, ...)
va_list args)
{
unsigned i;
char buf[80];
console_lock();
for (i = 0; i <= b->nsets; i++)
dump_bset(b, b->sets[i].data);
vprintk(fmt, args);
console_unlock();
bch_btree_to_text(buf, sizeof(buf), b);
panic("at %s\n", buf);
}
void bch_check_key_order_msg(struct btree *b, struct bset *i,
const char *fmt, ...)
{
struct bkey *k;
if (!i->keys)
return;
for (k = i->start; bkey_next(k) < end(i); k = bkey_next(k))
if (skipped_backwards(b, k)) {
va_list args;
va_start(args, fmt);
vdump_bucket_and_panic(b, fmt, args);
va_end(args);
}
}
void bch_check_keys(struct btree *b, const char *fmt, ...)
{ {
va_list args; va_list args;
struct bkey *k, *p = NULL; struct bkey *k, *p = NULL;
struct btree_iter iter; struct btree_iter iter;
const char *err;
if (b->level)
return;
for_each_key(b, k, &iter) { for_each_key(b, k, &iter) {
if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0) { if (!b->level) {
printk(KERN_ERR "Keys out of order:\n"); err = "Keys out of order";
goto bug; if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
} goto bug;
if (bch_ptr_invalid(b, k)) if (bch_ptr_invalid(b, k))
continue; continue;
if (p && bkey_cmp(p, &START_KEY(k)) > 0) { err = "Overlapping keys";
printk(KERN_ERR "Overlapping keys:\n"); if (p && bkey_cmp(p, &START_KEY(k)) > 0)
goto bug; goto bug;
} else {
if (bch_ptr_bad(b, k))
continue;
err = "Duplicate keys";
if (p && !bkey_cmp(p, k))
goto bug;
} }
p = k; p = k;
} }
err = "Key larger than btree node key";
if (p && bkey_cmp(p, &b->key) > 0)
goto bug;
return; return;
bug: bug:
bch_dump_bucket(b);
va_start(args, fmt); va_start(args, fmt);
vdump_bucket_and_panic(b, fmt, args); vprintk(fmt, args);
va_end(args); va_end(args);
panic("bcache error: %s:\n", err);
}
void bch_btree_iter_next_check(struct btree_iter *iter)
{
struct bkey *k = iter->data->k, *next = bkey_next(k);
if (next < iter->data->end &&
bkey_cmp(k, iter->b->level ? next : &START_KEY(next)) > 0) {
bch_dump_bucket(iter->b);
panic("Key skipped backwards\n");
}
} }
#endif #endif
......
...@@ -4,40 +4,44 @@ ...@@ -4,40 +4,44 @@
/* Btree/bkey debug printing */ /* Btree/bkey debug printing */
int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k); int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
int bch_btree_to_text(char *buf, size_t size, const struct btree *b);
#ifdef CONFIG_BCACHE_EDEBUG
unsigned bch_count_data(struct btree *);
void bch_check_key_order_msg(struct btree *, struct bset *, const char *, ...);
void bch_check_keys(struct btree *, const char *, ...);
#define bch_check_key_order(b, i) \
bch_check_key_order_msg(b, i, "keys out of order")
#define EBUG_ON(cond) BUG_ON(cond)
#else /* EDEBUG */
#define bch_count_data(b) 0
#define bch_check_key_order(b, i) do {} while (0)
#define bch_check_key_order_msg(b, i, ...) do {} while (0)
#define bch_check_keys(b, ...) do {} while (0)
#define EBUG_ON(cond) do {} while (0)
#endif
#ifdef CONFIG_BCACHE_DEBUG #ifdef CONFIG_BCACHE_DEBUG
void bch_btree_verify(struct btree *, struct bset *); void bch_btree_verify(struct btree *, struct bset *);
void bch_data_verify(struct search *); void bch_data_verify(struct cached_dev *, struct bio *);
int __bch_count_data(struct btree *);
void __bch_check_keys(struct btree *, const char *, ...);
void bch_btree_iter_next_check(struct btree_iter *);
#define EBUG_ON(cond) BUG_ON(cond)
#define expensive_debug_checks(c) ((c)->expensive_debug_checks)
#define key_merging_disabled(c) ((c)->key_merging_disabled)
#define bypass_torture_test(d) ((d)->bypass_torture_test)
#else /* DEBUG */ #else /* DEBUG */
static inline void bch_btree_verify(struct btree *b, struct bset *i) {} static inline void bch_btree_verify(struct btree *b, struct bset *i) {}
static inline void bch_data_verify(struct search *s) {}; static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
static inline int __bch_count_data(struct btree *b) { return -1; }
static inline void __bch_check_keys(struct btree *b, const char *fmt, ...) {}
static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
#define EBUG_ON(cond) do { if (cond); } while (0)
#define expensive_debug_checks(c) 0
#define key_merging_disabled(c) 0
#define bypass_torture_test(d) 0
#endif #endif
#define bch_count_data(b) \
(expensive_debug_checks((b)->c) ? __bch_count_data(b) : -1)
#define bch_check_keys(b, ...) \
do { \
if (expensive_debug_checks((b)->c)) \
__bch_check_keys(b, __VA_ARGS__); \
} while (0)
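The macros above replace the old compile-time CONFIG_BCACHE_EDEBUG split with a runtime switch: the expensive consistency checks are compiled whenever CONFIG_BCACHE_DEBUG is enabled, but only execute when expensive_debug_checks is set on the cache set. A minimal stand-alone sketch of the same gating pattern (plain C, invented toy_* names):

#include <stdbool.h>
#include <stdio.h>

struct toy_cache {
        bool expensive_debug_checks;    /* runtime switch, e.g. toggled via sysfs */
};

/* Stand-in for walking a btree node and summing the key sizes. */
static int __toy_count_data(struct toy_cache *c)
{
        (void)c;
        return 42;
}

#define toy_count_data(c) \
        ((c)->expensive_debug_checks ? __toy_count_data(c) : -1)

int main(void)
{
        struct toy_cache c = { .expensive_debug_checks = true };

        printf("%d\n", toy_count_data(&c));     /* check runs */
        c.expensive_debug_checks = false;
        printf("%d\n", toy_count_data(&c));     /* check skipped, returns -1 */
        return 0;
}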
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
void bch_debug_init_cache_set(struct cache_set *); void bch_debug_init_cache_set(struct cache_set *);
#else #else
......
This diff has been collapsed (not shown).
...@@ -75,43 +75,6 @@ ...@@ -75,43 +75,6 @@
* nodes that are pinning the oldest journal entries first. * nodes that are pinning the oldest journal entries first.
*/ */
#define BCACHE_JSET_VERSION_UUIDv1 1
/* Always latest UUID format */
#define BCACHE_JSET_VERSION_UUID 1
#define BCACHE_JSET_VERSION 1
/*
* On disk format for a journal entry:
* seq is monotonically increasing; every journal entry has its own unique
* sequence number.
*
* last_seq is the oldest journal entry that still has keys the btree hasn't
* flushed to disk yet.
*
* version is for on disk format changes.
*/
struct jset {
uint64_t csum;
uint64_t magic;
uint64_t seq;
uint32_t version;
uint32_t keys;
uint64_t last_seq;
BKEY_PADDED(uuid_bucket);
BKEY_PADDED(btree_root);
uint16_t btree_level;
uint16_t pad[3];
uint64_t prio_bucket[MAX_CACHES_PER_SET];
union {
struct bkey start[0];
uint64_t d[0];
};
};
/* /*
* Only used for holding the journal entries we read in btree_journal_read() * Only used for holding the journal entries we read in btree_journal_read()
* during cache_registration * during cache_registration
...@@ -140,7 +103,8 @@ struct journal { ...@@ -140,7 +103,8 @@ struct journal {
spinlock_t lock; spinlock_t lock;
/* used when waiting because the journal was full */ /* used when waiting because the journal was full */
struct closure_waitlist wait; struct closure_waitlist wait;
struct closure_with_timer io; struct closure io;
struct delayed_work work;
/* Number of blocks free in the bucket(s) we're currently writing to */ /* Number of blocks free in the bucket(s) we're currently writing to */
unsigned blocks_free; unsigned blocks_free;
...@@ -188,8 +152,7 @@ struct journal_device { ...@@ -188,8 +152,7 @@ struct journal_device {
}; };
#define journal_pin_cmp(c, l, r) \ #define journal_pin_cmp(c, l, r) \
(fifo_idx(&(c)->journal.pin, (l)->journal) > \ (fifo_idx(&(c)->journal.pin, (l)) > fifo_idx(&(c)->journal.pin, (r)))
fifo_idx(&(c)->journal.pin, (r)->journal))
#define JOURNAL_PIN 20000 #define JOURNAL_PIN 20000
...@@ -199,15 +162,14 @@ struct journal_device { ...@@ -199,15 +162,14 @@ struct journal_device {
struct closure; struct closure;
struct cache_set; struct cache_set;
struct btree_op; struct btree_op;
struct keylist;
void bch_journal(struct closure *); atomic_t *bch_journal(struct cache_set *, struct keylist *, struct closure *);
void bch_journal_next(struct journal *); void bch_journal_next(struct journal *);
void bch_journal_mark(struct cache_set *, struct list_head *); void bch_journal_mark(struct cache_set *, struct list_head *);
void bch_journal_meta(struct cache_set *, struct closure *); void bch_journal_meta(struct cache_set *, struct closure *);
int bch_journal_read(struct cache_set *, struct list_head *, int bch_journal_read(struct cache_set *, struct list_head *);
struct btree_op *); int bch_journal_replay(struct cache_set *, struct list_head *);
int bch_journal_replay(struct cache_set *, struct list_head *,
struct btree_op *);
void bch_journal_free(struct cache_set *); void bch_journal_free(struct cache_set *);
int bch_journal_alloc(struct cache_set *); int bch_journal_alloc(struct cache_set *);
......
...@@ -12,8 +12,9 @@ ...@@ -12,8 +12,9 @@
#include <trace/events/bcache.h> #include <trace/events/bcache.h>
struct moving_io { struct moving_io {
struct closure cl;
struct keybuf_key *w; struct keybuf_key *w;
struct search s; struct data_insert_op op;
struct bbio bio; struct bbio bio;
}; };
...@@ -38,13 +39,13 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k) ...@@ -38,13 +39,13 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
static void moving_io_destructor(struct closure *cl) static void moving_io_destructor(struct closure *cl)
{ {
struct moving_io *io = container_of(cl, struct moving_io, s.cl); struct moving_io *io = container_of(cl, struct moving_io, cl);
kfree(io); kfree(io);
} }
static void write_moving_finish(struct closure *cl) static void write_moving_finish(struct closure *cl)
{ {
struct moving_io *io = container_of(cl, struct moving_io, s.cl); struct moving_io *io = container_of(cl, struct moving_io, cl);
struct bio *bio = &io->bio.bio; struct bio *bio = &io->bio.bio;
struct bio_vec *bv; struct bio_vec *bv;
int i; int i;
...@@ -52,13 +53,12 @@ static void write_moving_finish(struct closure *cl) ...@@ -52,13 +53,12 @@ static void write_moving_finish(struct closure *cl)
bio_for_each_segment_all(bv, bio, i) bio_for_each_segment_all(bv, bio, i)
__free_page(bv->bv_page); __free_page(bv->bv_page);
if (io->s.op.insert_collision) if (io->op.replace_collision)
trace_bcache_gc_copy_collision(&io->w->key); trace_bcache_gc_copy_collision(&io->w->key);
bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w); bch_keybuf_del(&io->op.c->moving_gc_keys, io->w);
atomic_dec_bug(&io->s.op.c->in_flight); up(&io->op.c->moving_in_flight);
closure_wake_up(&io->s.op.c->moving_gc_wait);
closure_return_with_destructor(cl, moving_io_destructor); closure_return_with_destructor(cl, moving_io_destructor);
} }
...@@ -66,12 +66,12 @@ static void write_moving_finish(struct closure *cl) ...@@ -66,12 +66,12 @@ static void write_moving_finish(struct closure *cl)
static void read_moving_endio(struct bio *bio, int error) static void read_moving_endio(struct bio *bio, int error)
{ {
struct moving_io *io = container_of(bio->bi_private, struct moving_io *io = container_of(bio->bi_private,
struct moving_io, s.cl); struct moving_io, cl);
if (error) if (error)
io->s.error = error; io->op.error = error;
bch_bbio_endio(io->s.op.c, bio, error, "reading data to move"); bch_bbio_endio(io->op.c, bio, error, "reading data to move");
} }
static void moving_init(struct moving_io *io) static void moving_init(struct moving_io *io)
...@@ -85,54 +85,53 @@ static void moving_init(struct moving_io *io) ...@@ -85,54 +85,53 @@ static void moving_init(struct moving_io *io)
bio->bi_size = KEY_SIZE(&io->w->key) << 9; bio->bi_size = KEY_SIZE(&io->w->key) << 9;
bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key), bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key),
PAGE_SECTORS); PAGE_SECTORS);
bio->bi_private = &io->s.cl; bio->bi_private = &io->cl;
bio->bi_io_vec = bio->bi_inline_vecs; bio->bi_io_vec = bio->bi_inline_vecs;
bch_bio_map(bio, NULL); bch_bio_map(bio, NULL);
} }
static void write_moving(struct closure *cl) static void write_moving(struct closure *cl)
{ {
struct search *s = container_of(cl, struct search, cl); struct moving_io *io = container_of(cl, struct moving_io, cl);
struct moving_io *io = container_of(s, struct moving_io, s); struct data_insert_op *op = &io->op;
if (!s->error) { if (!op->error) {
moving_init(io); moving_init(io);
io->bio.bio.bi_sector = KEY_START(&io->w->key); io->bio.bio.bi_sector = KEY_START(&io->w->key);
s->op.lock = -1; op->write_prio = 1;
s->op.write_prio = 1; op->bio = &io->bio.bio;
s->op.cache_bio = &io->bio.bio;
s->writeback = KEY_DIRTY(&io->w->key); op->writeback = KEY_DIRTY(&io->w->key);
s->op.csum = KEY_CSUM(&io->w->key); op->csum = KEY_CSUM(&io->w->key);
s->op.type = BTREE_REPLACE; bkey_copy(&op->replace_key, &io->w->key);
bkey_copy(&s->op.replace, &io->w->key); op->replace = true;
closure_init(&s->op.cl, cl); closure_call(&op->cl, bch_data_insert, NULL, cl);
bch_insert_data(&s->op.cl);
} }
continue_at(cl, write_moving_finish, NULL); continue_at(cl, write_moving_finish, system_wq);
} }
static void read_moving_submit(struct closure *cl) static void read_moving_submit(struct closure *cl)
{ {
struct search *s = container_of(cl, struct search, cl); struct moving_io *io = container_of(cl, struct moving_io, cl);
struct moving_io *io = container_of(s, struct moving_io, s);
struct bio *bio = &io->bio.bio; struct bio *bio = &io->bio.bio;
bch_submit_bbio(bio, s->op.c, &io->w->key, 0); bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
continue_at(cl, write_moving, bch_gc_wq); continue_at(cl, write_moving, system_wq);
} }
static void read_moving(struct closure *cl) static void read_moving(struct cache_set *c)
{ {
struct cache_set *c = container_of(cl, struct cache_set, moving_gc);
struct keybuf_key *w; struct keybuf_key *w;
struct moving_io *io; struct moving_io *io;
struct bio *bio; struct bio *bio;
struct closure cl;
closure_init_stack(&cl);
/* XXX: if we error, background writeback could stall indefinitely */ /* XXX: if we error, background writeback could stall indefinitely */
...@@ -150,8 +149,8 @@ static void read_moving(struct closure *cl) ...@@ -150,8 +149,8 @@ static void read_moving(struct closure *cl)
w->private = io; w->private = io;
io->w = w; io->w = w;
io->s.op.inode = KEY_INODE(&w->key); io->op.inode = KEY_INODE(&w->key);
io->s.op.c = c; io->op.c = c;
moving_init(io); moving_init(io);
bio = &io->bio.bio; bio = &io->bio.bio;
...@@ -164,13 +163,8 @@ static void read_moving(struct closure *cl) ...@@ -164,13 +163,8 @@ static void read_moving(struct closure *cl)
trace_bcache_gc_copy(&w->key); trace_bcache_gc_copy(&w->key);
closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl); down(&c->moving_in_flight);
closure_call(&io->cl, read_moving_submit, NULL, &cl);
if (atomic_inc_return(&c->in_flight) >= 64) {
closure_wait_event(&c->moving_gc_wait, cl,
atomic_read(&c->in_flight) < 64);
continue_at(cl, read_moving, bch_gc_wq);
}
} }
if (0) { if (0) {
...@@ -180,7 +174,7 @@ err: if (!IS_ERR_OR_NULL(w->private)) ...@@ -180,7 +174,7 @@ err: if (!IS_ERR_OR_NULL(w->private))
bch_keybuf_del(&c->moving_gc_keys, w); bch_keybuf_del(&c->moving_gc_keys, w);
} }
closure_return(cl); closure_sync(&cl);
} }
static bool bucket_cmp(struct bucket *l, struct bucket *r) static bool bucket_cmp(struct bucket *l, struct bucket *r)
...@@ -193,15 +187,14 @@ static unsigned bucket_heap_top(struct cache *ca) ...@@ -193,15 +187,14 @@ static unsigned bucket_heap_top(struct cache *ca)
return GC_SECTORS_USED(heap_peek(&ca->heap)); return GC_SECTORS_USED(heap_peek(&ca->heap));
} }
void bch_moving_gc(struct closure *cl) void bch_moving_gc(struct cache_set *c)
{ {
struct cache_set *c = container_of(cl, struct cache_set, gc.cl);
struct cache *ca; struct cache *ca;
struct bucket *b; struct bucket *b;
unsigned i; unsigned i;
if (!c->copy_gc_enabled) if (!c->copy_gc_enabled)
closure_return(cl); return;
mutex_lock(&c->bucket_lock); mutex_lock(&c->bucket_lock);
...@@ -242,13 +235,11 @@ void bch_moving_gc(struct closure *cl) ...@@ -242,13 +235,11 @@ void bch_moving_gc(struct closure *cl)
c->moving_gc_keys.last_scanned = ZERO_KEY; c->moving_gc_keys.last_scanned = ZERO_KEY;
closure_init(&c->moving_gc, cl); read_moving(c);
read_moving(&c->moving_gc);
closure_return(cl);
} }
void bch_moving_init_cache_set(struct cache_set *c) void bch_moving_init_cache_set(struct cache_set *c)
{ {
bch_keybuf_init(&c->moving_gc_keys); bch_keybuf_init(&c->moving_gc_keys);
sema_init(&c->moving_in_flight, 64);
} }
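The movinggc changes above drop the in_flight atomic and the moving_gc_wait closure waitlist in favour of a counting semaphore initialised to 64: read_moving() now does down() before issuing each copy and write_moving_finish() does up() when one completes. The same throttling pattern in a stand-alone user-space model (POSIX semaphores and pthreads rather than the kernel primitives; build with -pthread):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t moving_in_flight;

static void *do_copy(void *arg)
{
        (void)arg;
        /* ... copy one extent ... */
        sem_post(&moving_in_flight);    /* models up() in write_moving_finish() */
        return NULL;
}

int main(void)
{
        pthread_t tid[128];
        int i;

        sem_init(&moving_in_flight, 0, 64);     /* models sema_init(..., 64) */

        for (i = 0; i < 128; i++) {
                sem_wait(&moving_in_flight);    /* models down() in read_moving() */
                pthread_create(&tid[i], NULL, do_copy, NULL);
        }
        for (i = 0; i < 128; i++)
                pthread_join(tid[i], NULL);

        sem_destroy(&moving_in_flight);
        printf("all copies done\n");
        return 0;
}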
3 file diffs have been collapsed (not shown).
...@@ -38,7 +38,9 @@ struct cache_accounting { ...@@ -38,7 +38,9 @@ struct cache_accounting {
struct cache_stats day; struct cache_stats day;
}; };
struct search; struct cache_set;
struct cached_dev;
struct bcache_device;
void bch_cache_accounting_init(struct cache_accounting *acc, void bch_cache_accounting_init(struct cache_accounting *acc,
struct closure *parent); struct closure *parent);
...@@ -50,9 +52,10 @@ void bch_cache_accounting_clear(struct cache_accounting *acc); ...@@ -50,9 +52,10 @@ void bch_cache_accounting_clear(struct cache_accounting *acc);
void bch_cache_accounting_destroy(struct cache_accounting *acc); void bch_cache_accounting_destroy(struct cache_accounting *acc);
void bch_mark_cache_accounting(struct search *s, bool hit, bool bypass); void bch_mark_cache_accounting(struct cache_set *, struct bcache_device *,
void bch_mark_cache_readahead(struct search *s); bool, bool);
void bch_mark_cache_miss_collision(struct search *s); void bch_mark_cache_readahead(struct cache_set *, struct bcache_device *);
void bch_mark_sectors_bypassed(struct search *s, int sectors); void bch_mark_cache_miss_collision(struct cache_set *, struct bcache_device *);
void bch_mark_sectors_bypassed(struct cache_set *, struct cached_dev *, int);
#endif /* _BCACHE_STATS_H_ */ #endif /* _BCACHE_STATS_H_ */
10 file diffs have been collapsed (not shown).