提交 f042fea0 编写于 作者: L Linus Torvalds

Merge branch 'for-3.9/drivers' of git://git.kernel.dk/linux-block

Pull block driver bits from Jens Axboe:
 "After the block IO core bits are in, please grab the driver updates
  from below as well.  It contains:

   - Fix ancient regression in dac960.  Nobody must be using that
     anymore...

   - Some good fixes from Guo Ghao for loop, fixing both potential
     oopses and deadlocks.

   - Improve mtip32xx for NUMA systems, by being a bit more clever in
     distributing work.

   - Add IBM RamSan 70/80 driver.  A second round of fixes for that is
     pending, that will come in through for-linus during the 3.9 cycle
     as per usual.

   - A few xen-blk{back,front} fixes from Konrad and Roger.

   - Other minor fixes and improvements."

* 'for-3.9/drivers' of git://git.kernel.dk/linux-block:
  loopdev: ignore negative offset when calculate loop device size
  loopdev: remove an user triggerable oops
  loopdev: move common code into loop_figure_size()
  loopdev: update block device size in loop_set_status()
  loopdev: fix a deadlock
  xen-blkback: use balloon pages for persistent grants
  xen-blkfront: drop the use of llist_for_each_entry_safe
  xen/blkback: Don't trust the handle from the frontend.
  xen-blkback: do not leak mode property
  block: IBM RamSan 70/80 driver fixes
  rsxx: add slab.h include to dma.c
  drivers/block/mtip32xx: add missing GENERIC_HARDIRQS dependency
  block: remove new __devinit/exit annotations on ramsam driver
  block: IBM RamSan 70/80 device driver
  drivers/block/mtip32xx/mtip32xx.c:1726:5: sparse: symbol 'mtip_send_trim' was not declared. Should it be static?
  drivers/block/mtip32xx/mtip32xx.c:4029:1: sparse: symbol 'mtip_workq_sdbf0' was not declared. Should it be static?
  dac960: return success instead of -ENOTTY
  mtip32xx: add trim support
  mtip32xx: Add workqueue and NUMA support
  block: delete super ancient PC-XT driver for 1980's hardware
......@@ -6515,6 +6515,12 @@ S: Maintained
F: Documentation/blockdev/ramdisk.txt
F: drivers/block/brd.c
RAMSAM DRIVER (IBM RamSan 70/80 PCI SSD Flash Card)
M: Joshua Morris <josh.h.morris@us.ibm.com>
M: Philip Kelleher <pjk1939@linux.vnet.ibm.com>
S: Maintained
F: drivers/block/rsxx/
RANDOM NUMBER DRIVER
M: Theodore Ts'o" <tytso@mit.edu>
S: Maintained
......
......@@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
else
ErrorCode = 0;
}
break;
default:
ErrorCode = -ENOTTY;
}
......
......@@ -63,19 +63,6 @@ config AMIGA_Z2RAM
To compile this driver as a module, choose M here: the
module will be called z2ram.
config BLK_DEV_XD
tristate "XT hard disk support"
depends on ISA && ISA_DMA_API
select CHECK_SIGNATURE
help
Very old 8 bit hard disk controllers used in the IBM XT computer
will be supported if you say Y here.
To compile this driver as a module, choose M here: the
module will be called xd.
It's pretty unlikely that you have one of these: say N.
config GDROM
tristate "SEGA Dreamcast GD-ROM drive"
depends on SH_DREAMCAST
......@@ -544,4 +531,14 @@ config BLK_DEV_RBD
If unsure, say N.
config BLK_DEV_RSXX
tristate "RamSam PCIe Flash SSD Device Driver"
depends on PCI
help
Device driver for IBM's high speed PCIe SSD
storage devices: RamSan-70 and RamSan-80.
To compile this driver as a module, choose M here: the
module will be called rsxx.
endif # BLK_DEV
......@@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
obj-$(CONFIG_BLK_DEV_XD) += xd.o
obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
......@@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
swim_mod-y := swim.o swim_asm.o
......@@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file)
{
loff_t size, loopsize;
loff_t loopsize;
/* Compute loopsize in bytes */
size = i_size_read(file->f_mapping->host);
loopsize = size - offset;
/* offset is beyond i_size, wierd but possible */
loopsize = i_size_read(file->f_mapping->host);
if (offset > 0)
loopsize -= offset;
/* offset is beyond i_size, weird but possible */
if (loopsize < 0)
return 0;
......@@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
struct block_device *bdev = lo->lo_device;
if (unlikely((loff_t)x != size))
return -EFBIG;
......@@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
return 0;
}
......@@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
return err;
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
lo->lo_sizelimit != info->lo_sizelimit)
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))
return -EFBIG;
}
loop_config_discard(lo);
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
......@@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
{
int err;
sector_t sec;
loff_t sz;
err = -ENXIO;
if (unlikely(lo->lo_state != Lo_bound))
goto out;
err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
if (unlikely(err))
goto out;
sec = get_capacity(lo->lo_disk);
/* the width of sector_t may be narrow for bit-shift */
sz = sec;
sz <<= 9;
mutex_lock(&bdev->bd_mutex);
bd_set_size(bdev, sz);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
mutex_unlock(&bdev->bd_mutex);
return -ENXIO;
out:
return err;
return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
......@@ -1845,11 +1832,15 @@ static int __init loop_init(void)
max_part = (1UL << part_shift) - 1;
}
if ((1UL << part_shift) > DISK_MAX_PARTS)
return -EINVAL;
if ((1UL << part_shift) > DISK_MAX_PARTS) {
err = -EINVAL;
goto misc_out;
}
if (max_loop > 1UL << (MINORBITS - part_shift))
return -EINVAL;
if (max_loop > 1UL << (MINORBITS - part_shift)) {
err = -EINVAL;
goto misc_out;
}
/*
* If max_loop is specified, create that many devices upfront.
......@@ -1867,8 +1858,10 @@ static int __init loop_init(void)
range = 1UL << MINORBITS;
}
if (register_blkdev(LOOP_MAJOR, "loop"))
return -EIO;
if (register_blkdev(LOOP_MAJOR, "loop")) {
err = -EIO;
goto misc_out;
}
blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
THIS_MODULE, loop_probe, NULL, NULL);
......@@ -1881,6 +1874,10 @@ static int __init loop_init(void)
printk(KERN_INFO "loop: module loaded\n");
return 0;
misc_out:
misc_deregister(&loop_misc);
return err;
}
static int loop_exit_cb(int id, void *ptr, void *data)
......
......@@ -4,6 +4,6 @@
config BLK_DEV_PCIESSD_MTIP32XX
tristate "Block Device Driver for Micron PCIe SSDs"
depends on PCI
depends on PCI && GENERIC_HARDIRQS
help
This enables the block driver for Micron PCIe SSDs.
......@@ -88,6 +88,8 @@ static int instance;
static int mtip_major;
static struct dentry *dfs_parent;
static u32 cpu_use[NR_CPUS];
static DEFINE_SPINLOCK(rssd_index_lock);
static DEFINE_IDA(rssd_index_ida);
......@@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd)
*/
static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
{
atomic_set(&port->commands[tag].active, 1);
int group = tag >> 5;
spin_lock(&port->cmd_issue_lock);
atomic_set(&port->commands[tag].active, 1);
/* guard SACT and CI registers */
spin_lock(&port->cmd_issue_lock[group]);
writel((1 << MTIP_TAG_BIT(tag)),
port->s_active[MTIP_TAG_INDEX(tag)]);
writel((1 << MTIP_TAG_BIT(tag)),
port->cmd_issue[MTIP_TAG_INDEX(tag)]);
spin_unlock(&port->cmd_issue_lock);
spin_unlock(&port->cmd_issue_lock[group]);
/* Set the command's timeout value.*/
port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
......@@ -964,56 +967,56 @@ static void mtip_handle_tfe(struct driver_data *dd)
/*
* Handle a set device bits interrupt
*/
static inline void mtip_process_sdbf(struct driver_data *dd)
static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
u32 completed)
{
struct mtip_port *port = dd->port;
int group, tag, bit;
u32 completed;
struct driver_data *dd = port->dd;
int tag, bit;
struct mtip_cmd *command;
/* walk all bits in all slot groups */
for (group = 0; group < dd->slot_groups; group++) {
completed = readl(port->completed[group]);
if (!completed)
continue;
if (!completed) {
WARN_ON_ONCE(!completed);
return;
}
/* clear completed status register in the hardware.*/
writel(completed, port->completed[group]);
/* clear completed status register in the hardware.*/
writel(completed, port->completed[group]);
/* Process completed commands. */
for (bit = 0; (bit < 32) && completed; bit++) {
if (completed & 0x01) {
tag = (group << 5) | bit;
/* Process completed commands. */
for (bit = 0;
(bit < 32) && completed;
bit++, completed >>= 1) {
if (completed & 0x01) {
tag = (group << 5) | bit;
/* skip internal command slot. */
if (unlikely(tag == MTIP_TAG_INTERNAL))
continue;
/* skip internal command slot. */
if (unlikely(tag == MTIP_TAG_INTERNAL))
continue;
command = &port->commands[tag];
/* make internal callback */
if (likely(command->comp_func)) {
command->comp_func(
port,
tag,
command->comp_data,
0);
} else {
dev_warn(&dd->pdev->dev,
"Null completion "
"for tag %d",
tag);
command = &port->commands[tag];
/* make internal callback */
if (likely(command->comp_func)) {
command->comp_func(
port,
tag,
command->comp_data,
0);
} else {
dev_warn(&dd->pdev->dev,
"Null completion "
"for tag %d",
tag);
if (mtip_check_surprise_removal(
dd->pdev)) {
mtip_command_cleanup(dd);
return;
}
if (mtip_check_surprise_removal(
dd->pdev)) {
mtip_command_cleanup(dd);
return;
}
}
}
completed >>= 1;
}
/* If last, re-enable interrupts */
if (atomic_dec_return(&dd->irq_workers_active) == 0)
writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
}
/*
......@@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
struct mtip_port *port = dd->port;
u32 hba_stat, port_stat;
int rv = IRQ_NONE;
int do_irq_enable = 1, i, workers;
struct mtip_work *twork;
hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
if (hba_stat) {
......@@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
writel(port_stat, port->mmio + PORT_IRQ_STAT);
/* Demux port status */
if (likely(port_stat & PORT_IRQ_SDB_FIS))
mtip_process_sdbf(dd);
if (likely(port_stat & PORT_IRQ_SDB_FIS)) {
do_irq_enable = 0;
WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0);
/* Start at 1: group zero is always local? */
for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS;
i++) {
twork = &dd->work[i];
twork->completed = readl(port->completed[i]);
if (twork->completed)
workers++;
}
atomic_set(&dd->irq_workers_active, workers);
if (workers) {
for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) {
twork = &dd->work[i];
if (twork->completed)
queue_work_on(
twork->cpu_binding,
dd->isr_workq,
&twork->work);
}
if (likely(dd->work[0].completed))
mtip_workq_sdbfx(port, 0,
dd->work[0].completed);
} else {
/*
* Chip quirk: SDB interrupt but nothing
* to complete
*/
do_irq_enable = 1;
}
}
if (unlikely(port_stat & PORT_IRQ_ERR)) {
if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
......@@ -1103,20 +1142,12 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
}
/* acknowledge interrupt */
writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
if (unlikely(do_irq_enable))
writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
return rv;
}
/*
* Wrapper for mtip_handle_irq
* (ignores return code)
*/
static void mtip_tasklet(unsigned long data)
{
mtip_handle_irq((struct driver_data *) data);
}
/*
* HBA interrupt subroutine.
*
......@@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data)
static irqreturn_t mtip_irq_handler(int irq, void *instance)
{
struct driver_data *dd = instance;
tasklet_schedule(&dd->tasklet);
return IRQ_HANDLED;
return mtip_handle_irq(dd);
}
static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
......@@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
}
#endif
/* Demux ID.DRAT & ID.RZAT to determine trim support */
if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
port->dd->trim_supp = true;
else
port->dd->trim_supp = false;
/* Set the identify buffer as valid. */
port->identify_valid = 1;
......@@ -1675,6 +1712,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
return rv;
}
/*
* Trim unused sectors
*
* @dd pointer to driver_data structure
* @lba starting lba
* @len # of 512b sectors to trim
*
* return value
* -ENOMEM Out of dma memory
* -EINVAL Invalid parameters passed in, trim not supported
* -EIO Error submitting trim request to hw
*/
static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len)
{
int i, rv = 0;
u64 tlba, tlen, sect_left;
struct mtip_trim_entry *buf;
dma_addr_t dma_addr;
struct host_to_dev_fis fis;
if (!len || dd->trim_supp == false)
return -EINVAL;
/* Trim request too big */
WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
/* Trim request not aligned on 4k boundary */
WARN_ON(len % 8 != 0);
/* Warn if vu_trim structure is too big */
WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE);
/* Allocate a DMA buffer for the trim structure */
buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
GFP_KERNEL);
if (!buf)
return -ENOMEM;
memset(buf, 0, ATA_SECT_SIZE);
for (i = 0, sect_left = len, tlba = lba;
i < MTIP_MAX_TRIM_ENTRIES && sect_left;
i++) {
tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
MTIP_MAX_TRIM_ENTRY_LEN :
sect_left);
buf[i].lba = __force_bit2int cpu_to_le32(tlba);
buf[i].range = __force_bit2int cpu_to_le16(tlen);
tlba += tlen;
sect_left -= tlen;
}
WARN_ON(sect_left != 0);
/* Build the fis */
memset(&fis, 0, sizeof(struct host_to_dev_fis));
fis.type = 0x27;
fis.opts = 1 << 7;
fis.command = 0xfb;
fis.features = 0x60;
fis.sect_count = 1;
fis.device = ATA_DEVICE_OBS;
if (mtip_exec_internal_command(dd->port,
&fis,
5,
dma_addr,
ATA_SECT_SIZE,
0,
GFP_KERNEL,
MTIP_TRIM_TIMEOUT_MS) < 0)
rv = -EIO;
dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
return rv;
}
/*
* Get the drive capacity.
*
......@@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd)
hba_setup(dd);
tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd);
dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL);
dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
dd->numa_node);
if (!dd->port) {
dev_err(&dd->pdev->dev,
"Memory allocation: port structure\n");
return -ENOMEM;
}
/* Continue workqueue setup */
for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
dd->work[i].port = dd->port;
/* Counting semaphore to track command slot usage */
sema_init(&dd->port->cmd_slot, num_command_slots - 1);
/* Spinlock to prevent concurrent issue */
spin_lock_init(&dd->port->cmd_issue_lock);
for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
spin_lock_init(&dd->port->cmd_issue_lock[i]);
/* Set the port mmio base address. */
dd->port->mmio = dd->mmio + PORT_OFFSET;
......@@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd)
"Unable to allocate IRQ %d\n", dd->pdev->irq);
goto out2;
}
irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding));
/* Enable interrupts on the HBA. */
writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
......@@ -3241,7 +3358,8 @@ static int mtip_hw_init(struct driver_data *dd)
writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
dd->mmio + HOST_CTL);
/*Release the IRQ. */
/* Release the IRQ. */
irq_set_affinity_hint(dd->pdev->irq, NULL);
devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
out2:
......@@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd)
del_timer_sync(&dd->port->cmd_timer);
/* Release the IRQ. */
irq_set_affinity_hint(dd->pdev->irq, NULL);
devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
/* Stop the bottom half tasklet. */
tasklet_kill(&dd->tasklet);
/* Free the command/command header memory. */
dmam_free_coherent(&dd->pdev->dev,
HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
......@@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
}
}
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
bio_endio(bio, mtip_send_trim(dd, bio->bi_sector,
bio_sectors(bio)));
return;
}
if (unlikely(!bio_has_data(bio))) {
blk_queue_flush(queue, 0);
bio_endio(bio, 0);
......@@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd)
goto protocol_init_error;
}
dd->disk = alloc_disk(MTIP_MAX_MINORS);
dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node);
if (dd->disk == NULL) {
dev_err(&dd->pdev->dev,
"Unable to allocate gendisk structure\n");
......@@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd)
skip_create_disk:
/* Allocate the request queue. */
dd->queue = blk_alloc_queue(GFP_KERNEL);
dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node);
if (dd->queue == NULL) {
dev_err(&dd->pdev->dev,
"Unable to allocate request queue\n");
......@@ -3783,6 +3905,15 @@ static int mtip_block_initialize(struct driver_data *dd)
*/
blk_queue_flush(dd->queue, 0);
/* Signal trim support */
if (dd->trim_supp == true) {
set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
dd->queue->limits.discard_granularity = 4096;
blk_queue_max_discard_sectors(dd->queue,
MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
dd->queue->limits.discard_zeroes_data = 0;
}
/* Set the capacity of the device in 512 byte sectors. */
if (!(mtip_hw_get_capacity(dd, &capacity))) {
dev_warn(&dd->pdev->dev,
......@@ -3813,9 +3944,8 @@ static int mtip_block_initialize(struct driver_data *dd)
start_service_thread:
sprintf(thd_name, "mtip_svc_thd_%02d", index);
dd->mtip_svc_handler = kthread_run(mtip_service_thread,
dd, thd_name);
dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
dd, dd->numa_node, thd_name);
if (IS_ERR(dd->mtip_svc_handler)) {
dev_err(&dd->pdev->dev, "service thread failed to start\n");
......@@ -3823,7 +3953,7 @@ static int mtip_block_initialize(struct driver_data *dd)
rv = -EFAULT;
goto kthread_run_error;
}
wake_up_process(dd->mtip_svc_handler);
if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
rv = wait_for_rebuild;
......@@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd)
return 0;
}
static void drop_cpu(int cpu)
{
cpu_use[cpu]--;
}
static int get_least_used_cpu_on_node(int node)
{
int cpu, least_used_cpu, least_cnt;
const struct cpumask *node_mask;
node_mask = cpumask_of_node(node);
least_used_cpu = cpumask_first(node_mask);
least_cnt = cpu_use[least_used_cpu];
cpu = least_used_cpu;
for_each_cpu(cpu, node_mask) {
if (cpu_use[cpu] < least_cnt) {
least_used_cpu = cpu;
least_cnt = cpu_use[cpu];
}
}
cpu_use[least_used_cpu]++;
return least_used_cpu;
}
/* Helper for selecting a node in round robin mode */
static inline int mtip_get_next_rr_node(void)
{
static int next_node = -1;
if (next_node == -1) {
next_node = first_online_node;
return next_node;
}
next_node = next_online_node(next_node);
if (next_node == MAX_NUMNODES)
next_node = first_online_node;
return next_node;
}
static DEFINE_HANDLER(0);
static DEFINE_HANDLER(1);
static DEFINE_HANDLER(2);
static DEFINE_HANDLER(3);
static DEFINE_HANDLER(4);
static DEFINE_HANDLER(5);
static DEFINE_HANDLER(6);
static DEFINE_HANDLER(7);
/*
* Called for each supported PCI device detected.
*
......@@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev,
{
int rv = 0;
struct driver_data *dd = NULL;
char cpu_list[256];
const struct cpumask *node_mask;
int cpu, i = 0, j = 0;
int my_node = NUMA_NO_NODE;
/* Allocate memory for this devices private data. */
dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL);
my_node = pcibus_to_node(pdev->bus);
if (my_node != NUMA_NO_NODE) {
if (!node_online(my_node))
my_node = mtip_get_next_rr_node();
} else {
dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n");
my_node = mtip_get_next_rr_node();
}
dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n",
my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev),
cpu_to_node(smp_processor_id()), smp_processor_id());
dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
if (dd == NULL) {
dev_err(&pdev->dev,
"Unable to allocate memory for driver data\n");
......@@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev,
}
}
pci_set_master(pdev);
/* Copy the info we may need later into the private data structure. */
dd->major = mtip_major;
dd->instance = instance;
dd->pdev = pdev;
dd->numa_node = my_node;
memset(dd->workq_name, 0, 32);
snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);
dd->isr_workq = create_workqueue(dd->workq_name);
if (!dd->isr_workq) {
dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
goto block_initialize_err;
}
memset(cpu_list, 0, sizeof(cpu_list));
node_mask = cpumask_of_node(dd->numa_node);
if (!cpumask_empty(node_mask)) {
for_each_cpu(cpu, node_mask)
{
snprintf(&cpu_list[j], 256 - j, "%d ", cpu);
j = strlen(cpu_list);
}
dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n",
dd->numa_node,
topology_physical_package_id(cpumask_first(node_mask)),
nr_cpus_node(dd->numa_node),
cpu_list);
} else
dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n");
dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node);
dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n",
cpu_to_node(dd->isr_binding), dd->isr_binding);
/* first worker context always runs in ISR */
dd->work[0].cpu_binding = dd->isr_binding;
dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
dd->work[3].cpu_binding = dd->work[0].cpu_binding;
dd->work[4].cpu_binding = dd->work[1].cpu_binding;
dd->work[5].cpu_binding = dd->work[2].cpu_binding;
dd->work[6].cpu_binding = dd->work[2].cpu_binding;
dd->work[7].cpu_binding = dd->work[1].cpu_binding;
/* Log the bindings */
for_each_present_cpu(cpu) {
memset(cpu_list, 0, sizeof(cpu_list));
for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) {
if (dd->work[i].cpu_binding == cpu) {
snprintf(&cpu_list[j], 256 - j, "%d ", i);
j = strlen(cpu_list);
}
}
if (j)
dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list);
}
INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0);
INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1);
INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2);
INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3);
INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4);
INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5);
INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6);
INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);
pci_set_master(pdev);
if (pci_enable_msi(pdev)) {
dev_warn(&pdev->dev,
"Unable to enable MSI interrupt.\n");
goto block_initialize_err;
}
/* Copy the info we may need later into the private data structure. */
dd->major = mtip_major;
dd->instance = instance;
dd->pdev = pdev;
/* Initialize the block layer. */
rv = mtip_block_initialize(dd);
if (rv < 0) {
......@@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev,
block_initialize_err:
pci_disable_msi(pdev);
if (dd->isr_workq) {
flush_workqueue(dd->isr_workq);
destroy_workqueue(dd->isr_workq);
drop_cpu(dd->work[0].cpu_binding);
drop_cpu(dd->work[1].cpu_binding);
drop_cpu(dd->work[2].cpu_binding);
}
setmask_err:
pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
......@@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev)
/* Clean up the block layer. */
mtip_block_remove(dd);
if (dd->isr_workq) {
flush_workqueue(dd->isr_workq);
destroy_workqueue(dd->isr_workq);
drop_cpu(dd->work[0].cpu_binding);
drop_cpu(dd->work[1].cpu_binding);
drop_cpu(dd->work[2].cpu_binding);
}
pci_disable_msi(pdev);
kfree(dd);
......
......@@ -164,6 +164,35 @@ struct smart_attr {
u8 res[3];
} __packed;
struct mtip_work {
struct work_struct work;
void *port;
int cpu_binding;
u32 completed;
} ____cacheline_aligned_in_smp;
#define DEFINE_HANDLER(group) \
void mtip_workq_sdbf##group(struct work_struct *work) \
{ \
struct mtip_work *w = (struct mtip_work *) work; \
mtip_workq_sdbfx(w->port, group, w->completed); \
}
#define MTIP_TRIM_TIMEOUT_MS 240000
#define MTIP_MAX_TRIM_ENTRIES 8
#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8
struct mtip_trim_entry {
u32 lba; /* starting lba of region */
u16 rsvd; /* unused */
u16 range; /* # of 512b blocks to trim */
} __packed;
struct mtip_trim {
/* Array of regions to trim */
struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES];
} __packed;
/* Register Frame Information Structure (FIS), host to device. */
struct host_to_dev_fis {
/*
......@@ -424,7 +453,7 @@ struct mtip_port {
*/
struct semaphore cmd_slot;
/* Spinlock for working around command-issue bug. */
spinlock_t cmd_issue_lock;
spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS];
};
/*
......@@ -447,9 +476,6 @@ struct driver_data {
struct mtip_port *port; /* Pointer to the port data structure. */
/* Tasklet used to process the bottom half of the ISR. */
struct tasklet_struct tasklet;
unsigned product_type; /* magic value declaring the product type */
unsigned slot_groups; /* number of slot groups the product supports */
......@@ -461,6 +487,20 @@ struct driver_data {
struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
struct dentry *dfs_node;
bool trim_supp; /* flag indicating trim support */
int numa_node; /* NUMA support */
char workq_name[32];
struct workqueue_struct *isr_workq;
struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
atomic_t irq_workers_active;
int isr_binding;
};
#endif
obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o
rsxx-y := config.o core.o cregs.o dev.o dma.o
/*
* Filename: config.c
*
*
* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
*
* (C) Copyright 2013 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/types.h>
#include <linux/crc32.h>
#include <linux/swab.h>
#include "rsxx_priv.h"
#include "rsxx_cfg.h"
static void initialize_config(void *config)
{
struct rsxx_card_cfg *cfg = config;
cfg->hdr.version = RSXX_CFG_VERSION;
cfg->data.block_size = RSXX_HW_BLK_SIZE;
cfg->data.stripe_size = RSXX_HW_BLK_SIZE;
cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM;
cfg->data.cache_order = (-1);
cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED;
cfg->data.intr_coal.count = 0;
cfg->data.intr_coal.latency = 0;
}
static u32 config_data_crc32(struct rsxx_card_cfg *cfg)
{
/*
* Return the compliment of the CRC to ensure compatibility
* (i.e. this is how early rsxx drivers did it.)
*/
return ~crc32(~0, &cfg->data, sizeof(cfg->data));
}
/*----------------- Config Byte Swap Functions -------------------*/
static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr)
{
hdr->version = be32_to_cpu((__force __be32) hdr->version);
hdr->crc = be32_to_cpu((__force __be32) hdr->crc);
}
static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr)
{
hdr->version = (__force u32) cpu_to_be32(hdr->version);
hdr->crc = (__force u32) cpu_to_be32(hdr->crc);
}
static void config_data_swab(struct rsxx_card_cfg *cfg)
{
u32 *data = (u32 *) &cfg->data;
int i;
for (i = 0; i < (sizeof(cfg->data) / 4); i++)
data[i] = swab32(data[i]);
}
static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg)
{
u32 *data = (u32 *) &cfg->data;
int i;
for (i = 0; i < (sizeof(cfg->data) / 4); i++)
data[i] = le32_to_cpu((__force __le32) data[i]);
}
static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg)
{
u32 *data = (u32 *) &cfg->data;
int i;
for (i = 0; i < (sizeof(cfg->data) / 4); i++)
data[i] = (__force u32) cpu_to_le32(data[i]);
}
/*----------------- Config Operations ------------------*/
static int rsxx_save_config(struct rsxx_cardinfo *card)
{
struct rsxx_card_cfg cfg;
int st;
memcpy(&cfg, &card->config, sizeof(cfg));
if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) {
dev_err(CARD_TO_DEV(card),
"Cannot save config with invalid version %d\n",
cfg.hdr.version);
return -EINVAL;
}
/* Convert data to little endian for the CRC calculation. */
config_data_cpu_to_le(&cfg);
cfg.hdr.crc = config_data_crc32(&cfg);
/*
* Swap the data from little endian to big endian so it can be
* stored.
*/
config_data_swab(&cfg);
config_hdr_cpu_to_be(&cfg.hdr);
st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1);
if (st)
return st;
return 0;
}
int rsxx_load_config(struct rsxx_cardinfo *card)
{
int st;
u32 crc;
st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config),
&card->config, 1);
if (st) {
dev_err(CARD_TO_DEV(card),
"Failed reading card config.\n");
return st;
}
config_hdr_be_to_cpu(&card->config.hdr);
if (card->config.hdr.version == RSXX_CFG_VERSION) {
/*
* We calculate the CRC with the data in little endian, because
* early drivers did not take big endian CPUs into account.
* The data is always stored in big endian, so we need to byte
* swap it before calculating the CRC.
*/
config_data_swab(&card->config);
/* Check the CRC */
crc = config_data_crc32(&card->config);
if (crc != card->config.hdr.crc) {
dev_err(CARD_TO_DEV(card),
"Config corruption detected!\n");
dev_info(CARD_TO_DEV(card),
"CRC (sb x%08x is x%08x)\n",
card->config.hdr.crc, crc);
return -EIO;
}
/* Convert the data to CPU byteorder */
config_data_le_to_cpu(&card->config);
} else if (card->config.hdr.version != 0) {
dev_err(CARD_TO_DEV(card),
"Invalid config version %d.\n",
card->config.hdr.version);
/*
* Config version changes require special handling from the
* user
*/
return -EINVAL;
} else {
dev_info(CARD_TO_DEV(card),
"Initializing card configuration.\n");
initialize_config(card);
st = rsxx_save_config(card);
if (st)
return st;
}
card->config_valid = 1;
dev_dbg(CARD_TO_DEV(card), "version: x%08x\n",
card->config.hdr.version);
dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n",
card->config.hdr.crc);
dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n",
card->config.data.block_size);
dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n",
card->config.data.stripe_size);
dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n",
card->config.data.vendor_id);
dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n",
card->config.data.cache_order);
dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n",
card->config.data.intr_coal.mode);
dev_dbg(CARD_TO_DEV(card), "count: x%08x\n",
card->config.data.intr_coal.count);
dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n",
card->config.data.intr_coal.latency);
return 0;
}
/*
* Filename: core.c
*
*
* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
*
* (C) Copyright 2013 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include "rsxx_priv.h"
#include "rsxx_cfg.h"
#define NO_LEGACY 0
MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver");
MODULE_AUTHOR("IBM <support@ramsan.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRIVER_VERSION);
static unsigned int force_legacy = NO_LEGACY;
module_param(force_legacy, uint, 0444);
MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
static DEFINE_IDA(rsxx_disk_ida);
static DEFINE_SPINLOCK(rsxx_ida_lock);
/*----------------- Interrupt Control & Handling -------------------*/
static void __enable_intr(unsigned int *mask, unsigned int intr)
{
*mask |= intr;
}
static void __disable_intr(unsigned int *mask, unsigned int intr)
{
*mask &= ~intr;
}
/*
* NOTE: Disabling the IER will disable the hardware interrupt.
* Disabling the ISR will disable the software handling of the ISR bit.
*
* Enable/Disable interrupt functions assume the card->irq_lock
* is held by the caller.
*/
void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
if (unlikely(card->halt))
return;
__enable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
}
void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
__disable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
}
void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
if (unlikely(card->halt))
return;
__enable_intr(&card->isr_mask, intr);
__enable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
}
void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
__disable_intr(&card->isr_mask, intr);
__disable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
}
static irqreturn_t rsxx_isr(int irq, void *pdata)
{
struct rsxx_cardinfo *card = pdata;
unsigned int isr;
int handled = 0;
int reread_isr;
int i;
spin_lock(&card->irq_lock);
do {
reread_isr = 0;
isr = ioread32(card->regmap + ISR);
if (isr == 0xffffffff) {
/*
* A few systems seem to have an intermittent issue
* where PCI reads return all Fs, but retrying the read
* a little later will return as expected.
*/
dev_info(CARD_TO_DEV(card),
"ISR = 0xFFFFFFFF, retrying later\n");
break;
}
isr &= card->isr_mask;
if (!isr)
break;
for (i = 0; i < card->n_targets; i++) {
if (isr & CR_INTR_DMA(i)) {
if (card->ier_mask & CR_INTR_DMA(i)) {
rsxx_disable_ier(card, CR_INTR_DMA(i));
reread_isr = 1;
}
queue_work(card->ctrl[i].done_wq,
&card->ctrl[i].dma_done_work);
handled++;
}
}
if (isr & CR_INTR_CREG) {
schedule_work(&card->creg_ctrl.done_work);
handled++;
}
if (isr & CR_INTR_EVENT) {
schedule_work(&card->event_work);
rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
handled++;
}
} while (reread_isr);
spin_unlock(&card->irq_lock);
return handled ? IRQ_HANDLED : IRQ_NONE;
}
/*----------------- Card Event Handler -------------------*/
static char *rsxx_card_state_to_str(unsigned int state)
{
static char *state_strings[] = {
"Unknown", "Shutdown", "Starting", "Formatting",
"Uninitialized", "Good", "Shutting Down",
"Fault", "Read Only Fault", "dStroying"
};
return state_strings[ffs(state)];
}
static void card_state_change(struct rsxx_cardinfo *card,
unsigned int new_state)
{
int st;
dev_info(CARD_TO_DEV(card),
"card state change detected.(%s -> %s)\n",
rsxx_card_state_to_str(card->state),
rsxx_card_state_to_str(new_state));
card->state = new_state;
/* Don't attach DMA interfaces if the card has an invalid config */
if (!card->config_valid)
return;
switch (new_state) {
case CARD_STATE_RD_ONLY_FAULT:
dev_crit(CARD_TO_DEV(card),
"Hardware has entered read-only mode!\n");
/*
* Fall through so the DMA devices can be attached and
* the user can attempt to pull off their data.
*/
case CARD_STATE_GOOD:
st = rsxx_get_card_size8(card, &card->size8);
if (st)
dev_err(CARD_TO_DEV(card),
"Failed attaching DMA devices\n");
if (card->config_valid)
set_capacity(card->gendisk, card->size8 >> 9);
break;
case CARD_STATE_FAULT:
dev_crit(CARD_TO_DEV(card),
"Hardware Fault reported!\n");
/* Fall through. */
/* Everything else, detach DMA interface if it's attached. */
case CARD_STATE_SHUTDOWN:
case CARD_STATE_STARTING:
case CARD_STATE_FORMATTING:
case CARD_STATE_UNINITIALIZED:
case CARD_STATE_SHUTTING_DOWN:
/*
* dStroy is a term coined by marketing to represent the low level
* secure erase.
*/
case CARD_STATE_DSTROYING:
set_capacity(card->gendisk, 0);
break;
}
}
static void card_event_handler(struct work_struct *work)
{
struct rsxx_cardinfo *card;
unsigned int state;
unsigned long flags;
int st;
card = container_of(work, struct rsxx_cardinfo, event_work);
if (unlikely(card->halt))
return;
/*
* Enable the interrupt now to avoid any weird race conditions where a
* state change might occur while rsxx_get_card_state() is
* processing a returned creg cmd.
*/
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
spin_unlock_irqrestore(&card->irq_lock, flags);
st = rsxx_get_card_state(card, &state);
if (st) {
dev_info(CARD_TO_DEV(card),
"Failed reading state after event.\n");
return;
}
if (card->state != state)
card_state_change(card, state);
if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING)
rsxx_read_hw_log(card);
}
/*----------------- Card Operations -------------------*/
static int card_shutdown(struct rsxx_cardinfo *card)
{
unsigned int state;
signed long start;
const int timeout = msecs_to_jiffies(120000);
int st;
/* We can't issue a shutdown if the card is in a transition state */
start = jiffies;
do {
st = rsxx_get_card_state(card, &state);
if (st)
return st;
} while (state == CARD_STATE_STARTING &&
(jiffies - start < timeout));
if (state == CARD_STATE_STARTING)
return -ETIMEDOUT;
/* Only issue a shutdown if we need to */
if ((state != CARD_STATE_SHUTTING_DOWN) &&
(state != CARD_STATE_SHUTDOWN)) {
st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN);
if (st)
return st;
}
start = jiffies;
do {
st = rsxx_get_card_state(card, &state);
if (st)
return st;
} while (state != CARD_STATE_SHUTDOWN &&
(jiffies - start < timeout));
if (state != CARD_STATE_SHUTDOWN)
return -ETIMEDOUT;
return 0;
}
/*----------------- Driver Initialization & Setup -------------------*/
/* Returns: 0 if the driver is compatible with the device
-1 if the driver is NOT compatible with the device */
static int rsxx_compatibility_check(struct rsxx_cardinfo *card)
{
unsigned char pci_rev;
pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
if (pci_rev > RS70_PCI_REV_SUPPORTED)
return -1;
return 0;
}
static int rsxx_pci_probe(struct pci_dev *dev,
const struct pci_device_id *id)
{
struct rsxx_cardinfo *card;
int st;
dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
card = kzalloc(sizeof(*card), GFP_KERNEL);
if (!card)
return -ENOMEM;
card->dev = dev;
pci_set_drvdata(dev, card);
do {
if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) {
st = -ENOMEM;
goto failed_ida_get;
}
spin_lock(&rsxx_ida_lock);
st = ida_get_new(&rsxx_disk_ida, &card->disk_id);
spin_unlock(&rsxx_ida_lock);
} while (st == -EAGAIN);
if (st)
goto failed_ida_get;
st = pci_enable_device(dev);
if (st)
goto failed_enable;
pci_set_master(dev);
pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE);
st = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
if (st) {
dev_err(CARD_TO_DEV(card),
"No usable DMA configuration,aborting\n");
goto failed_dma_mask;
}
st = pci_request_regions(dev, DRIVER_NAME);
if (st) {
dev_err(CARD_TO_DEV(card),
"Failed to request memory region\n");
goto failed_request_regions;
}
if (pci_resource_len(dev, 0) == 0) {
dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n");
st = -ENOMEM;
goto failed_iomap;
}
card->regmap = pci_iomap(dev, 0, 0);
if (!card->regmap) {
dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n");
st = -ENOMEM;
goto failed_iomap;
}
spin_lock_init(&card->irq_lock);
card->halt = 0;
spin_lock_irq(&card->irq_lock);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
spin_unlock_irq(&card->irq_lock);
if (!force_legacy) {
st = pci_enable_msi(dev);
if (st)
dev_warn(CARD_TO_DEV(card),
"Failed to enable MSI\n");
}
st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED,
DRIVER_NAME, card);
if (st) {
dev_err(CARD_TO_DEV(card),
"Failed requesting IRQ%d\n", dev->irq);
goto failed_irq;
}
/************* Setup Processor Command Interface *************/
rsxx_creg_setup(card);
spin_lock_irq(&card->irq_lock);
rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
spin_unlock_irq(&card->irq_lock);
st = rsxx_compatibility_check(card);
if (st) {
dev_warn(CARD_TO_DEV(card),
"Incompatible driver detected. Please update the driver.\n");
st = -EINVAL;
goto failed_compatiblity_check;
}
/************* Load Card Config *************/
st = rsxx_load_config(card);
if (st)
dev_err(CARD_TO_DEV(card),
"Failed loading card config\n");
/************* Setup DMA Engine *************/
st = rsxx_get_num_targets(card, &card->n_targets);
if (st)
dev_info(CARD_TO_DEV(card),
"Failed reading the number of DMA targets\n");
card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL);
if (!card->ctrl) {
st = -ENOMEM;
goto failed_dma_setup;
}
st = rsxx_dma_setup(card);
if (st) {
dev_info(CARD_TO_DEV(card),
"Failed to setup DMA engine\n");
goto failed_dma_setup;
}
/************* Setup Card Event Handler *************/
INIT_WORK(&card->event_work, card_event_handler);
st = rsxx_setup_dev(card);
if (st)
goto failed_create_dev;
rsxx_get_card_state(card, &card->state);
dev_info(CARD_TO_DEV(card),
"card state: %s\n",
rsxx_card_state_to_str(card->state));
/*
* Now that the DMA Engine and devices have been setup,
* we can enable the event interrupt(it kicks off actions in
* those layers so we couldn't enable it right away.)
*/
spin_lock_irq(&card->irq_lock);
rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
spin_unlock_irq(&card->irq_lock);
if (card->state == CARD_STATE_SHUTDOWN) {
st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP);
if (st)
dev_crit(CARD_TO_DEV(card),
"Failed issuing card startup\n");
} else if (card->state == CARD_STATE_GOOD ||
card->state == CARD_STATE_RD_ONLY_FAULT) {
st = rsxx_get_card_size8(card, &card->size8);
if (st)
card->size8 = 0;
}
rsxx_attach_dev(card);
return 0;
failed_create_dev:
rsxx_dma_destroy(card);
failed_dma_setup:
failed_compatiblity_check:
spin_lock_irq(&card->irq_lock);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
spin_unlock_irq(&card->irq_lock);
free_irq(dev->irq, card);
if (!force_legacy)
pci_disable_msi(dev);
failed_irq:
pci_iounmap(dev, card->regmap);
failed_iomap:
pci_release_regions(dev);
failed_request_regions:
failed_dma_mask:
pci_disable_device(dev);
failed_enable:
spin_lock(&rsxx_ida_lock);
ida_remove(&rsxx_disk_ida, card->disk_id);
spin_unlock(&rsxx_ida_lock);
failed_ida_get:
kfree(card);
return st;
}
static void rsxx_pci_remove(struct pci_dev *dev)
{
struct rsxx_cardinfo *card = pci_get_drvdata(dev);
unsigned long flags;
int st;
int i;
if (!card)
return;
dev_info(CARD_TO_DEV(card),
"Removing PCI-Flash SSD.\n");
rsxx_detach_dev(card);
for (i = 0; i < card->n_targets; i++) {
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
spin_unlock_irqrestore(&card->irq_lock, flags);
}
st = card_shutdown(card);
if (st)
dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n");
/* Sync outstanding event handlers. */
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
spin_unlock_irqrestore(&card->irq_lock, flags);
/* Prevent work_structs from re-queuing themselves. */
card->halt = 1;
cancel_work_sync(&card->event_work);
rsxx_destroy_dev(card);
rsxx_dma_destroy(card);
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
spin_unlock_irqrestore(&card->irq_lock, flags);
free_irq(dev->irq, card);
if (!force_legacy)
pci_disable_msi(dev);
rsxx_creg_destroy(card);
pci_iounmap(dev, card->regmap);
pci_disable_device(dev);
pci_release_regions(dev);
kfree(card);
}
static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state)
{
/* We don't support suspend at this time. */
return -ENOSYS;
}
static void rsxx_pci_shutdown(struct pci_dev *dev)
{
struct rsxx_cardinfo *card = pci_get_drvdata(dev);
unsigned long flags;
int i;
if (!card)
return;
dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n");
rsxx_detach_dev(card);
for (i = 0; i < card->n_targets; i++) {
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
spin_unlock_irqrestore(&card->irq_lock, flags);
}
card_shutdown(card);
}
static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = {
{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)},
{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)},
{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)},
{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)},
{0,},
};
MODULE_DEVICE_TABLE(pci, rsxx_pci_ids);
static struct pci_driver rsxx_pci_driver = {
.name = DRIVER_NAME,
.id_table = rsxx_pci_ids,
.probe = rsxx_pci_probe,
.remove = rsxx_pci_remove,
.suspend = rsxx_pci_suspend,
.shutdown = rsxx_pci_shutdown,
};
static int __init rsxx_core_init(void)
{
int st;
st = rsxx_dev_init();
if (st)
return st;
st = rsxx_dma_init();
if (st)
goto dma_init_failed;
st = rsxx_creg_init();
if (st)
goto creg_init_failed;
return pci_register_driver(&rsxx_pci_driver);
creg_init_failed:
rsxx_dma_cleanup();
dma_init_failed:
rsxx_dev_cleanup();
return st;
}
static void __exit rsxx_core_cleanup(void)
{
pci_unregister_driver(&rsxx_pci_driver);
rsxx_creg_cleanup();
rsxx_dma_cleanup();
rsxx_dev_cleanup();
}
module_init(rsxx_core_init);
module_exit(rsxx_core_cleanup);
/*
* Filename: cregs.c
*
*
* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
*
* (C) Copyright 2013 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/completion.h>
#include <linux/slab.h>
#include "rsxx_priv.h"
#define CREG_TIMEOUT_MSEC 10000
typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card,
struct creg_cmd *cmd,
int st);
struct creg_cmd {
struct list_head list;
creg_cmd_cb cb;
void *cb_private;
unsigned int op;
unsigned int addr;
int cnt8;
void *buf;
unsigned int stream;
unsigned int status;
};
static struct kmem_cache *creg_cmd_pool;
/*------------ Private Functions --------------*/
#if defined(__LITTLE_ENDIAN)
#define LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN)
#define LITTLE_ENDIAN 0
#else
#error Unknown endianess!!! Aborting...
#endif
static void copy_to_creg_data(struct rsxx_cardinfo *card,
int cnt8,
void *buf,
unsigned int stream)
{
int i = 0;
u32 *data = buf;
for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
/*
* Firmware implementation makes it necessary to byte swap on
* little endian processors.
*/
if (LITTLE_ENDIAN && stream)
iowrite32be(data[i], card->regmap + CREG_DATA(i));
else
iowrite32(data[i], card->regmap + CREG_DATA(i));
}
}
static void copy_from_creg_data(struct rsxx_cardinfo *card,
int cnt8,
void *buf,
unsigned int stream)
{
int i = 0;
u32 *data = buf;
for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
/*
* Firmware implementation makes it necessary to byte swap on
* little endian processors.
*/
if (LITTLE_ENDIAN && stream)
data[i] = ioread32be(card->regmap + CREG_DATA(i));
else
data[i] = ioread32(card->regmap + CREG_DATA(i));
}
}
static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card)
{
struct creg_cmd *cmd;
/*
* Spin lock is needed because this can be called in atomic/interrupt
* context.
*/
spin_lock_bh(&card->creg_ctrl.lock);
cmd = card->creg_ctrl.active_cmd;
card->creg_ctrl.active_cmd = NULL;
spin_unlock_bh(&card->creg_ctrl.lock);
return cmd;
}
static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd)
{
iowrite32(cmd->addr, card->regmap + CREG_ADD);
iowrite32(cmd->cnt8, card->regmap + CREG_CNT);
if (cmd->op == CREG_OP_WRITE) {
if (cmd->buf)
copy_to_creg_data(card, cmd->cnt8,
cmd->buf, cmd->stream);
}
/*
* Data copy must complete before initiating the command. This is
* needed for weakly ordered processors (i.e. PowerPC), so that all
* neccessary registers are written before we kick the hardware.
*/
wmb();
/* Setting the valid bit will kick off the command. */
iowrite32(cmd->op, card->regmap + CREG_CMD);
}
static void creg_kick_queue(struct rsxx_cardinfo *card)
{
if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue))
return;
card->creg_ctrl.active = 1;
card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue,
struct creg_cmd, list);
list_del(&card->creg_ctrl.active_cmd->list);
card->creg_ctrl.q_depth--;
/*
* We have to set the timer before we push the new command. Otherwise,
* we could create a race condition that would occur if the timer
* was not canceled, and expired after the new command was pushed,
* but before the command was issued to hardware.
*/
mod_timer(&card->creg_ctrl.cmd_timer,
jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC));
creg_issue_cmd(card, card->creg_ctrl.active_cmd);
}
static int creg_queue_cmd(struct rsxx_cardinfo *card,
unsigned int op,
unsigned int addr,
unsigned int cnt8,
void *buf,
int stream,
creg_cmd_cb callback,
void *cb_private)
{
struct creg_cmd *cmd;
/* Don't queue stuff up if we're halted. */
if (unlikely(card->halt))
return -EINVAL;
if (card->creg_ctrl.reset)
return -EAGAIN;
if (cnt8 > MAX_CREG_DATA8)
return -EINVAL;
cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL);
if (!cmd)
return -ENOMEM;
INIT_LIST_HEAD(&cmd->list);
cmd->op = op;
cmd->addr = addr;
cmd->cnt8 = cnt8;
cmd->buf = buf;
cmd->stream = stream;
cmd->cb = callback;
cmd->cb_private = cb_private;
cmd->status = 0;
spin_lock(&card->creg_ctrl.lock);
list_add_tail(&cmd->list, &card->creg_ctrl.queue);
card->creg_ctrl.q_depth++;
creg_kick_queue(card);
spin_unlock(&card->creg_ctrl.lock);
return 0;
}
static void creg_cmd_timed_out(unsigned long data)
{
struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data;
struct creg_cmd *cmd;
cmd = pop_active_cmd(card);
if (cmd == NULL) {
card->creg_ctrl.creg_stats.creg_timeout++;
dev_warn(CARD_TO_DEV(card),
"No active command associated with timeout!\n");
return;
}
if (cmd->cb)
cmd->cb(card, cmd, -ETIMEDOUT);
kmem_cache_free(creg_cmd_pool, cmd);
spin_lock(&card->creg_ctrl.lock);
card->creg_ctrl.active = 0;
creg_kick_queue(card);
spin_unlock(&card->creg_ctrl.lock);
}
static void creg_cmd_done(struct work_struct *work)
{
struct rsxx_cardinfo *card;
struct creg_cmd *cmd;
int st = 0;
card = container_of(work, struct rsxx_cardinfo,
creg_ctrl.done_work);
/*
* The timer could not be cancelled for some reason,
* race to pop the active command.
*/
if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0)
card->creg_ctrl.creg_stats.failed_cancel_timer++;
cmd = pop_active_cmd(card);
if (cmd == NULL) {
dev_err(CARD_TO_DEV(card),
"Spurious creg interrupt!\n");
return;
}
card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT);
cmd->status = card->creg_ctrl.creg_stats.stat;
if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) {
dev_err(CARD_TO_DEV(card),
"Invalid status on creg command\n");
/*
* At this point we're probably reading garbage from HW. Don't
* do anything else that could mess up the system and let
* the sync function return an error.
*/
st = -EIO;
goto creg_done;
} else if (cmd->status & CREG_STAT_ERROR) {
st = -EIO;
}
if ((cmd->op == CREG_OP_READ)) {
unsigned int cnt8 = ioread32(card->regmap + CREG_CNT);
/* Paranoid Sanity Checks */
if (!cmd->buf) {
dev_err(CARD_TO_DEV(card),
"Buffer not given for read.\n");
st = -EIO;
goto creg_done;
}
if (cnt8 != cmd->cnt8) {
dev_err(CARD_TO_DEV(card),
"count mismatch\n");
st = -EIO;
goto creg_done;
}
copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
}
creg_done:
if (cmd->cb)
cmd->cb(card, cmd, st);
kmem_cache_free(creg_cmd_pool, cmd);
spin_lock(&card->creg_ctrl.lock);
card->creg_ctrl.active = 0;
creg_kick_queue(card);
spin_unlock(&card->creg_ctrl.lock);
}
static void creg_reset(struct rsxx_cardinfo *card)
{
struct creg_cmd *cmd = NULL;
struct creg_cmd *tmp;
unsigned long flags;
/*
* mutex_trylock is used here because if reset_lock is taken then a
* reset is already happening. So, we can just go ahead and return.
*/
if (!mutex_trylock(&card->creg_ctrl.reset_lock))
return;
card->creg_ctrl.reset = 1;
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
spin_unlock_irqrestore(&card->irq_lock, flags);
dev_warn(CARD_TO_DEV(card),
"Resetting creg interface for recovery\n");
/* Cancel outstanding commands */
spin_lock(&card->creg_ctrl.lock);
list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
list_del(&cmd->list);
card->creg_ctrl.q_depth--;
if (cmd->cb)
cmd->cb(card, cmd, -ECANCELED);
kmem_cache_free(creg_cmd_pool, cmd);
}
cmd = card->creg_ctrl.active_cmd;
card->creg_ctrl.active_cmd = NULL;
if (cmd) {
if (timer_pending(&card->creg_ctrl.cmd_timer))
del_timer_sync(&card->creg_ctrl.cmd_timer);
if (cmd->cb)
cmd->cb(card, cmd, -ECANCELED);
kmem_cache_free(creg_cmd_pool, cmd);
card->creg_ctrl.active = 0;
}
spin_unlock(&card->creg_ctrl.lock);
card->creg_ctrl.reset = 0;
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
spin_unlock_irqrestore(&card->irq_lock, flags);
mutex_unlock(&card->creg_ctrl.reset_lock);
}
/* Used for synchronous accesses */
struct creg_completion {
struct completion *cmd_done;
int st;
u32 creg_status;
};
static void creg_cmd_done_cb(struct rsxx_cardinfo *card,
struct creg_cmd *cmd,
int st)
{
struct creg_completion *cmd_completion;
cmd_completion = cmd->cb_private;
BUG_ON(!cmd_completion);
cmd_completion->st = st;
cmd_completion->creg_status = cmd->status;
complete(cmd_completion->cmd_done);
}
static int __issue_creg_rw(struct rsxx_cardinfo *card,
unsigned int op,
unsigned int addr,
unsigned int cnt8,
void *buf,
int stream,
unsigned int *hw_stat)
{
DECLARE_COMPLETION_ONSTACK(cmd_done);
struct creg_completion completion;
unsigned long timeout;
int st;
completion.cmd_done = &cmd_done;
completion.st = 0;
completion.creg_status = 0;
st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb,
&completion);
if (st)
return st;
/*
* This timeout is neccessary for unresponsive hardware. The additional
* 20 seconds to used to guarantee that each cregs requests has time to
* complete.
*/
timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC *
card->creg_ctrl.q_depth) + 20000);
/*
* The creg interface is guaranteed to complete. It has a timeout
* mechanism that will kick in if hardware does not respond.
*/
st = wait_for_completion_timeout(completion.cmd_done, timeout);
if (st == 0) {
/*
* This is really bad, because the kernel timer did not
* expire and notify us of a timeout!
*/
dev_crit(CARD_TO_DEV(card),
"cregs timer failed\n");
creg_reset(card);
return -EIO;
}
*hw_stat = completion.creg_status;
if (completion.st) {
dev_warn(CARD_TO_DEV(card),
"creg command failed(%d x%08x)\n",
completion.st, addr);
return completion.st;
}
return 0;
}
static int issue_creg_rw(struct rsxx_cardinfo *card,
u32 addr,
unsigned int size8,
void *data,
int stream,
int read)
{
unsigned int hw_stat;
unsigned int xfer;
unsigned int op;
int st;
op = read ? CREG_OP_READ : CREG_OP_WRITE;
do {
xfer = min_t(unsigned int, size8, MAX_CREG_DATA8);
st = __issue_creg_rw(card, op, addr, xfer,
data, stream, &hw_stat);
if (st)
return st;
data = (char *)data + xfer;
addr += xfer;
size8 -= xfer;
} while (size8);
return 0;
}
/* ---------------------------- Public API ---------------------------------- */
int rsxx_creg_write(struct rsxx_cardinfo *card,
u32 addr,
unsigned int size8,
void *data,
int byte_stream)
{
return issue_creg_rw(card, addr, size8, data, byte_stream, 0);
}
int rsxx_creg_read(struct rsxx_cardinfo *card,
u32 addr,
unsigned int size8,
void *data,
int byte_stream)
{
return issue_creg_rw(card, addr, size8, data, byte_stream, 1);
}
int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state)
{
return rsxx_creg_read(card, CREG_ADD_CARD_STATE,
sizeof(*state), state, 0);
}
int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8)
{
unsigned int size;
int st;
st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE,
sizeof(size), &size, 0);
if (st)
return st;
*size8 = (u64)size * RSXX_HW_BLK_SIZE;
return 0;
}
int rsxx_get_num_targets(struct rsxx_cardinfo *card,
unsigned int *n_targets)
{
return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS,
sizeof(*n_targets), n_targets, 0);
}
int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
u32 *capabilities)
{
return rsxx_creg_read(card, CREG_ADD_CAPABILITIES,
sizeof(*capabilities), capabilities, 0);
}
int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd)
{
return rsxx_creg_write(card, CREG_ADD_CARD_CMD,
sizeof(cmd), &cmd, 0);
}
/*----------------- HW Log Functions -------------------*/
static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len)
{
static char level;
/*
* New messages start with "<#>", where # is the log level. Messages
* that extend past the log buffer will use the previous level
*/
if ((len > 3) && (str[0] == '<') && (str[2] == '>')) {
level = str[1];
str += 3; /* Skip past the log level. */
len -= 3;
}
switch (level) {
case '0':
dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
case '1':
dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
case '2':
dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
case '3':
dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
case '4':
dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
case '5':
dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
case '6':
dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
case '7':
dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
default:
dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
break;
}
}
/*
* The substrncpy function copies the src string (which includes the
* terminating '\0' character), up to the count into the dest pointer.
* Returns the number of bytes copied to dest.
*/
static int substrncpy(char *dest, const char *src, int count)
{
int max_cnt = count;
while (count) {
count--;
*dest = *src;
if (*dest == '\0')
break;
src++;
dest++;
}
return max_cnt - count;
}
static void read_hw_log_done(struct rsxx_cardinfo *card,
struct creg_cmd *cmd,
int st)
{
char *buf;
char *log_str;
int cnt;
int len;
int off;
buf = cmd->buf;
off = 0;
/* Failed getting the log message */
if (st)
return;
while (off < cmd->cnt8) {
log_str = &card->log.buf[card->log.buf_len];
cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len);
len = substrncpy(log_str, &buf[off], cnt);
off += len;
card->log.buf_len += len;
/*
* Flush the log if we've hit the end of a message or if we've
* run out of buffer space.
*/
if ((log_str[len - 1] == '\0') ||
(card->log.buf_len == LOG_BUF_SIZE8)) {
if (card->log.buf_len != 1) /* Don't log blank lines. */
hw_log_msg(card, card->log.buf,
card->log.buf_len);
card->log.buf_len = 0;
}
}
if (cmd->status & CREG_STAT_LOG_PENDING)
rsxx_read_hw_log(card);
}
int rsxx_read_hw_log(struct rsxx_cardinfo *card)
{
int st;
st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG,
sizeof(card->log.tmp), card->log.tmp,
1, read_hw_log_done, NULL);
if (st)
dev_err(CARD_TO_DEV(card),
"Failed getting log text\n");
return st;
}
/*-------------- IOCTL REG Access ------------------*/
static int issue_reg_cmd(struct rsxx_cardinfo *card,
struct rsxx_reg_access *cmd,
int read)
{
unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE;
return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data,
cmd->stream, &cmd->stat);
}
int rsxx_reg_access(struct rsxx_cardinfo *card,
struct rsxx_reg_access __user *ucmd,
int read)
{
struct rsxx_reg_access cmd;
int st;
st = copy_from_user(&cmd, ucmd, sizeof(cmd));
if (st)
return -EFAULT;
if (cmd.cnt > RSXX_MAX_REG_CNT)
return -EFAULT;
st = issue_reg_cmd(card, &cmd, read);
if (st)
return st;
st = put_user(cmd.stat, &ucmd->stat);
if (st)
return -EFAULT;
if (read) {
st = copy_to_user(ucmd->data, cmd.data, cmd.cnt);
if (st)
return -EFAULT;
}
return 0;
}
/*------------ Initialization & Setup --------------*/
int rsxx_creg_setup(struct rsxx_cardinfo *card)
{
card->creg_ctrl.active_cmd = NULL;
INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
mutex_init(&card->creg_ctrl.reset_lock);
INIT_LIST_HEAD(&card->creg_ctrl.queue);
spin_lock_init(&card->creg_ctrl.lock);
setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out,
(unsigned long) card);
return 0;
}
void rsxx_creg_destroy(struct rsxx_cardinfo *card)
{
struct creg_cmd *cmd;
struct creg_cmd *tmp;
int cnt = 0;
/* Cancel outstanding commands */
spin_lock(&card->creg_ctrl.lock);
list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
list_del(&cmd->list);
if (cmd->cb)
cmd->cb(card, cmd, -ECANCELED);
kmem_cache_free(creg_cmd_pool, cmd);
cnt++;
}
if (cnt)
dev_info(CARD_TO_DEV(card),
"Canceled %d queue creg commands\n", cnt);
cmd = card->creg_ctrl.active_cmd;
card->creg_ctrl.active_cmd = NULL;
if (cmd) {
if (timer_pending(&card->creg_ctrl.cmd_timer))
del_timer_sync(&card->creg_ctrl.cmd_timer);
if (cmd->cb)
cmd->cb(card, cmd, -ECANCELED);
dev_info(CARD_TO_DEV(card),
"Canceled active creg command\n");
kmem_cache_free(creg_cmd_pool, cmd);
}
spin_unlock(&card->creg_ctrl.lock);
cancel_work_sync(&card->creg_ctrl.done_work);
}
int rsxx_creg_init(void)
{
creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN);
if (!creg_cmd_pool)
return -ENOMEM;
return 0;
}
void rsxx_creg_cleanup(void)
{
kmem_cache_destroy(creg_cmd_pool);
}
/*
* Filename: dev.c
*
*
* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
*
* (C) Copyright 2013 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include "rsxx_priv.h"
static unsigned int blkdev_minors = 64;
module_param(blkdev_minors, uint, 0444);
MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)");
/*
* For now I'm making this tweakable in case any applications hit this limit.
* If you see a "bio too big" error in the log you will need to raise this
* value.
*/
static unsigned int blkdev_max_hw_sectors = 1024;
module_param(blkdev_max_hw_sectors, uint, 0444);
MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO");
static unsigned int enable_blkdev = 1;
module_param(enable_blkdev , uint, 0444);
MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces");
struct rsxx_bio_meta {
struct bio *bio;
atomic_t pending_dmas;
atomic_t error;
unsigned long start_time;
};
static struct kmem_cache *bio_meta_pool;
/*----------------- Block Device Operations -----------------*/
static int rsxx_blkdev_ioctl(struct block_device *bdev,
fmode_t mode,
unsigned int cmd,
unsigned long arg)
{
struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
switch (cmd) {
case RSXX_GETREG:
return rsxx_reg_access(card, (void __user *)arg, 1);
case RSXX_SETREG:
return rsxx_reg_access(card, (void __user *)arg, 0);
}
return -ENOTTY;
}
static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
u64 blocks = card->size8 >> 9;
/*
* get geometry: Fake it. I haven't found any drivers that set
* geo->start, so we won't either.
*/
if (card->size8) {
geo->heads = 64;
geo->sectors = 16;
do_div(blocks, (geo->heads * geo->sectors));
geo->cylinders = blocks;
} else {
geo->heads = 0;
geo->sectors = 0;
geo->cylinders = 0;
}
return 0;
}
static const struct block_device_operations rsxx_fops = {
.owner = THIS_MODULE,
.getgeo = rsxx_getgeo,
.ioctl = rsxx_blkdev_ioctl,
};
static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
{
struct hd_struct *part0 = &card->gendisk->part0;
int rw = bio_data_dir(bio);
int cpu;
cpu = part_stat_lock();
part_round_stats(cpu, part0);
part_inc_in_flight(part0, rw);
part_stat_unlock();
}
static void disk_stats_complete(struct rsxx_cardinfo *card,
struct bio *bio,
unsigned long start_time)
{
struct hd_struct *part0 = &card->gendisk->part0;
unsigned long duration = jiffies - start_time;
int rw = bio_data_dir(bio);
int cpu;
cpu = part_stat_lock();
part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio));
part_stat_inc(cpu, part0, ios[rw]);
part_stat_add(cpu, part0, ticks[rw], duration);
part_round_stats(cpu, part0);
part_dec_in_flight(part0, rw);
part_stat_unlock();
}
static void bio_dma_done_cb(struct rsxx_cardinfo *card,
void *cb_data,
unsigned int error)
{
struct rsxx_bio_meta *meta = cb_data;
if (error)
atomic_set(&meta->error, 1);
if (atomic_dec_and_test(&meta->pending_dmas)) {
disk_stats_complete(card, meta->bio, meta->start_time);
bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
kmem_cache_free(bio_meta_pool, meta);
}
}
static void rsxx_make_request(struct request_queue *q, struct bio *bio)
{
struct rsxx_cardinfo *card = q->queuedata;
struct rsxx_bio_meta *bio_meta;
int st = -EINVAL;
might_sleep();
if (unlikely(card->halt)) {
st = -EFAULT;
goto req_err;
}
if (unlikely(card->dma_fault)) {
st = (-EFAULT);
goto req_err;
}
if (bio->bi_size == 0) {
dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
goto req_err;
}
bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
if (!bio_meta) {
st = -ENOMEM;
goto req_err;
}
bio_meta->bio = bio;
atomic_set(&bio_meta->error, 0);
atomic_set(&bio_meta->pending_dmas, 0);
bio_meta->start_time = jiffies;
disk_stats_start(card, bio);
dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
bio_data_dir(bio) ? 'W' : 'R', bio_meta,
(u64)bio->bi_sector << 9, bio->bi_size);
st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
bio_dma_done_cb, bio_meta);
if (st)
goto queue_err;
return;
queue_err:
kmem_cache_free(bio_meta_pool, bio_meta);
req_err:
bio_endio(bio, st);
}
/*----------------- Device Setup -------------------*/
static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
{
unsigned char pci_rev;
pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
return (pci_rev >= RSXX_DISCARD_SUPPORT);
}
static unsigned short rsxx_get_logical_block_size(
struct rsxx_cardinfo *card)
{
u32 capabilities = 0;
int st;
st = rsxx_get_card_capabilities(card, &capabilities);
if (st)
dev_warn(CARD_TO_DEV(card),
"Failed reading card capabilities register\n");
/* Earlier firmware did not have support for 512 byte accesses */
if (capabilities & CARD_CAP_SUBPAGE_WRITES)
return 512;
else
return RSXX_HW_BLK_SIZE;
}
int rsxx_attach_dev(struct rsxx_cardinfo *card)
{
mutex_lock(&card->dev_lock);
/* The block device requires the stripe size from the config. */
if (enable_blkdev) {
if (card->config_valid)
set_capacity(card->gendisk, card->size8 >> 9);
else
set_capacity(card->gendisk, 0);
add_disk(card->gendisk);
card->bdev_attached = 1;
}
mutex_unlock(&card->dev_lock);
return 0;
}
void rsxx_detach_dev(struct rsxx_cardinfo *card)
{
mutex_lock(&card->dev_lock);
if (card->bdev_attached) {
del_gendisk(card->gendisk);
card->bdev_attached = 0;
}
mutex_unlock(&card->dev_lock);
}
int rsxx_setup_dev(struct rsxx_cardinfo *card)
{
unsigned short blk_size;
mutex_init(&card->dev_lock);
if (!enable_blkdev)
return 0;
card->major = register_blkdev(0, DRIVER_NAME);
if (card->major < 0) {
dev_err(CARD_TO_DEV(card), "Failed to get major number\n");
return -ENOMEM;
}
card->queue = blk_alloc_queue(GFP_KERNEL);
if (!card->queue) {
dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
unregister_blkdev(card->major, DRIVER_NAME);
return -ENOMEM;
}
card->gendisk = alloc_disk(blkdev_minors);
if (!card->gendisk) {
dev_err(CARD_TO_DEV(card), "Failed disk alloc\n");
blk_cleanup_queue(card->queue);
unregister_blkdev(card->major, DRIVER_NAME);
return -ENOMEM;
}
blk_size = rsxx_get_logical_block_size(card);
blk_queue_make_request(card->queue, rsxx_make_request);
blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
blk_queue_dma_alignment(card->queue, blk_size - 1);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_logical_block_size(card->queue, blk_size);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
if (rsxx_discard_supported(card)) {
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue);
blk_queue_max_discard_sectors(card->queue,
RSXX_HW_BLK_SIZE >> 9);
card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE;
card->queue->limits.discard_zeroes_data = 1;
}
card->queue->queuedata = card;
snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
"rsxx%d", card->disk_id);
card->gendisk->driverfs_dev = &card->dev->dev;
card->gendisk->major = card->major;
card->gendisk->first_minor = 0;
card->gendisk->fops = &rsxx_fops;
card->gendisk->private_data = card;
card->gendisk->queue = card->queue;
return 0;
}
void rsxx_destroy_dev(struct rsxx_cardinfo *card)
{
if (!enable_blkdev)
return;
put_disk(card->gendisk);
card->gendisk = NULL;
blk_cleanup_queue(card->queue);
unregister_blkdev(card->major, DRIVER_NAME);
}
int rsxx_dev_init(void)
{
bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN);
if (!bio_meta_pool)
return -ENOMEM;
return 0;
}
void rsxx_dev_cleanup(void)
{
kmem_cache_destroy(bio_meta_pool);
}
此差异已折叠。
/*
* Filename: rsxx.h
*
*
* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
*
* (C) Copyright 2013 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __RSXX_H__
#define __RSXX_H__
/*----------------- IOCTL Definitions -------------------*/
struct rsxx_reg_access {
__u32 addr;
__u32 cnt;
__u32 stat;
__u32 stream;
__u32 data[8];
};
#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32)))
#define RSXX_IOC_MAGIC 'r'
#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access)
#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access)
#endif /* __RSXX_H_ */
/*
* Filename: rsXX_cfg.h
*
*
* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
*
* (C) Copyright 2013 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __RSXX_CFG_H__
#define __RSXX_CFG_H__
/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */
#include <linux/types.h>
/*
* The card config version must match the driver's expected version. If it does
* not, the DMA interfaces will not be attached and the user will need to
* initialize/upgrade the card configuration using the card config utility.
*/
#define RSXX_CFG_VERSION 4
struct card_cfg_hdr {
__u32 version;
__u32 crc;
};
struct card_cfg_data {
__u32 block_size;
__u32 stripe_size;
__u32 vendor_id;
__u32 cache_order;
struct {
__u32 mode; /* Disabled, manual, auto-tune... */
__u32 count; /* Number of intr to coalesce */
__u32 latency;/* Max wait time (in ns) */
} intr_coal;
};
struct rsxx_card_cfg {
struct card_cfg_hdr hdr;
struct card_cfg_data data;
};
/* Vendor ID Values */
#define RSXX_VENDOR_ID_TMS_IBM 0
#define RSXX_VENDOR_ID_DSI 1
#define RSXX_VENDOR_COUNT 2
/* Interrupt Coalescing Values */
#define RSXX_INTR_COAL_DISABLED 0
#define RSXX_INTR_COAL_EXPLICIT 1
#define RSXX_INTR_COAL_AUTO_TUNE 2
#endif /* __RSXX_CFG_H__ */
/*
* Filename: rsxx_priv.h
*
*
* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
* Philip Kelleher <pjk1939@linux.vnet.ibm.com>
*
* (C) Copyright 2013 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __RSXX_PRIV_H__
#define __RSXX_PRIV_H__
#include <linux/version.h>
#include <linux/semaphore.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/bio.h>
#include <linux/vmalloc.h>
#include <linux/timer.h>
#include <linux/ioctl.h>
#include "rsxx.h"
#include "rsxx_cfg.h"
struct proc_cmd;
#define PCI_VENDOR_ID_TMS_IBM 0x15B6
#define PCI_DEVICE_ID_RS70_FLASH 0x0019
#define PCI_DEVICE_ID_RS70D_FLASH 0x001A
#define PCI_DEVICE_ID_RS80_FLASH 0x001C
#define PCI_DEVICE_ID_RS81_FLASH 0x001E
#define RS70_PCI_REV_SUPPORTED 4
#define DRIVER_NAME "rsxx"
#define DRIVER_VERSION "3.7"
/* Block size is 4096 */
#define RSXX_HW_BLK_SHIFT 12
#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT)
#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1)
#define MAX_CREG_DATA8 32
#define LOG_BUF_SIZE8 128
#define RSXX_MAX_OUTSTANDING_CMDS 255
#define RSXX_CS_IDX_MASK 0xff
#define RSXX_MAX_TARGETS 8
struct dma_tracker_list;
/* DMA Command/Status Buffer structure */
struct rsxx_cs_buffer {
dma_addr_t dma_addr;
void *buf;
u32 idx;
};
struct rsxx_dma_stats {
u32 crc_errors;
u32 hard_errors;
u32 soft_errors;
u32 writes_issued;
u32 writes_failed;
u32 reads_issued;
u32 reads_failed;
u32 reads_retried;
u32 discards_issued;
u32 discards_failed;
u32 done_rescheduled;
u32 issue_rescheduled;
u32 sw_q_depth; /* Number of DMAs on the SW queue. */
atomic_t hw_q_depth; /* Number of DMAs queued to HW. */
};
struct rsxx_dma_ctrl {
struct rsxx_cardinfo *card;
int id;
void __iomem *regmap;
struct rsxx_cs_buffer status;
struct rsxx_cs_buffer cmd;
u16 e_cnt;
spinlock_t queue_lock;
struct list_head queue;
struct workqueue_struct *issue_wq;
struct work_struct issue_dma_work;
struct workqueue_struct *done_wq;
struct work_struct dma_done_work;
struct timer_list activity_timer;
struct dma_tracker_list *trackers;
struct rsxx_dma_stats stats;
};
struct rsxx_cardinfo {
struct pci_dev *dev;
unsigned int halt;
void __iomem *regmap;
spinlock_t irq_lock;
unsigned int isr_mask;
unsigned int ier_mask;
struct rsxx_card_cfg config;
int config_valid;
/* Embedded CPU Communication */
struct {
spinlock_t lock;
bool active;
struct creg_cmd *active_cmd;
struct work_struct done_work;
struct list_head queue;
unsigned int q_depth;
/* Cache the creg status to prevent ioreads */
struct {
u32 stat;
u32 failed_cancel_timer;
u32 creg_timeout;
} creg_stats;
struct timer_list cmd_timer;
struct mutex reset_lock;
int reset;
} creg_ctrl;
struct {
char tmp[MAX_CREG_DATA8];
char buf[LOG_BUF_SIZE8]; /* terminated */
int buf_len;
} log;
struct work_struct event_work;
unsigned int state;
u64 size8;
/* Lock the device attach/detach function */
struct mutex dev_lock;
/* Block Device Variables */
bool bdev_attached;
int disk_id;
int major;
struct request_queue *queue;
struct gendisk *gendisk;
struct {
/* Used to convert a byte address to a device address. */
u64 lower_mask;
u64 upper_shift;
u64 upper_mask;
u64 target_mask;
u64 target_shift;
} _stripe;
unsigned int dma_fault;
int scrub_hard;
int n_targets;
struct rsxx_dma_ctrl *ctrl;
};
enum rsxx_pci_regmap {
HWID = 0x00, /* Hardware Identification Register */
SCRATCH = 0x04, /* Scratch/Debug Register */
RESET = 0x08, /* Reset Register */
ISR = 0x10, /* Interrupt Status Register */
IER = 0x14, /* Interrupt Enable Register */
IPR = 0x18, /* Interrupt Poll Register */
CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */
CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/
HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */
SW_CMD_IDX = 0x2C, /* Software Processed Command Index */
SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */
SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */
HW_STATUS_CNT = 0x38, /* Hardware Status Counter */
SW_STATUS_CNT = 0x3C, /* Deprecated */
CREG_CMD = 0x40, /* CPU Command Register */
CREG_ADD = 0x44, /* CPU Address Register */
CREG_CNT = 0x48, /* CPU Count Register */
CREG_STAT = 0x4C, /* CPU Status Register */
CREG_DATA0 = 0x50, /* CPU Data Registers */
CREG_DATA1 = 0x54,
CREG_DATA2 = 0x58,
CREG_DATA3 = 0x5C,
CREG_DATA4 = 0x60,
CREG_DATA5 = 0x64,
CREG_DATA6 = 0x68,
CREG_DATA7 = 0x6c,
INTR_COAL = 0x70, /* Interrupt Coalescing Register */
HW_ERROR = 0x74, /* Card Error Register */
PCI_DEBUG0 = 0x78, /* PCI Debug Registers */
PCI_DEBUG1 = 0x7C,
PCI_DEBUG2 = 0x80,
PCI_DEBUG3 = 0x84,
PCI_DEBUG4 = 0x88,
PCI_DEBUG5 = 0x8C,
PCI_DEBUG6 = 0x90,
PCI_DEBUG7 = 0x94,
PCI_POWER_THROTTLE = 0x98,
PERF_CTRL = 0x9c,
PERF_TIMER_LO = 0xa0,
PERF_TIMER_HI = 0xa4,
PERF_RD512_LO = 0xa8,
PERF_RD512_HI = 0xac,
PERF_WR512_LO = 0xb0,
PERF_WR512_HI = 0xb4,
};
enum rsxx_intr {
CR_INTR_DMA0 = 0x00000001,
CR_INTR_CREG = 0x00000002,
CR_INTR_DMA1 = 0x00000004,
CR_INTR_EVENT = 0x00000008,
CR_INTR_DMA2 = 0x00000010,
CR_INTR_DMA3 = 0x00000020,
CR_INTR_DMA4 = 0x00000040,
CR_INTR_DMA5 = 0x00000080,
CR_INTR_DMA6 = 0x00000100,
CR_INTR_DMA7 = 0x00000200,
CR_INTR_DMA_ALL = 0x000003f5,
CR_INTR_ALL = 0xffffffff,
};
static inline int CR_INTR_DMA(int N)
{
static const unsigned int _CR_INTR_DMA[] = {
CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3,
CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7
};
return _CR_INTR_DMA[N];
}
enum rsxx_pci_reset {
DMA_QUEUE_RESET = 0x00000001,
};
enum rsxx_pci_revision {
RSXX_DISCARD_SUPPORT = 2,
};
enum rsxx_creg_cmd {
CREG_CMD_TAG_MASK = 0x0000FF00,
CREG_OP_WRITE = 0x000000C0,
CREG_OP_READ = 0x000000E0,
};
enum rsxx_creg_addr {
CREG_ADD_CARD_CMD = 0x80001000,
CREG_ADD_CARD_STATE = 0x80001004,
CREG_ADD_CARD_SIZE = 0x8000100c,
CREG_ADD_CAPABILITIES = 0x80001050,
CREG_ADD_LOG = 0x80002000,
CREG_ADD_NUM_TARGETS = 0x80003000,
CREG_ADD_CONFIG = 0xB0000000,
};
enum rsxx_creg_card_cmd {
CARD_CMD_STARTUP = 1,
CARD_CMD_SHUTDOWN = 2,
CARD_CMD_LOW_LEVEL_FORMAT = 3,
CARD_CMD_FPGA_RECONFIG_BR = 4,
CARD_CMD_FPGA_RECONFIG_MAIN = 5,
CARD_CMD_BACKUP = 6,
CARD_CMD_RESET = 7,
CARD_CMD_deprecated = 8,
CARD_CMD_UNINITIALIZE = 9,
CARD_CMD_DSTROY_EMERGENCY = 10,
CARD_CMD_DSTROY_NORMAL = 11,
CARD_CMD_DSTROY_EXTENDED = 12,
CARD_CMD_DSTROY_ABORT = 13,
};
enum rsxx_card_state {
CARD_STATE_SHUTDOWN = 0x00000001,
CARD_STATE_STARTING = 0x00000002,
CARD_STATE_FORMATTING = 0x00000004,
CARD_STATE_UNINITIALIZED = 0x00000008,
CARD_STATE_GOOD = 0x00000010,
CARD_STATE_SHUTTING_DOWN = 0x00000020,
CARD_STATE_FAULT = 0x00000040,
CARD_STATE_RD_ONLY_FAULT = 0x00000080,
CARD_STATE_DSTROYING = 0x00000100,
};
enum rsxx_led {
LED_DEFAULT = 0x0,
LED_IDENTIFY = 0x1,
LED_SOAK = 0x2,
};
enum rsxx_creg_flash_lock {
CREG_FLASH_LOCK = 1,
CREG_FLASH_UNLOCK = 2,
};
enum rsxx_card_capabilities {
CARD_CAP_SUBPAGE_WRITES = 0x00000080,
};
enum rsxx_creg_stat {
CREG_STAT_STATUS_MASK = 0x00000003,
CREG_STAT_SUCCESS = 0x1,
CREG_STAT_ERROR = 0x2,
CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */
CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */
CREG_STAT_TAG_MASK = 0x0000ff00,
};
static inline unsigned int CREG_DATA(int N)
{
return CREG_DATA0 + (N << 2);
}
/*----------------- Convenient Log Wrappers -------------------*/
#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev)
/***** config.c *****/
int rsxx_load_config(struct rsxx_cardinfo *card);
/***** core.c *****/
void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr);
void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr);
void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr);
void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr);
/***** dev.c *****/
int rsxx_attach_dev(struct rsxx_cardinfo *card);
void rsxx_detach_dev(struct rsxx_cardinfo *card);
int rsxx_setup_dev(struct rsxx_cardinfo *card);
void rsxx_destroy_dev(struct rsxx_cardinfo *card);
int rsxx_dev_init(void);
void rsxx_dev_cleanup(void);
/***** dma.c ****/
typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
void *cb_data,
unsigned int status);
int rsxx_dma_setup(struct rsxx_cardinfo *card);
void rsxx_dma_destroy(struct rsxx_cardinfo *card);
int rsxx_dma_init(void);
void rsxx_dma_cleanup(void);
int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
void *cb_data);
/***** cregs.c *****/
int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr,
unsigned int size8,
void *data,
int byte_stream);
int rsxx_creg_read(struct rsxx_cardinfo *card,
u32 addr,
unsigned int size8,
void *data,
int byte_stream);
int rsxx_read_hw_log(struct rsxx_cardinfo *card);
int rsxx_get_card_state(struct rsxx_cardinfo *card,
unsigned int *state);
int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8);
int rsxx_get_num_targets(struct rsxx_cardinfo *card,
unsigned int *n_targets);
int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
u32 *capabilities);
int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd);
int rsxx_creg_setup(struct rsxx_cardinfo *card);
void rsxx_creg_destroy(struct rsxx_cardinfo *card);
int rsxx_creg_init(void);
void rsxx_creg_cleanup(void);
int rsxx_reg_access(struct rsxx_cardinfo *card,
struct rsxx_reg_access __user *ucmd,
int read);
#endif /* __DRIVERS_BLOCK_RSXX_H__ */
此差异已折叠。
#ifndef _LINUX_XD_H
#define _LINUX_XD_H
/*
* This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X).
*
* Author: Pat Mackinlay, pat@it.com.au
* Date: 29/09/92
*
* Revised: 01/01/93, ...
*
* Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com)
* Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst.
*/
#include <linux/interrupt.h>
/* XT hard disk controller registers */
#define XD_DATA (xd_iobase + 0x00) /* data RW register */
#define XD_RESET (xd_iobase + 0x01) /* reset WO register */
#define XD_STATUS (xd_iobase + 0x01) /* status RO register */
#define XD_SELECT (xd_iobase + 0x02) /* select WO register */
#define XD_JUMPER (xd_iobase + 0x02) /* jumper RO register */
#define XD_CONTROL (xd_iobase + 0x03) /* DMAE/INTE WO register */
#define XD_RESERVED (xd_iobase + 0x03) /* reserved */
/* XT hard disk controller commands (incomplete list) */
#define CMD_TESTREADY 0x00 /* test drive ready */
#define CMD_RECALIBRATE 0x01 /* recalibrate drive */
#define CMD_SENSE 0x03 /* request sense */
#define CMD_FORMATDRV 0x04 /* format drive */
#define CMD_VERIFY 0x05 /* read verify */
#define CMD_FORMATTRK 0x06 /* format track */
#define CMD_FORMATBAD 0x07 /* format bad track */
#define CMD_READ 0x08 /* read */
#define CMD_WRITE 0x0A /* write */
#define CMD_SEEK 0x0B /* seek */
/* Controller specific commands */
#define CMD_DTCSETPARAM 0x0C /* set drive parameters (DTC 5150X & CX only?) */
#define CMD_DTCGETECC 0x0D /* get ecc error length (DTC 5150X only?) */
#define CMD_DTCREADBUF 0x0E /* read sector buffer (DTC 5150X only?) */
#define CMD_DTCWRITEBUF 0x0F /* write sector buffer (DTC 5150X only?) */
#define CMD_DTCREMAPTRK 0x11 /* assign alternate track (DTC 5150X only?) */
#define CMD_DTCGETPARAM 0xFB /* get drive parameters (DTC 5150X only?) */
#define CMD_DTCSETSTEP 0xFC /* set step rate (DTC 5150X only?) */
#define CMD_DTCSETGEOM 0xFE /* set geometry data (DTC 5150X only?) */
#define CMD_DTCGETGEOM 0xFF /* get geometry data (DTC 5150X only?) */
#define CMD_ST11GETGEOM 0xF8 /* get geometry data (Seagate ST11R/M only?) */
#define CMD_WDSETPARAM 0x0C /* set drive parameters (WD 1004A27X only?) */
#define CMD_XBSETPARAM 0x0C /* set drive parameters (XEBEC only?) */
/* Bits for command status byte */
#define CSB_ERROR 0x02 /* error */
#define CSB_LUN 0x20 /* logical Unit Number */
/* XT hard disk controller status bits */
#define STAT_READY 0x01 /* controller is ready */
#define STAT_INPUT 0x02 /* data flowing from controller to host */
#define STAT_COMMAND 0x04 /* controller in command phase */
#define STAT_SELECT 0x08 /* controller is selected */
#define STAT_REQUEST 0x10 /* controller requesting data */
#define STAT_INTERRUPT 0x20 /* controller requesting interrupt */
/* XT hard disk controller control bits */
#define PIO_MODE 0x00 /* control bits to set for PIO */
#define DMA_MODE 0x03 /* control bits to set for DMA & interrupt */
#define XD_MAXDRIVES 2 /* maximum 2 drives */
#define XD_TIMEOUT HZ /* 1 second timeout */
#define XD_RETRIES 4 /* maximum 4 retries */
#undef DEBUG /* define for debugging output */
#ifdef DEBUG
#define DEBUG_STARTUP /* debug driver initialisation */
#define DEBUG_OVERRIDE /* debug override geometry detection */
#define DEBUG_READWRITE /* debug each read/write command */
#define DEBUG_OTHER /* debug misc. interrupt/DMA stuff */
#define DEBUG_COMMAND /* debug each controller command */
#endif /* DEBUG */
/* this structure defines the XT drives and their types */
typedef struct {
u_char heads;
u_short cylinders;
u_char sectors;
u_char control;
int unit;
} XD_INFO;
/* this structure defines a ROM BIOS signature */
typedef struct {
unsigned int offset;
const char *string;
void (*init_controller)(unsigned int address);
void (*init_drive)(u_char drive);
const char *name;
} XD_SIGNATURE;
#ifndef MODULE
static int xd_manual_geo_init (char *command);
#endif /* MODULE */
static u_char xd_detect (u_char *controller, unsigned int *address);
static u_char xd_initdrives (void (*init_drive)(u_char drive));
static void do_xd_request (struct request_queue * q);
static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg);
static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count);
static void xd_recalibrate (u_char drive);
static irqreturn_t xd_interrupt_handler(int irq, void *dev_id);
static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count);
static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control);
static void xd_watchdog (unsigned long unused);
static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout);
static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout);
/* card specific setup and geometry gathering code */
static void xd_dtc_init_controller (unsigned int address);
static void xd_dtc5150cx_init_drive (u_char drive);
static void xd_dtc_init_drive (u_char drive);
static void xd_wd_init_controller (unsigned int address);
static void xd_wd_init_drive (u_char drive);
static void xd_seagate_init_controller (unsigned int address);
static void xd_seagate_init_drive (u_char drive);
static void xd_omti_init_controller (unsigned int address);
static void xd_omti_init_drive (u_char drive);
static void xd_xebec_init_controller (unsigned int address);
static void xd_xebec_init_drive (u_char drive);
static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc);
static void xd_override_init_drive (u_char drive);
#endif /* _LINUX_XD_H */
......@@ -46,6 +46,7 @@
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/balloon.h>
#include "common.h"
/*
......@@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
ret = gnttab_unmap_refs(unmap, NULL, pages,
segs_to_unmap);
BUG_ON(ret);
free_xenballooned_pages(segs_to_unmap, pages);
segs_to_unmap = 0;
}
......@@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req,
GFP_KERNEL);
if (!persistent_gnt)
return -ENOMEM;
persistent_gnt->page = alloc_page(GFP_KERNEL);
if (!persistent_gnt->page) {
if (alloc_xenballooned_pages(1, &persistent_gnt->page,
false)) {
kfree(persistent_gnt);
return -ENOMEM;
}
......@@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
goto fail_response;
}
preq.dev = req->u.rw.handle;
preq.sector_number = req->u.rw.sector_number;
preq.nr_sects = 0;
......
......@@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
be->blkif = NULL;
}
kfree(be->mode);
kfree(be);
dev_set_drvdata(&dev->dev, NULL);
return 0;
......@@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch,
= container_of(watch, struct backend_info, backend_watch);
struct xenbus_device *dev = be->dev;
int cdrom = 0;
unsigned long handle;
char *device_type;
DPRINTK("");
......@@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch,
return;
}
if ((be->major || be->minor) &&
((be->major != major) || (be->minor != minor))) {
pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
be->major, be->minor, major, minor);
if (be->major | be->minor) {
if (be->major != major || be->minor != minor)
pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
be->major, be->minor, major, minor);
return;
}
......@@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch,
kfree(device_type);
}
if (be->major == 0 && be->minor == 0) {
/* Front end dir is a number, which is used as the handle. */
char *p = strrchr(dev->otherend, '/') + 1;
long handle;
err = strict_strtoul(p, 0, &handle);
if (err)
return;
/* Front end dir is a number, which is used as the handle. */
err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
if (err)
return;
be->major = major;
be->minor = minor;
be->major = major;
be->minor = minor;
err = xen_vbd_create(be->blkif, handle, major, minor,
(NULL == strchr(be->mode, 'w')), cdrom);
if (err) {
be->major = 0;
be->minor = 0;
xenbus_dev_fatal(dev, err, "creating vbd structure");
return;
}
err = xen_vbd_create(be->blkif, handle, major, minor,
!strchr(be->mode, 'w'), cdrom);
if (err)
xenbus_dev_fatal(dev, err, "creating vbd structure");
else {
err = xenvbd_sysfs_addif(dev);
if (err) {
xen_vbd_free(&be->blkif->vbd);
be->major = 0;
be->minor = 0;
xenbus_dev_fatal(dev, err, "creating sysfs entries");
return;
}
}
if (err) {
kfree(be->mode);
be->mode = NULL;
be->major = 0;
be->minor = 0;
} else {
/* We're potentially connected now */
xen_update_blkif_status(be->blkif);
}
......
......@@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work)
static void blkif_free(struct blkfront_info *info, int suspend)
{
struct llist_node *all_gnts;
struct grant *persistent_gnt;
struct grant *persistent_gnt, *tmp;
struct llist_node *n;
/* Prevent new requests being issued until we fix things up. */
......@@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
/* Remove all persistent grants */
if (info->persistent_gnts_c) {
all_gnts = llist_del_all(&info->persistent_gnts);
llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
while (persistent_gnt) {
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
__free_page(pfn_to_page(persistent_gnt->pfn));
kfree(persistent_gnt);
tmp = persistent_gnt;
n = persistent_gnt->node.next;
if (n)
persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
else
persistent_gnt = NULL;
kfree(tmp);
}
info->persistent_gnts_c = 0;
}
......
......@@ -124,31 +124,6 @@ static inline void init_llist_head(struct llist_head *list)
&(pos)->member != NULL; \
(pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
/**
* llist_for_each_entry_safe - iterate safely against remove over some entries
* of lock-less list of given type.
* @pos: the type * to use as a loop cursor.
* @n: another type * to use as a temporary storage.
* @node: the fist entry of deleted list entries.
* @member: the name of the llist_node with the struct.
*
* In general, some entries of the lock-less list can be traversed
* safely only after being removed from list, so start with an entry
* instead of list head. This variant allows removal of entries
* as we iterate.
*
* If being used on entries deleted from lock-less list directly, the
* traverse order is from the newest to the oldest added entry. If
* you want to traverse from the oldest to the newest, you must
* reverse the order by yourself before traversing.
*/
#define llist_for_each_entry_safe(pos, n, node, member) \
for ((pos) = llist_entry((node), typeof(*(pos)), member), \
(n) = (pos)->member.next; \
&(pos)->member != NULL; \
(pos) = llist_entry(n, typeof(*(pos)), member), \
(n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
/**
* llist_empty - tests whether a lock-less list is empty
* @head: the list to test
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册