提交 16008d64 编写于 作者: L Linus Torvalds

Merge branch 'for-3.3/drivers' of git://git.kernel.dk/linux-block

* 'for-3.3/drivers' of git://git.kernel.dk/linux-block:
  mtip32xx: do rebuild monitoring asynchronously
  xen-blkfront: Use kcalloc instead of kzalloc to allocate array
  mtip32xx: uninitialized variable in mtip_quiesce_io()
  mtip32xx: updates based on feedback
  xen-blkback: convert hole punching to discard request on loop devices
  xen/blkback: Move processing of BLKIF_OP_DISCARD from dispatch_rw_block_io
  xen/blk[front|back]: Enhance discard support with secure erasing support.
  xen/blk[front|back]: Squash blkif_request_rw and blkif_request_discard together
  mtip32xx: update to new ->make_request() API
  mtip32xx: add module.h include to avoid conflict with moduleh tree
  mtip32xx: mark a few more items static
  mtip32xx: ensure that all local functions are static
  mtip32xx: cleanup compat ioctl handling
  mtip32xx: fix warnings/errors on 32-bit compiles
  block: Add driver for Micron RealSSD pcie flash cards
......@@ -116,6 +116,8 @@ config PARIDE
source "drivers/block/paride/Kconfig"
source "drivers/block/mtip32xx/Kconfig"
config BLK_CPQ_DA
tristate "Compaq SMART2 support"
depends on PCI && VIRT_TO_BUS
......
......@@ -39,5 +39,6 @@ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
swim_mod-y := swim.o swim_asm.o
#
# mtip32xx device driver configuration
#
config BLK_DEV_PCIESSD_MTIP32XX
tristate "Block Device Driver for Micron PCIe SSDs"
depends on HOTPLUG_PCI_PCIE
help
This enables the block driver for Micron PCIe SSDs.
#
# Makefile for Block device driver for Micron PCIe SSD
#
obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx.o
此差异已折叠。
/*
* mtip32xx.h - Header file for the P320 SSD Block Driver
* Copyright (C) 2011 Micron Technology, Inc.
*
* Portions of this code were derived from works subjected to the
* following copyright:
* Copyright (C) 2009 Integrated Device Technology, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __MTIP32XX_H__
#define __MTIP32XX_H__
#include <linux/spinlock.h>
#include <linux/rwsem.h>
#include <linux/ata.h>
#include <linux/interrupt.h>
#include <linux/genhd.h>
#include <linux/version.h>
/* Offset of Subsystem Device ID in pci confoguration space */
#define PCI_SUBSYSTEM_DEVICEID 0x2E
/* offset of Device Control register in PCIe extended capabilites space */
#define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48
/* # of times to retry timed out IOs */
#define MTIP_MAX_RETRIES 5
/* Various timeout values in ms */
#define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000
#define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000
#define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000
/* check for timeouts every 500ms */
#define MTIP_TIMEOUT_CHECK_PERIOD 500
/* ftl rebuild */
#define MTIP_FTL_REBUILD_OFFSET 142
#define MTIP_FTL_REBUILD_MAGIC 0xED51
#define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000
/* Macro to extract the tag bit number from a tag value. */
#define MTIP_TAG_BIT(tag) (tag & 0x1F)
/*
* Macro to extract the tag index from a tag value. The index
* is used to access the correct s_active/Command Issue register based
* on the tag value.
*/
#define MTIP_TAG_INDEX(tag) (tag >> 5)
/*
* Maximum number of scatter gather entries
* a single command may have.
*/
#define MTIP_MAX_SG 128
/*
* Maximum number of slot groups (Command Issue & s_active registers)
* NOTE: This is the driver maximum; check dd->slot_groups for actual value.
*/
#define MTIP_MAX_SLOT_GROUPS 8
/* Internal command tag. */
#define MTIP_TAG_INTERNAL 0
/* Micron Vendor ID & P320x SSD Device ID */
#define PCI_VENDOR_ID_MICRON 0x1344
#define P320_DEVICE_ID 0x5150
/* Driver name and version strings */
#define MTIP_DRV_NAME "mtip32xx"
#define MTIP_DRV_VERSION "1.2.6os3"
/* Maximum number of minor device numbers per device. */
#define MTIP_MAX_MINORS 16
/* Maximum number of supported command slots. */
#define MTIP_MAX_COMMAND_SLOTS (MTIP_MAX_SLOT_GROUPS * 32)
/*
* Per-tag bitfield size in longs.
* Linux bit manipulation functions
* (i.e. test_and_set_bit, find_next_zero_bit)
* manipulate memory in longs, so we try to make the math work.
* take the slot groups and find the number of longs, rounding up.
* Careful! i386 and x86_64 use different size longs!
*/
#define U32_PER_LONG (sizeof(long) / sizeof(u32))
#define SLOTBITS_IN_LONGS ((MTIP_MAX_SLOT_GROUPS + \
(U32_PER_LONG-1))/U32_PER_LONG)
/* BAR number used to access the HBA registers. */
#define MTIP_ABAR 5
/* Forced Unit Access Bit */
#define FUA_BIT 0x80
#ifdef DEBUG
#define dbg_printk(format, arg...) \
printk(pr_fmt(format), ##arg);
#else
#define dbg_printk(format, arg...)
#endif
#define __force_bit2int (unsigned int __force)
/* below are bit numbers in 'flags' defined in mtip_port */
#define MTIP_FLAG_IC_ACTIVE_BIT 0
#define MTIP_FLAG_EH_ACTIVE_BIT 1
#define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2
#define MTIP_FLAG_ISSUE_CMDS_BIT 4
#define MTIP_FLAG_REBUILD_BIT 5
#define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8
/* Register Frame Information Structure (FIS), host to device. */
struct host_to_dev_fis {
/*
* FIS type.
* - 27h Register FIS, host to device.
* - 34h Register FIS, device to host.
* - 39h DMA Activate FIS, device to host.
* - 41h DMA Setup FIS, bi-directional.
* - 46h Data FIS, bi-directional.
* - 58h BIST Activate FIS, bi-directional.
* - 5Fh PIO Setup FIS, device to host.
* - A1h Set Device Bits FIS, device to host.
*/
unsigned char type;
unsigned char opts;
unsigned char command;
unsigned char features;
union {
unsigned char lba_low;
unsigned char sector;
};
union {
unsigned char lba_mid;
unsigned char cyl_low;
};
union {
unsigned char lba_hi;
unsigned char cyl_hi;
};
union {
unsigned char device;
unsigned char head;
};
union {
unsigned char lba_low_ex;
unsigned char sector_ex;
};
union {
unsigned char lba_mid_ex;
unsigned char cyl_low_ex;
};
union {
unsigned char lba_hi_ex;
unsigned char cyl_hi_ex;
};
unsigned char features_ex;
unsigned char sect_count;
unsigned char sect_cnt_ex;
unsigned char res2;
unsigned char control;
unsigned int res3;
};
/* Command header structure. */
struct mtip_cmd_hdr {
/*
* Command options.
* - Bits 31:16 Number of PRD entries.
* - Bits 15:8 Unused in this implementation.
* - Bit 7 Prefetch bit, informs the drive to prefetch PRD entries.
* - Bit 6 Write bit, should be set when writing data to the device.
* - Bit 5 Unused in this implementation.
* - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes).
*/
unsigned int opts;
/* This field is unsed when using NCQ. */
union {
unsigned int byte_count;
unsigned int status;
};
/*
* Lower 32 bits of the command table address associated with this
* header. The command table addresses must be 128 byte aligned.
*/
unsigned int ctba;
/*
* If 64 bit addressing is used this field is the upper 32 bits
* of the command table address associated with this command.
*/
unsigned int ctbau;
/* Reserved and unused. */
unsigned int res[4];
};
/* Command scatter gather structure (PRD). */
struct mtip_cmd_sg {
/*
* Low 32 bits of the data buffer address. For P320 this
* address must be 8 byte aligned signified by bits 2:0 being
* set to 0.
*/
unsigned int dba;
/*
* When 64 bit addressing is used this field is the upper
* 32 bits of the data buffer address.
*/
unsigned int dba_upper;
/* Unused. */
unsigned int reserved;
/*
* Bit 31: interrupt when this data block has been transferred.
* Bits 30..22: reserved
* Bits 21..0: byte count (minus 1). For P320 the byte count must be
* 8 byte aligned signified by bits 2:0 being set to 1.
*/
unsigned int info;
};
struct mtip_port;
/* Structure used to describe a command. */
struct mtip_cmd {
struct mtip_cmd_hdr *command_header; /* ptr to command header entry */
dma_addr_t command_header_dma; /* corresponding physical address */
void *command; /* ptr to command table entry */
dma_addr_t command_dma; /* corresponding physical address */
void *comp_data; /* data passed to completion function comp_func() */
/*
* Completion function called by the ISR upon completion of
* a command.
*/
void (*comp_func)(struct mtip_port *port,
int tag,
void *data,
int status);
/* Additional callback function that may be called by comp_func() */
void (*async_callback)(void *data, int status);
void *async_data; /* Addl. data passed to async_callback() */
int scatter_ents; /* Number of scatter list entries used */
struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
int retries; /* The number of retries left for this command. */
int direction; /* Data transfer direction */
unsigned long comp_time; /* command completion time, in jiffies */
atomic_t active; /* declares if this command sent to the drive. */
};
/* Structure used to describe a port. */
struct mtip_port {
/* Pointer back to the driver data for this port. */
struct driver_data *dd;
/*
* Used to determine if the data pointed to by the
* identify field is valid.
*/
unsigned long identify_valid;
/* Base address of the memory mapped IO for the port. */
void __iomem *mmio;
/* Array of pointers to the memory mapped s_active registers. */
void __iomem *s_active[MTIP_MAX_SLOT_GROUPS];
/* Array of pointers to the memory mapped completed registers. */
void __iomem *completed[MTIP_MAX_SLOT_GROUPS];
/* Array of pointers to the memory mapped Command Issue registers. */
void __iomem *cmd_issue[MTIP_MAX_SLOT_GROUPS];
/*
* Pointer to the beginning of the command header memory as used
* by the driver.
*/
void *command_list;
/*
* Pointer to the beginning of the command header memory as used
* by the DMA.
*/
dma_addr_t command_list_dma;
/*
* Pointer to the beginning of the RX FIS memory as used
* by the driver.
*/
void *rxfis;
/*
* Pointer to the beginning of the RX FIS memory as used
* by the DMA.
*/
dma_addr_t rxfis_dma;
/*
* Pointer to the beginning of the command table memory as used
* by the driver.
*/
void *command_table;
/*
* Pointer to the beginning of the command table memory as used
* by the DMA.
*/
dma_addr_t command_tbl_dma;
/*
* Pointer to the beginning of the identify data memory as used
* by the driver.
*/
u16 *identify;
/*
* Pointer to the beginning of the identify data memory as used
* by the DMA.
*/
dma_addr_t identify_dma;
/*
* Pointer to the beginning of a sector buffer that is used
* by the driver when issuing internal commands.
*/
u16 *sector_buffer;
/*
* Pointer to the beginning of a sector buffer that is used
* by the DMA when the driver issues internal commands.
*/
dma_addr_t sector_buffer_dma;
/*
* Bit significant, used to determine if a command slot has
* been allocated. i.e. the slot is in use. Bits are cleared
* when the command slot and all associated data structures
* are no longer needed.
*/
unsigned long allocated[SLOTBITS_IN_LONGS];
/*
* used to queue commands when an internal command is in progress
* or error handling is active
*/
unsigned long cmds_to_issue[SLOTBITS_IN_LONGS];
/*
* Array of command slots. Structure includes pointers to the
* command header and command table, and completion function and data
* pointers.
*/
struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS];
/* Used by mtip_service_thread to wait for an event */
wait_queue_head_t svc_wait;
/*
* indicates the state of the port. Also, helps the service thread
* to determine its action on wake up.
*/
unsigned long flags;
/*
* Timer used to complete commands that have been active for too long.
*/
struct timer_list cmd_timer;
/*
* Semaphore used to block threads if there are no
* command slots available.
*/
struct semaphore cmd_slot;
/* Spinlock for working around command-issue bug. */
spinlock_t cmd_issue_lock;
};
/*
* Driver private data structure.
*
* One structure is allocated per probed device.
*/
struct driver_data {
void __iomem *mmio; /* Base address of the HBA registers. */
int major; /* Major device number. */
int instance; /* Instance number. First device probed is 0, ... */
struct gendisk *disk; /* Pointer to our gendisk structure. */
struct pci_dev *pdev; /* Pointer to the PCI device structure. */
struct request_queue *queue; /* Our request queue. */
struct mtip_port *port; /* Pointer to the port data structure. */
/* Tasklet used to process the bottom half of the ISR. */
struct tasklet_struct tasklet;
unsigned product_type; /* magic value declaring the product type */
unsigned slot_groups; /* number of slot groups the product supports */
atomic_t drv_cleanup_done; /* Atomic variable for SRSI */
unsigned long index; /* Index to determine the disk name */
unsigned int ftlrebuildflag; /* FTL rebuild flag */
atomic_t resumeflag; /* Atomic variable to track suspend/resume */
atomic_t eh_active; /* Flag for error handling tracking */
struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
};
#endif
......@@ -39,9 +39,6 @@
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/loop.h>
#include <linux/falloc.h>
#include <linux/fs.h>
#include <xen/events.h>
#include <xen/page.h>
......@@ -362,7 +359,7 @@ static int xen_blkbk_map(struct blkif_request *req,
{
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int i;
int nseg = req->nr_segments;
int nseg = req->u.rw.nr_segments;
int ret = 0;
/*
......@@ -416,30 +413,25 @@ static int xen_blkbk_map(struct blkif_request *req,
return ret;
}
static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
static int dispatch_discard_io(struct xen_blkif *blkif,
struct blkif_request *req)
{
int err = 0;
int status = BLKIF_RSP_OKAY;
struct block_device *bdev = blkif->vbd.bdev;
if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
/* just forward the discard request */
blkif->st_ds_req++;
xen_blkif_get(blkif);
if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
unsigned long secure = (blkif->vbd.discard_secure &&
(req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
BLKDEV_DISCARD_SECURE : 0;
err = blkdev_issue_discard(bdev,
req->u.discard.sector_number,
req->u.discard.nr_sectors,
GFP_KERNEL, 0);
else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
/* punch a hole in the backing file */
struct loop_device *lo = bdev->bd_disk->private_data;
struct file *file = lo->lo_backing_file;
if (file->f_op->fallocate)
err = file->f_op->fallocate(file,
FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
req->u.discard.sector_number << 9,
req->u.discard.nr_sectors << 9);
else
err = -EOPNOTSUPP;
GFP_KERNEL, secure);
} else
err = -EOPNOTSUPP;
......@@ -449,7 +441,9 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
} else if (err)
status = BLKIF_RSP_ERROR;
make_response(blkif, req->id, req->operation, status);
make_response(blkif, req->u.discard.id, req->operation, status);
xen_blkif_put(blkif);
return err;
}
static void xen_blk_drain_io(struct xen_blkif *blkif)
......@@ -573,8 +567,11 @@ __do_block_io_op(struct xen_blkif *blkif)
/* Apply all sanity checks to /private copy/ of request. */
barrier();
if (dispatch_rw_block_io(blkif, &req, pending_req))
if (unlikely(req.operation == BLKIF_OP_DISCARD)) {
free_req(pending_req);
if (dispatch_discard_io(blkif, &req))
break;
} else if (dispatch_rw_block_io(blkif, &req, pending_req))
break;
/* Yield point for this unbounded loop. */
......@@ -633,10 +630,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
blkif->st_f_req++;
operation = WRITE_FLUSH;
break;
case BLKIF_OP_DISCARD:
blkif->st_ds_req++;
operation = REQ_DISCARD;
break;
default:
operation = 0; /* make gcc happy */
goto fail_response;
......@@ -644,9 +637,9 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
}
/* Check that the number of segments is sane. */
nseg = req->nr_segments;
if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
operation != REQ_DISCARD) ||
nseg = req->u.rw.nr_segments;
if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
nseg);
......@@ -654,12 +647,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
goto fail_response;
}
preq.dev = req->handle;
preq.dev = req->u.rw.handle;
preq.sector_number = req->u.rw.sector_number;
preq.nr_sects = 0;
pending_req->blkif = blkif;
pending_req->id = req->id;
pending_req->id = req->u.rw.id;
pending_req->operation = req->operation;
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
......@@ -707,7 +700,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
* the hypercall to unmap the grants - that is all done in
* xen_blkbk_unmap.
*/
if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
if (xen_blkbk_map(req, pending_req, seg))
goto fail_flush;
/*
......@@ -739,23 +732,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
/* This will be hit if the operation was a flush or discard. */
if (!bio) {
BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
BUG_ON(operation != WRITE_FLUSH);
if (operation == WRITE_FLUSH) {
bio = bio_alloc(GFP_KERNEL, 0);
if (unlikely(bio == NULL))
goto fail_put_bio;
bio = bio_alloc(GFP_KERNEL, 0);
if (unlikely(bio == NULL))
goto fail_put_bio;
biolist[nbio++] = bio;
bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
} else if (operation == REQ_DISCARD) {
xen_blk_discard(blkif, req);
xen_blkif_put(blkif);
free_req(pending_req);
return 0;
}
biolist[nbio++] = bio;
bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
}
/*
......@@ -784,7 +770,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
xen_blkbk_unmap(pending_req);
fail_response:
/* Haven't submitted any bio's yet. */
make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
free_req(pending_req);
msleep(1); /* back off a bit */
return -EIO;
......
......@@ -60,58 +60,66 @@ struct blkif_common_response {
char dummy;
};
/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
} __attribute__((__packed__));
struct blkif_x86_32_request_discard {
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
blkif_vdev_t _pad1; /* was "handle" for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
uint64_t nr_sectors;
};
uint64_t nr_sectors;
} __attribute__((__packed__));
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
union {
struct blkif_x86_32_request_rw rw;
struct blkif_x86_32_request_discard discard;
} u;
};
} __attribute__((__packed__));
/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
#pragma pack(pop)
/* x86_64 protocol version */
struct blkif_x86_64_request_rw {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */
uint64_t id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
} __attribute__((__packed__));
struct blkif_x86_64_request_discard {
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
blkif_vdev_t _pad1; /* was "handle" for read/write requests */
uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */
uint64_t id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
uint64_t nr_sectors;
};
uint64_t nr_sectors;
} __attribute__((__packed__));
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t __attribute__((__aligned__(8))) id;
union {
struct blkif_x86_64_request_rw rw;
struct blkif_x86_64_request_discard discard;
} u;
};
} __attribute__((__packed__));
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
uint8_t operation; /* copied from request */
......@@ -156,6 +164,7 @@ struct xen_vbd {
/* Cached size parameter. */
sector_t size;
bool flush_support;
bool discard_secure;
};
struct backend_info;
......@@ -237,22 +246,23 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
dst->operation = src->operation;
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
switch (src->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
dst->u.rw.nr_segments = src->u.rw.nr_segments;
dst->u.rw.handle = src->u.rw.handle;
dst->u.rw.id = src->u.rw.id;
dst->u.rw.sector_number = src->u.rw.sector_number;
barrier();
if (n > dst->nr_segments)
n = dst->nr_segments;
if (n > dst->u.rw.nr_segments)
n = dst->u.rw.nr_segments;
for (i = 0; i < n; i++)
dst->u.rw.seg[i] = src->u.rw.seg[i];
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
......@@ -266,22 +276,23 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
dst->operation = src->operation;
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
switch (src->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
dst->u.rw.nr_segments = src->u.rw.nr_segments;
dst->u.rw.handle = src->u.rw.handle;
dst->u.rw.id = src->u.rw.id;
dst->u.rw.sector_number = src->u.rw.sector_number;
barrier();
if (n > dst->nr_segments)
n = dst->nr_segments;
if (n > dst->u.rw.nr_segments)
n = dst->u.rw.nr_segments;
for (i = 0; i < n; i++)
dst->u.rw.seg[i] = src->u.rw.seg[i];
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
......
......@@ -338,6 +338,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
if (q && q->flush_flags)
vbd->flush_support = true;
if (q && blk_queue_secdiscard(q))
vbd->discard_secure = true;
DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
handle, blkif->domid);
return 0;
......@@ -420,6 +423,15 @@ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
state = 1;
blkif->blk_backend_type = BLKIF_BACKEND_PHY;
}
/* Optional. */
err = xenbus_printf(xbt, dev->nodename,
"discard-secure", "%d",
blkif->vbd.discard_secure);
if (err) {
xenbus_dev_fatal(dev, err,
"writting discard-secure");
goto kfree;
}
}
} else {
err = PTR_ERR(type);
......
......@@ -98,7 +98,8 @@ struct blkfront_info
unsigned long shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
unsigned int feature_discard;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
int is_ready;
......@@ -135,15 +136,15 @@ static int get_id_from_freelist(struct blkfront_info *info)
{
unsigned long free = info->shadow_free;
BUG_ON(free >= BLK_RING_SIZE);
info->shadow_free = info->shadow[free].req.id;
info->shadow[free].req.id = 0x0fffffee; /* debug */
info->shadow_free = info->shadow[free].req.u.rw.id;
info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
return free;
}
static void add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
info->shadow[id].req.id = info->shadow_free;
info->shadow[id].req.u.rw.id = info->shadow_free;
info->shadow[id].request = NULL;
info->shadow_free = id;
}
......@@ -156,7 +157,7 @@ static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
if (end > nr_minors) {
unsigned long *bitmap, *old;
bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
GFP_KERNEL);
if (bitmap == NULL)
return -ENOMEM;
......@@ -287,9 +288,9 @@ static int blkif_queue_request(struct request *req)
id = get_id_from_freelist(info);
info->shadow[id].request = req;
ring_req->id = id;
ring_req->u.rw.id = id;
ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
ring_req->handle = info->handle;
ring_req->u.rw.handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
......@@ -305,16 +306,21 @@ static int blkif_queue_request(struct request *req)
ring_req->operation = info->flush_op;
}
if (unlikely(req->cmd_flags & REQ_DISCARD)) {
if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
/* id, sector_number and handle are set above. */
ring_req->operation = BLKIF_OP_DISCARD;
ring_req->nr_segments = 0;
ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
else
ring_req->u.discard.flag = 0;
} else {
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
info->sg);
BUG_ON(ring_req->u.rw.nr_segments >
BLKIF_MAX_SEGMENTS_PER_REQUEST);
for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
fsect = sg->offset >> 9;
lsect = fsect + (sg->length >> 9) - 1;
......@@ -424,6 +430,8 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
blk_queue_max_discard_sectors(rq, get_capacity(gd));
rq->limits.discard_granularity = info->discard_granularity;
rq->limits.discard_alignment = info->discard_alignment;
if (info->feature_secdiscard)
queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
......@@ -705,7 +713,9 @@ static void blkif_free(struct blkfront_info *info, int suspend)
static void blkif_completion(struct blk_shadow *s)
{
int i;
for (i = 0; i < s->req.nr_segments; i++)
/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
* flag. */
for (i = 0; i < s->req.u.rw.nr_segments; i++)
gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
}
......@@ -736,7 +746,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
id = bret->id;
req = info->shadow[id].request;
blkif_completion(&info->shadow[id]);
if (bret->operation != BLKIF_OP_DISCARD)
blkif_completion(&info->shadow[id]);
add_id_to_freelist(info, id);
......@@ -749,7 +760,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
info->gd->disk_name);
error = -EOPNOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
}
__blk_end_request_all(req, error);
break;
......@@ -763,7 +776,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
info->shadow[id].req.nr_segments == 0)) {
info->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
info->flush_op == BLKIF_OP_WRITE_BARRIER ?
"barrier" : "flush disk cache",
......@@ -984,8 +997,8 @@ static int blkfront_probe(struct xenbus_device *dev,
INIT_WORK(&info->work, blkif_restart_queue);
for (i = 0; i < BLK_RING_SIZE; i++)
info->shadow[i].req.id = i+1;
info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
info->shadow[i].req.u.rw.id = i+1;
info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
......@@ -1019,9 +1032,9 @@ static int blkif_recover(struct blkfront_info *info)
/* Stage 2: Set up free list. */
memset(&info->shadow, 0, sizeof(info->shadow));
for (i = 0; i < BLK_RING_SIZE; i++)
info->shadow[i].req.id = i+1;
info->shadow[i].req.u.rw.id = i+1;
info->shadow_free = info->ring.req_prod_pvt;
info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
/* Stage 3: Find pending requests and requeue them. */
for (i = 0; i < BLK_RING_SIZE; i++) {
......@@ -1034,17 +1047,19 @@ static int blkif_recover(struct blkfront_info *info)
*req = copy[i].req;
/* We get a new request id, and must reset the shadow state. */
req->id = get_id_from_freelist(info);
memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
req->u.rw.id = get_id_from_freelist(info);
memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
if (req->operation != BLKIF_OP_DISCARD) {
/* Rewrite any grant references invalidated by susp/resume. */
for (j = 0; j < req->nr_segments; j++)
gnttab_grant_foreign_access_ref(
req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->id].frame[j]),
rq_data_dir(info->shadow[req->id].request));
info->shadow[req->id].req = *req;
for (j = 0; j < req->u.rw.nr_segments; j++)
gnttab_grant_foreign_access_ref(
req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
rq_data_dir(info->shadow[req->u.rw.id].request));
}
info->shadow[req->u.rw.id].req = *req;
info->ring.req_prod_pvt++;
}
......@@ -1135,11 +1150,13 @@ static void blkfront_setup_discard(struct blkfront_info *info)
char *type;
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned int discard_secure;
type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
if (IS_ERR(type))
return;
info->feature_secdiscard = 0;
if (strncmp(type, "phy", 3) == 0) {
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-granularity", "%u", &discard_granularity,
......@@ -1150,6 +1167,12 @@ static void blkfront_setup_discard(struct blkfront_info *info)
info->discard_granularity = discard_granularity;
info->discard_alignment = discard_alignment;
}
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-secure", "%d", &discard_secure,
NULL);
if (!err)
info->feature_secdiscard = discard_secure;
} else if (strncmp(type, "file", 4) == 0)
info->feature_discard = 1;
......
......@@ -84,6 +84,21 @@ typedef uint64_t blkif_sector_t;
* e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
* http://www.seagate.com/staticfiles/support/disc/manuals/
* Interface%20manuals/100293068c.pdf
* The backend can optionally provide three extra XenBus attributes to
* further optimize the discard functionality:
* 'discard-aligment' - Devices that support discard functionality may
* internally allocate space in units that are bigger than the exported
* logical block size. The discard-alignment parameter indicates how many bytes
* the beginning of the partition is offset from the internal allocation unit's
* natural alignment.
* 'discard-granularity' - Devices that support discard functionality may
* internally allocate space using units that are bigger than the logical block
* size. The discard-granularity parameter indicates the size of the internal
* allocation unit in bytes if reported by the device. Otherwise the
* discard-granularity will be set to match the device's physical block size.
* 'discard-secure' - All copies of the discarded sectors (potentially created
* by garbage collection) must also be erased. To use this feature, the flag
* BLKIF_DISCARD_SECURE must be set in the blkif_request_trim.
*/
#define BLKIF_OP_DISCARD 5
......@@ -95,6 +110,12 @@ typedef uint64_t blkif_sector_t;
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
struct blkif_request_rw {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
#ifdef CONFIG_X86_64
uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */
#endif
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */
......@@ -102,23 +123,28 @@ struct blkif_request_rw {
/* @last_sect: last sector in frame to transfer (inclusive). */
uint8_t first_sect, last_sect;
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
} __attribute__((__packed__));
struct blkif_request_discard {
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */
#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
blkif_vdev_t _pad1; /* only for read/write requests */
#ifdef CONFIG_X86_64
uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/
#endif
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;
uint64_t nr_sectors;
};
uint64_t nr_sectors;
uint8_t _pad3;
} __attribute__((__packed__));
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
union {
struct blkif_request_rw rw;
struct blkif_request_discard discard;
} u;
};
} __attribute__((__packed__));
struct blkif_response {
uint64_t id; /* copied from request */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册