Commit 2f408420, authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (34 commits)
  cfq-iosched: Fix the incorrect timeslice accounting with forced_dispatch
  loop: Update mtime when writing using aops
  block: expose the statistics in blkio.time and blkio.sectors for the root cgroup
  backing-dev: Handle class_create() failure
  Block: Fix block/elevator.c elevator_get() off-by-one error
  drbd: lc_element_by_index() never returns NULL
  cciss: unlock on error path
  cfq-iosched: Do not merge queues of BE and IDLE classes
  cfq-iosched: Add additional blktrace log messages in CFQ for easier debugging
  i2o: Remove the dangerous kobj_to_i2o_device macro
  block: remove 16 bytes of padding from struct request on 64bits
  cfq-iosched: fix a kbuild regression
  block: make CONFIG_BLK_CGROUP visible
  Remove GENHD_FL_DRIVERFS
  block: Export max number of segments and max segment size in sysfs
  block: Finalize conversion of block limits functions
  block: Fix overrun in lcm() and move it to lib
  vfs: improve writeback_inodes_wb()
  paride: fix off-by-one test
  drbd: fix al-to-on-disk-bitmap for 4k logical_block_size
  ...
...@@ -16,6 +16,15 @@ ...@@ -16,6 +16,15 @@
</address> </address>
</affiliation> </affiliation>
</author> </author>
<author>
<firstname>William</firstname>
<surname>Cohen</surname>
<affiliation>
<address>
<email>wcohen@redhat.com</email>
</address>
</affiliation>
</author>
</authorgroup> </authorgroup>
<legalnotice> <legalnotice>
...@@ -91,4 +100,8 @@ ...@@ -91,4 +100,8 @@
!Iinclude/trace/events/signal.h !Iinclude/trace/events/signal.h
</chapter> </chapter>
<chapter id="block">
<title>Block IO</title>
!Iinclude/trace/events/block.h
</chapter>
</book> </book>
...@@ -1162,8 +1162,8 @@ where a driver received a request ala this before: ...@@ -1162,8 +1162,8 @@ where a driver received a request ala this before:
As mentioned, there is no virtual mapping of a bio. For DMA, this is As mentioned, there is no virtual mapping of a bio. For DMA, this is
not a problem as the driver probably never will need a virtual mapping. not a problem as the driver probably never will need a virtual mapping.
Instead it needs a bus mapping (pci_map_page for a single segment or Instead it needs a bus mapping (dma_map_page for a single segment or
use blk_rq_map_sg for scatter gather) to be able to ship it to the driver. For use dma_map_sg for scatter gather) to be able to ship it to the driver. For
PIO drivers (or drivers that need to revert to PIO transfer once in a PIO drivers (or drivers that need to revert to PIO transfer once in a
while (IDE for example)), where the CPU is doing the actual data while (IDE for example)), where the CPU is doing the actual data
transfer a virtual mapping is needed. If the driver supports highmem I/O, transfer a virtual mapping is needed. If the driver supports highmem I/O,
......
...@@ -78,8 +78,9 @@ config BLK_DEV_INTEGRITY ...@@ -78,8 +78,9 @@ config BLK_DEV_INTEGRITY
Protection. If in doubt, say N. Protection. If in doubt, say N.
config BLK_CGROUP config BLK_CGROUP
tristate tristate "Block cgroup support"
depends on CGROUPS depends on CGROUPS
depends on CFQ_GROUP_IOSCHED
default n default n
---help--- ---help---
Generic block IO controller cgroup interface. This is the common Generic block IO controller cgroup interface. This is the common
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/gcd.h> #include <linux/gcd.h>
#include <linux/lcm.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/gfp.h> #include <linux/gfp.h>
...@@ -462,16 +463,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) ...@@ -462,16 +463,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
} }
EXPORT_SYMBOL(blk_queue_stack_limits); EXPORT_SYMBOL(blk_queue_stack_limits);
/*
 * lcm - least common multiple of two unsigned integers
 * @a: first value
 * @b: second value
 *
 * Returns the least common multiple of @a and @b.  When exactly one of
 * the arguments is zero the other argument is returned; when both are
 * zero the result is zero.
 *
 * The division by gcd() is done before the multiplication so that the
 * intermediate value never exceeds the mathematical result.  Computing
 * a * b first can wrap around even though the final LCM would fit.
 */
static unsigned int lcm(unsigned int a, unsigned int b)
{
	if (a && b)
		return (a / gcd(a, b)) * b;
	else if (b)
		return b;

	return a;
}
/** /**
* blk_stack_limits - adjust queue_limits for stacked devices * blk_stack_limits - adjust queue_limits for stacked devices
* @t: the stacking driver limits (top device) * @t: the stacking driver limits (top device)
......
...@@ -107,6 +107,19 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) ...@@ -107,6 +107,19 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
return queue_var_show(max_sectors_kb, (page)); return queue_var_show(max_sectors_kb, (page));
} }
/*
 * Show handler for the "max_segments" queue attribute: hands the value of
 * queue_max_segments(q) and the output buffer @page to queue_var_show()
 * and returns whatever that helper returns.
 */
static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
{
	return queue_var_show(queue_max_segments(q), page);
}
/*
 * Show handler for the "max_segment_size" queue attribute.
 *
 * If QUEUE_FLAG_CLUSTER is not set in q->queue_flags, PAGE_CACHE_SIZE is
 * reported; otherwise the value of queue_max_segment_size(q) is reported.
 * In both cases the value and @page are passed to queue_var_show(), whose
 * return value is returned unchanged.
 */
static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
{
	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
		return queue_var_show(PAGE_CACHE_SIZE, page);

	return queue_var_show(queue_max_segment_size(q), page);
}
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
{ {
return queue_var_show(queue_logical_block_size(q), page); return queue_var_show(queue_logical_block_size(q), page);
...@@ -281,6 +294,16 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = { ...@@ -281,6 +294,16 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
.show = queue_max_hw_sectors_show, .show = queue_max_hw_sectors_show,
}; };
/*
 * "max_segments" queue attribute: mode S_IRUGO, no store handler set,
 * reads are served by queue_max_segments_show().
 */
static struct queue_sysfs_entry queue_max_segments_entry = {
	.attr = {
		.name = "max_segments",
		.mode = S_IRUGO,
	},
	.show = queue_max_segments_show,
};
/*
 * "max_segment_size" queue attribute: mode S_IRUGO, no store handler set,
 * reads are served by queue_max_segment_size_show().
 */
static struct queue_sysfs_entry queue_max_segment_size_entry = {
	.attr = {
		.name = "max_segment_size",
		.mode = S_IRUGO,
	},
	.show = queue_max_segment_size_show,
};
static struct queue_sysfs_entry queue_iosched_entry = { static struct queue_sysfs_entry queue_iosched_entry = {
.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
.show = elv_iosched_show, .show = elv_iosched_show,
...@@ -356,6 +379,8 @@ static struct attribute *default_attrs[] = { ...@@ -356,6 +379,8 @@ static struct attribute *default_attrs[] = {
&queue_ra_entry.attr, &queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr, &queue_max_hw_sectors_entry.attr,
&queue_max_sectors_entry.attr, &queue_max_sectors_entry.attr,
&queue_max_segments_entry.attr,
&queue_max_segment_size_entry.attr,
&queue_iosched_entry.attr, &queue_iosched_entry.attr,
&queue_hw_sector_size_entry.attr, &queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr, &queue_logical_block_size_entry.attr,
......
...@@ -48,6 +48,7 @@ static const int cfq_hist_divisor = 4; ...@@ -48,6 +48,7 @@ static const int cfq_hist_divisor = 4;
#define CFQ_SERVICE_SHIFT 12 #define CFQ_SERVICE_SHIFT 12
#define CFQQ_SEEK_THR (sector_t)(8 * 100) #define CFQQ_SEEK_THR (sector_t)(8 * 100)
#define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
...@@ -948,6 +949,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) ...@@ -948,6 +949,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
unsigned int major, minor; unsigned int major, minor;
cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
cfqg->blkg.dev = MKDEV(major, minor);
goto done;
}
if (cfqg || !create) if (cfqg || !create)
goto done; goto done;
...@@ -1518,7 +1524,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, ...@@ -1518,7 +1524,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq) struct cfq_queue *cfqq)
{ {
if (cfqq) { if (cfqq) {
cfq_log_cfqq(cfqd, cfqq, "set_active"); cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
cfqd->serving_prio, cfqd->serving_type);
cfqq->slice_start = 0; cfqq->slice_start = 0;
cfqq->dispatch_start = jiffies; cfqq->dispatch_start = jiffies;
cfqq->allocated_slice = 0; cfqq->allocated_slice = 0;
...@@ -1661,9 +1668,9 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, ...@@ -1661,9 +1668,9 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
} }
static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct request *rq, bool for_preempt) struct request *rq)
{ {
return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR; return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
} }
static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
...@@ -1690,7 +1697,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, ...@@ -1690,7 +1697,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
* will contain the closest sector. * will contain the closest sector.
*/ */
__cfqq = rb_entry(parent, struct cfq_queue, p_node); __cfqq = rb_entry(parent, struct cfq_queue, p_node);
if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
return __cfqq; return __cfqq;
if (blk_rq_pos(__cfqq->next_rq) < sector) if (blk_rq_pos(__cfqq->next_rq) < sector)
...@@ -1701,7 +1708,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, ...@@ -1701,7 +1708,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
return NULL; return NULL;
__cfqq = rb_entry(node, struct cfq_queue, p_node); __cfqq = rb_entry(node, struct cfq_queue, p_node);
if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
return __cfqq; return __cfqq;
return NULL; return NULL;
...@@ -1722,6 +1729,8 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, ...@@ -1722,6 +1729,8 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
{ {
struct cfq_queue *cfqq; struct cfq_queue *cfqq;
if (cfq_class_idle(cur_cfqq))
return NULL;
if (!cfq_cfqq_sync(cur_cfqq)) if (!cfq_cfqq_sync(cur_cfqq))
return NULL; return NULL;
if (CFQQ_SEEKY(cur_cfqq)) if (CFQQ_SEEKY(cur_cfqq))
...@@ -1788,7 +1797,11 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) ...@@ -1788,7 +1797,11 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* Otherwise, we do only if they are the last ones * Otherwise, we do only if they are the last ones
* in their service tree. * in their service tree.
*/ */
return service_tree->count == 1 && cfq_cfqq_sync(cfqq); if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
return 1;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
return 0;
} }
static void cfq_arm_slice_timer(struct cfq_data *cfqd) static void cfq_arm_slice_timer(struct cfq_data *cfqd)
...@@ -1833,8 +1846,11 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) ...@@ -1833,8 +1846,11 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* time slice. * time slice.
*/ */
if (sample_valid(cic->ttime_samples) && if (sample_valid(cic->ttime_samples) &&
(cfqq->slice_end - jiffies < cic->ttime_mean)) (cfqq->slice_end - jiffies < cic->ttime_mean)) {
cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
cic->ttime_mean);
return; return;
}
cfq_mark_cfqq_wait_request(cfqq); cfq_mark_cfqq_wait_request(cfqq);
...@@ -2042,6 +2058,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) ...@@ -2042,6 +2058,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
slice = max(slice, 2 * cfqd->cfq_slice_idle); slice = max(slice, 2 * cfqd->cfq_slice_idle);
slice = max_t(unsigned, slice, CFQ_MIN_TT); slice = max_t(unsigned, slice, CFQ_MIN_TT);
cfq_log(cfqd, "workload slice:%d", slice);
cfqd->workload_expires = jiffies + slice; cfqd->workload_expires = jiffies + slice;
cfqd->noidle_tree_requires_idle = false; cfqd->noidle_tree_requires_idle = false;
} }
...@@ -2189,10 +2206,13 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) ...@@ -2189,10 +2206,13 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
struct cfq_queue *cfqq; struct cfq_queue *cfqq;
int dispatched = 0; int dispatched = 0;
while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) /* Expire the timeslice of the current active queue first */
cfq_slice_expired(cfqd, 0);
while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
__cfq_set_active_queue(cfqd, cfqq);
dispatched += __cfq_forced_dispatch_cfqq(cfqq); dispatched += __cfq_forced_dispatch_cfqq(cfqq);
}
cfq_slice_expired(cfqd, 0);
BUG_ON(cfqd->busy_queues); BUG_ON(cfqd->busy_queues);
cfq_log(cfqd, "forced_dispatch=%d", dispatched); cfq_log(cfqd, "forced_dispatch=%d", dispatched);
...@@ -3104,7 +3124,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, ...@@ -3104,7 +3124,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
* if this request is as-good as one we would expect from the * if this request is as-good as one we would expect from the
* current cfqq, let it preempt * current cfqq, let it preempt
*/ */
if (cfq_rq_close(cfqd, cfqq, rq, true)) if (cfq_rq_close(cfqd, cfqq, rq))
return true; return true;
return false; return false;
...@@ -3308,6 +3328,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) ...@@ -3308,6 +3328,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
if (cfq_should_wait_busy(cfqd, cfqq)) { if (cfq_should_wait_busy(cfqd, cfqq)) {
cfqq->slice_end = jiffies + cfqd->cfq_slice_idle; cfqq->slice_end = jiffies + cfqd->cfq_slice_idle;
cfq_mark_cfqq_wait_busy(cfqq); cfq_mark_cfqq_wait_busy(cfqq);
cfq_log_cfqq(cfqd, cfqq, "will busy wait");
} }
/* /*
......
...@@ -154,7 +154,7 @@ static struct elevator_type *elevator_get(const char *name) ...@@ -154,7 +154,7 @@ static struct elevator_type *elevator_get(const char *name)
spin_unlock(&elv_list_lock); spin_unlock(&elv_list_lock);
sprintf(elv, "%s-iosched", name); snprintf(elv, sizeof(elv), "%s-iosched", name);
request_module("%s", elv); request_module("%s", elv);
spin_lock(&elv_list_lock); spin_lock(&elv_list_lock);
......
...@@ -2533,7 +2533,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) ...@@ -2533,7 +2533,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
Controller->RequestQueue[n] = RequestQueue; Controller->RequestQueue[n] = RequestQueue;
blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit); blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
RequestQueue->queuedata = Controller; RequestQueue->queuedata = Controller;
blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit); blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand); blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
disk->queue = RequestQueue; disk->queue = RequestQueue;
......
...@@ -536,7 +536,9 @@ static void atodb_endio(struct bio *bio, int error) ...@@ -536,7 +536,9 @@ static void atodb_endio(struct bio *bio, int error)
put_ldev(mdev); put_ldev(mdev);
} }
/* sector to word */
#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* activity log to on disk bitmap -- prepare bio unless that sector /* activity log to on disk bitmap -- prepare bio unless that sector
* is already covered by previously prepared bios */ * is already covered by previously prepared bios */
static int atodb_prepare_unless_covered(struct drbd_conf *mdev, static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
...@@ -546,13 +548,20 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev, ...@@ -546,13 +548,20 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
{ {
struct bio *bio; struct bio *bio;
struct page *page; struct page *page;
sector_t on_disk_sector = enr + mdev->ldev->md.md_offset sector_t on_disk_sector;
+ mdev->ldev->md.bm_offset;
unsigned int page_offset = PAGE_SIZE; unsigned int page_offset = PAGE_SIZE;
int offset; int offset;
int i = 0; int i = 0;
int err = -ENOMEM; int err = -ENOMEM;
/* We always write aligned, full 4k blocks,
* so we can ignore the logical_block_size (for now) */
enr &= ~7U;
on_disk_sector = enr + mdev->ldev->md.md_offset
+ mdev->ldev->md.bm_offset;
D_ASSERT(!(on_disk_sector & 7U));
/* Check if that enr is already covered by an already created bio. /* Check if that enr is already covered by an already created bio.
* Caution, bios[] is not NULL terminated, * Caution, bios[] is not NULL terminated,
* but only initialized to all NULL. * but only initialized to all NULL.
...@@ -588,7 +597,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev, ...@@ -588,7 +597,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
offset = S2W(enr); offset = S2W(enr);
drbd_bm_get_lel(mdev, offset, drbd_bm_get_lel(mdev, offset,
min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset), min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
kmap(page) + page_offset); kmap(page) + page_offset);
kunmap(page); kunmap(page);
...@@ -597,7 +606,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev, ...@@ -597,7 +606,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_bdev = mdev->ldev->md_bdev;
bio->bi_sector = on_disk_sector; bio->bi_sector = on_disk_sector;
if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE) if (bio_add_page(bio, page, 4096, page_offset) != 4096)
goto out_put_page; goto out_put_page;
atomic_inc(&wc->count); atomic_inc(&wc->count);
...@@ -1327,7 +1336,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) ...@@ -1327,7 +1336,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
/* ok, ->resync is there. */ /* ok, ->resync is there. */
for (i = 0; i < mdev->resync->nr_elements; i++) { for (i = 0; i < mdev->resync->nr_elements; i++) {
e = lc_element_by_index(mdev->resync, i); e = lc_element_by_index(mdev->resync, i);
bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; bm_ext = lc_entry(e, struct bm_extent, lce);
if (bm_ext->lce.lc_number == LC_FREE) if (bm_ext->lce.lc_number == LC_FREE)
continue; continue;
if (bm_ext->lce.lc_number == mdev->resync_wenr) { if (bm_ext->lce.lc_number == mdev->resync_wenr) {
......
...@@ -67,7 +67,7 @@ struct drbd_bitmap { ...@@ -67,7 +67,7 @@ struct drbd_bitmap {
size_t bm_words; size_t bm_words;
size_t bm_number_of_pages; size_t bm_number_of_pages;
sector_t bm_dev_capacity; sector_t bm_dev_capacity;
struct semaphore bm_change; /* serializes resize operations */ struct mutex bm_change; /* serializes resize operations */
atomic_t bm_async_io; atomic_t bm_async_io;
wait_queue_head_t bm_io_wait; wait_queue_head_t bm_io_wait;
...@@ -115,7 +115,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) ...@@ -115,7 +115,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
return; return;
} }
trylock_failed = down_trylock(&b->bm_change); trylock_failed = !mutex_trylock(&b->bm_change);
if (trylock_failed) { if (trylock_failed) {
dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
...@@ -126,7 +126,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) ...@@ -126,7 +126,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
b->bm_task == mdev->receiver.task ? "receiver" : b->bm_task == mdev->receiver.task ? "receiver" :
b->bm_task == mdev->asender.task ? "asender" : b->bm_task == mdev->asender.task ? "asender" :
b->bm_task == mdev->worker.task ? "worker" : "?"); b->bm_task == mdev->worker.task ? "worker" : "?");
down(&b->bm_change); mutex_lock(&b->bm_change);
} }
if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
...@@ -148,7 +148,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) ...@@ -148,7 +148,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
b->bm_why = NULL; b->bm_why = NULL;
b->bm_task = NULL; b->bm_task = NULL;
up(&b->bm_change); mutex_unlock(&b->bm_change);
} }
/* word offset to long pointer */ /* word offset to long pointer */
...@@ -296,7 +296,7 @@ int drbd_bm_init(struct drbd_conf *mdev) ...@@ -296,7 +296,7 @@ int drbd_bm_init(struct drbd_conf *mdev)
if (!b) if (!b)
return -ENOMEM; return -ENOMEM;
spin_lock_init(&b->bm_lock); spin_lock_init(&b->bm_lock);
init_MUTEX(&b->bm_change); mutex_init(&b->bm_change);
init_waitqueue_head(&b->bm_io_wait); init_waitqueue_head(&b->bm_io_wait);
mdev->bitmap = b; mdev->bitmap = b;
......
...@@ -261,6 +261,9 @@ static inline const char *cmdname(enum drbd_packets cmd) ...@@ -261,6 +261,9 @@ static inline const char *cmdname(enum drbd_packets cmd)
[P_OV_REQUEST] = "OVRequest", [P_OV_REQUEST] = "OVRequest",
[P_OV_REPLY] = "OVReply", [P_OV_REPLY] = "OVReply",
[P_OV_RESULT] = "OVResult", [P_OV_RESULT] = "OVResult",
[P_CSUM_RS_REQUEST] = "CsumRSRequest",
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
[P_COMPRESSED_BITMAP] = "CBitmap",
[P_MAX_CMD] = NULL, [P_MAX_CMD] = NULL,
}; };
...@@ -443,13 +446,18 @@ struct p_rs_param_89 { ...@@ -443,13 +446,18 @@ struct p_rs_param_89 {
char csums_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX];
} __packed; } __packed;
/*
 * Bit flags OR-ed together into the conn_flags field of struct p_protocol.
 */
enum drbd_conn_flags {
	/* set by the sender when its net_conf->want_lose is non-zero */
	CF_WANT_LOSE = 1 << 0,
	/*
	 * set by the sender when its net_conf->dry_run is non-zero and the
	 * agreed protocol version is at least 92
	 */
	CF_DRY_RUN = 1 << 1,
};
struct p_protocol { struct p_protocol {
struct p_header head; struct p_header head;
u32 protocol; u32 protocol;
u32 after_sb_0p; u32 after_sb_0p;
u32 after_sb_1p; u32 after_sb_1p;
u32 after_sb_2p; u32 after_sb_2p;
u32 want_lose; u32 conn_flags;
u32 two_primaries; u32 two_primaries;
/* Since protocol version 87 and higher. */ /* Since protocol version 87 and higher. */
...@@ -791,6 +799,8 @@ enum { ...@@ -791,6 +799,8 @@ enum {
* while this is set. */ * while this is set. */
RESIZE_PENDING, /* Size change detected locally, waiting for the response from RESIZE_PENDING, /* Size change detected locally, waiting for the response from
* the peer, if it changed there as well. */ * the peer, if it changed there as well. */
CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
}; };
struct drbd_bitmap; /* opaque for drbd_conf */ struct drbd_bitmap; /* opaque for drbd_conf */
......
...@@ -1668,7 +1668,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) ...@@ -1668,7 +1668,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
int drbd_send_protocol(struct drbd_conf *mdev) int drbd_send_protocol(struct drbd_conf *mdev)
{ {
struct p_protocol *p; struct p_protocol *p;
int size, rv; int size, cf, rv;
size = sizeof(struct p_protocol); size = sizeof(struct p_protocol);
...@@ -1685,9 +1685,21 @@ int drbd_send_protocol(struct drbd_conf *mdev) ...@@ -1685,9 +1685,21 @@ int drbd_send_protocol(struct drbd_conf *mdev)
p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
p->want_lose = cpu_to_be32(mdev->net_conf->want_lose);
p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
cf = 0;
if (mdev->net_conf->want_lose)
cf |= CF_WANT_LOSE;
if (mdev->net_conf->dry_run) {
if (mdev->agreed_pro_version >= 92)
cf |= CF_DRY_RUN;
else {
dev_err(DEV, "--dry-run is not supported by peer");
return 0;
}
}
p->conn_flags = cpu_to_be32(cf);
if (mdev->agreed_pro_version >= 87) if (mdev->agreed_pro_version >= 87)
strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
...@@ -3161,14 +3173,18 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) ...@@ -3161,14 +3173,18 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
void drbd_free_sock(struct drbd_conf *mdev) void drbd_free_sock(struct drbd_conf *mdev)
{ {
if (mdev->data.socket) { if (mdev->data.socket) {
mutex_lock(&mdev->data.mutex);
kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
sock_release(mdev->data.socket); sock_release(mdev->data.socket);
mdev->data.socket = NULL; mdev->data.socket = NULL;
mutex_unlock(&mdev->data.mutex);
} }
if (mdev->meta.socket) { if (mdev->meta.socket) {
mutex_lock(&mdev->meta.mutex);
kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
sock_release(mdev->meta.socket); sock_release(mdev->meta.socket);
mdev->meta.socket = NULL; mdev->meta.socket = NULL;
mutex_unlock(&mdev->meta.mutex);
} }
} }
......
...@@ -285,8 +285,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) ...@@ -285,8 +285,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
} }
if (r == SS_NO_UP_TO_DATE_DISK && force && if (r == SS_NO_UP_TO_DATE_DISK && force &&
(mdev->state.disk == D_INCONSISTENT || (mdev->state.disk < D_UP_TO_DATE &&
mdev->state.disk == D_OUTDATED)) { mdev->state.disk >= D_INCONSISTENT)) {
mask.disk = D_MASK; mask.disk = D_MASK;
val.disk = D_UP_TO_DATE; val.disk = D_UP_TO_DATE;
forced = 1; forced = 1;
...@@ -407,7 +407,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ...@@ -407,7 +407,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
} }
reply->ret_code = reply->ret_code =
drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer); drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
return 0; return 0;
} }
...@@ -941,6 +941,25 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp ...@@ -941,6 +941,25 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
drbd_md_set_sector_offsets(mdev, nbc); drbd_md_set_sector_offsets(mdev, nbc);
/* allocate a second IO page if logical_block_size != 512 */
logical_block_size = bdev_logical_block_size(nbc->md_bdev);
if (logical_block_size == 0)
logical_block_size = MD_SECTOR_SIZE;
if (logical_block_size != MD_SECTOR_SIZE) {
if (!mdev->md_io_tmpp) {
struct page *page = alloc_page(GFP_NOIO);
if (!page)
goto force_diskless_dec;
dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
logical_block_size, MD_SECTOR_SIZE);
dev_warn(DEV, "Workaround engaged (has performance impact).\n");
mdev->md_io_tmpp = page;
}
}
if (!mdev->bitmap) { if (!mdev->bitmap) {
if (drbd_bm_init(mdev)) { if (drbd_bm_init(mdev)) {
retcode = ERR_NOMEM; retcode = ERR_NOMEM;
...@@ -980,25 +999,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp ...@@ -980,25 +999,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
goto force_diskless_dec; goto force_diskless_dec;
} }
/* allocate a second IO page if logical_block_size != 512 */
logical_block_size = bdev_logical_block_size(nbc->md_bdev);
if (logical_block_size == 0)
logical_block_size = MD_SECTOR_SIZE;
if (logical_block_size != MD_SECTOR_SIZE) {
if (!mdev->md_io_tmpp) {
struct page *page = alloc_page(GFP_NOIO);
if (!page)
goto force_diskless_dec;
dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
logical_block_size, MD_SECTOR_SIZE);
dev_warn(DEV, "Workaround engaged (has performance impact).\n");
mdev->md_io_tmpp = page;
}
}
/* Reset the "barriers don't work" bits here, then force meta data to /* Reset the "barriers don't work" bits here, then force meta data to
* be written, to ensure we determine if barriers are supported. */ * be written, to ensure we determine if barriers are supported. */
if (nbc->dc.no_md_flush) if (nbc->dc.no_md_flush)
......
...@@ -2513,6 +2513,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol ...@@ -2513,6 +2513,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
} }
if (hg == -100) { if (hg == -100) {
/* FIXME this log message is not correct if we end up here
* after an attempted attach on a diskless node.
* We just refuse to attach -- well, we drop the "connection"
* to that disk, in a way... */
dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); dev_alert(DEV, "Split-Brain detected, dropping connection!\n");
drbd_khelper(mdev, "split-brain"); drbd_khelper(mdev, "split-brain");
return C_MASK; return C_MASK;
...@@ -2538,6 +2542,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol ...@@ -2538,6 +2542,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
} }
} }
if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
if (hg == 0)
dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
else
dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
abs(hg) >= 2 ? "full" : "bit-map based");
return C_MASK;
}
if (abs(hg) >= 2) { if (abs(hg) >= 2) {
dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
...@@ -2585,7 +2599,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) ...@@ -2585,7 +2599,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
struct p_protocol *p = (struct p_protocol *)h; struct p_protocol *p = (struct p_protocol *)h;
int header_size, data_size; int header_size, data_size;
int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
int p_want_lose, p_two_primaries; int p_want_lose, p_two_primaries, cf;
char p_integrity_alg[SHARED_SECRET_MAX] = ""; char p_integrity_alg[SHARED_SECRET_MAX] = "";
header_size = sizeof(*p) - sizeof(*h); header_size = sizeof(*p) - sizeof(*h);
...@@ -2598,8 +2612,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) ...@@ -2598,8 +2612,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
p_after_sb_0p = be32_to_cpu(p->after_sb_0p); p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
p_after_sb_1p = be32_to_cpu(p->after_sb_1p); p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
p_after_sb_2p = be32_to_cpu(p->after_sb_2p); p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
p_want_lose = be32_to_cpu(p->want_lose);
p_two_primaries = be32_to_cpu(p->two_primaries); p_two_primaries = be32_to_cpu(p->two_primaries);
cf = be32_to_cpu(p->conn_flags);
p_want_lose = cf & CF_WANT_LOSE;
clear_bit(CONN_DRY_RUN, &mdev->flags);
if (cf & CF_DRY_RUN)
set_bit(CONN_DRY_RUN, &mdev->flags);
if (p_proto != mdev->net_conf->wire_protocol) { if (p_proto != mdev->net_conf->wire_protocol) {
dev_err(DEV, "incompatible communication protocols\n"); dev_err(DEV, "incompatible communication protocols\n");
...@@ -3118,13 +3138,16 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) ...@@ -3118,13 +3138,16 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
put_ldev(mdev); put_ldev(mdev);
if (nconn == C_MASK) { if (nconn == C_MASK) {
nconn = C_CONNECTED;
if (mdev->state.disk == D_NEGOTIATING) { if (mdev->state.disk == D_NEGOTIATING) {
drbd_force_state(mdev, NS(disk, D_DISKLESS)); drbd_force_state(mdev, NS(disk, D_DISKLESS));
nconn = C_CONNECTED;
} else if (peer_state.disk == D_NEGOTIATING) { } else if (peer_state.disk == D_NEGOTIATING) {
dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
peer_state.disk = D_DISKLESS; peer_state.disk = D_DISKLESS;
real_peer_disk = D_DISKLESS;
} else { } else {
if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
return FALSE;
D_ASSERT(oconn == C_WF_REPORT_PARAMS); D_ASSERT(oconn == C_WF_REPORT_PARAMS);
drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
return FALSE; return FALSE;
...@@ -3594,10 +3617,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) ...@@ -3594,10 +3617,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
/* asender does not clean up anything. it must not interfere, either */ /* asender does not clean up anything. it must not interfere, either */
drbd_thread_stop(&mdev->asender); drbd_thread_stop(&mdev->asender);
mutex_lock(&mdev->data.mutex);
drbd_free_sock(mdev); drbd_free_sock(mdev);
mutex_unlock(&mdev->data.mutex);
spin_lock_irq(&mdev->req_lock); spin_lock_irq(&mdev->req_lock);
_drbd_wait_ee_list_empty(mdev, &mdev->active_ee); _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
...@@ -4054,6 +4074,8 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) ...@@ -4054,6 +4074,8 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
{ {
/* restore idle timeout */ /* restore idle timeout */
mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
wake_up(&mdev->misc_wait);
return TRUE; return TRUE;
} }
......
...@@ -938,7 +938,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) ...@@ -938,7 +938,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
if (eq) { if (eq) {
drbd_set_in_sync(mdev, e->sector, e->size); drbd_set_in_sync(mdev, e->sector, e->size);
mdev->rs_same_csum++; /* rs_same_csums unit is BM_BLOCK_SIZE */
mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT;
ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
} else { } else {
inc_rs_pending(mdev); inc_rs_pending(mdev);
...@@ -1288,6 +1289,14 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) ...@@ -1288,6 +1289,14 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
return retcode; return retcode;
} }
/*
 * ping_peer() - request a ping and wait for the peer's answer.
 * @mdev: DRBD device.
 *
 * Clears GOT_PING_ACK, calls request_ping(), then sleeps on
 * mdev->misc_wait until either GOT_PING_ACK has been set again
 * (got_PingAck() sets it and wakes misc_wait) or the connection
 * state has dropped below C_CONNECTED.
 */
static void ping_peer(struct drbd_conf *mdev)
{
/* Clear the flag first so that only an ack arriving after this point counts. */
clear_bit(GOT_PING_ACK, &mdev->flags);
request_ping(mdev);
/* The second condition ends the wait if the connection goes away meanwhile. */
wait_event(mdev->misc_wait,
test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
}
/** /**
* drbd_start_resync() - Start the resync process * drbd_start_resync() - Start the resync process
* @mdev: DRBD device. * @mdev: DRBD device.
...@@ -1371,7 +1380,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) ...@@ -1371,7 +1380,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
_drbd_pause_after(mdev); _drbd_pause_after(mdev);
} }
write_unlock_irq(&global_state_lock); write_unlock_irq(&global_state_lock);
drbd_state_unlock(mdev);
put_ldev(mdev); put_ldev(mdev);
if (r == SS_SUCCESS) { if (r == SS_SUCCESS) {
...@@ -1382,11 +1390,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) ...@@ -1382,11 +1390,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
if (mdev->rs_total == 0) { if (mdev->rs_total == 0) {
/* Peer still reachable? Beware of failing before-resync-target handlers! */ /* Peer still reachable? Beware of failing before-resync-target handlers! */
request_ping(mdev); ping_peer(mdev);
__set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */
drbd_resync_finished(mdev); drbd_resync_finished(mdev);
return;
} }
/* ns.conn may already be != mdev->state.conn, /* ns.conn may already be != mdev->state.conn,
...@@ -1398,6 +1403,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) ...@@ -1398,6 +1403,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
drbd_md_sync(mdev); drbd_md_sync(mdev);
} }
drbd_state_unlock(mdev);
} }
int drbd_worker(struct drbd_thread *thi) int drbd_worker(struct drbd_thread *thi)
......
...@@ -237,6 +237,8 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, ...@@ -237,6 +237,8 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
if (ret) if (ret)
goto fail; goto fail;
file_update_time(file);
transfer_result = lo_do_transfer(lo, WRITE, page, offset, transfer_result = lo_do_transfer(lo, WRITE, page, offset,
bvec->bv_page, bv_offs, size, IV); bvec->bv_page, bv_offs, size, IV);
copied = size; copied = size;
......
...@@ -341,11 +341,11 @@ static int pcd_wait(struct pcd_unit *cd, int go, int stop, char *fun, char *msg) ...@@ -341,11 +341,11 @@ static int pcd_wait(struct pcd_unit *cd, int go, int stop, char *fun, char *msg)
&& (j++ < PCD_SPIN)) && (j++ < PCD_SPIN))
udelay(PCD_DELAY); udelay(PCD_DELAY);
if ((r & (IDE_ERR & stop)) || (j >= PCD_SPIN)) { if ((r & (IDE_ERR & stop)) || (j > PCD_SPIN)) {
s = read_reg(cd, 7); s = read_reg(cd, 7);
e = read_reg(cd, 1); e = read_reg(cd, 1);
p = read_reg(cd, 2); p = read_reg(cd, 2);
if (j >= PCD_SPIN) if (j > PCD_SPIN)
e |= 0x100; e |= 0x100;
if (fun) if (fun)
printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x" printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
......
...@@ -391,11 +391,11 @@ static int pf_wait(struct pf_unit *pf, int go, int stop, char *fun, char *msg) ...@@ -391,11 +391,11 @@ static int pf_wait(struct pf_unit *pf, int go, int stop, char *fun, char *msg)
&& (j++ < PF_SPIN)) && (j++ < PF_SPIN))
udelay(PF_SPIN_DEL); udelay(PF_SPIN_DEL);
if ((r & (STAT_ERR & stop)) || (j >= PF_SPIN)) { if ((r & (STAT_ERR & stop)) || (j > PF_SPIN)) {
s = read_reg(pf, 7); s = read_reg(pf, 7);
e = read_reg(pf, 1); e = read_reg(pf, 1);
p = read_reg(pf, 2); p = read_reg(pf, 2);
if (j >= PF_SPIN) if (j > PF_SPIN)
e |= 0x100; e |= 0x100;
if (fun) if (fun)
printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x" printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
......
...@@ -274,11 +274,11 @@ static int pt_wait(struct pt_unit *tape, int go, int stop, char *fun, char *msg) ...@@ -274,11 +274,11 @@ static int pt_wait(struct pt_unit *tape, int go, int stop, char *fun, char *msg)
&& (j++ < PT_SPIN)) && (j++ < PT_SPIN))
udelay(PT_SPIN_DEL); udelay(PT_SPIN_DEL);
if ((r & (STAT_ERR & stop)) || (j >= PT_SPIN)) { if ((r & (STAT_ERR & stop)) || (j > PT_SPIN)) {
s = read_reg(pi, 7); s = read_reg(pi, 7);
e = read_reg(pi, 1); e = read_reg(pi, 1);
p = read_reg(pi, 2); p = read_reg(pi, 2);
if (j >= PT_SPIN) if (j > PT_SPIN)
e |= 0x100; e |= 0x100;
if (fun) if (fun)
printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x" printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
......
...@@ -348,14 +348,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) ...@@ -348,14 +348,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
set_capacity(vblk->disk, cap); set_capacity(vblk->disk, cap);
/* We can handle whatever the host told us to handle. */ /* We can handle whatever the host told us to handle. */
blk_queue_max_phys_segments(q, vblk->sg_elems-2); blk_queue_max_segments(q, vblk->sg_elems-2);
blk_queue_max_hw_segments(q, vblk->sg_elems-2);
/* No need to bounce any requests */ /* No need to bounce any requests */
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
/* No real sector limit. */ /* No real sector limit. */
blk_queue_max_sectors(q, -1U); blk_queue_max_hw_sectors(q, -1U);
/* Host can optionally specify maximum segment size and number of /* Host can optionally specify maximum segment size and number of
* segments. */ * segments. */
......
...@@ -2186,7 +2186,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) ...@@ -2186,7 +2186,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
gd->driverfs_dev = &sdp->sdev_gendev; gd->driverfs_dev = &sdp->sdev_gendev;
gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; gd->flags = GENHD_FL_EXT_DEVT;
if (sdp->removable) if (sdp->removable)
gd->flags |= GENHD_FL_REMOVABLE; gd->flags |= GENHD_FL_REMOVABLE;
......
...@@ -554,7 +554,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page ...@@ -554,7 +554,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
.bi_rw = bio->bi_rw, .bi_rw = bio->bi_rw,
}; };
if (q->merge_bvec_fn(q, &bvm, prev) < len) { if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
prev->bv_len -= len; prev->bv_len -= len;
return 0; return 0;
} }
...@@ -607,7 +607,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page ...@@ -607,7 +607,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* merge_bvec_fn() returns number of bytes it can accept * merge_bvec_fn() returns number of bytes it can accept
* at this offset * at this offset
*/ */
if (q->merge_bvec_fn(q, &bvm, bvec) < len) { if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
bvec->bv_page = NULL; bvec->bv_page = NULL;
bvec->bv_len = 0; bvec->bv_len = 0;
bvec->bv_offset = 0; bvec->bv_offset = 0;
......
...@@ -554,108 +554,85 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -554,108 +554,85 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
return ret; return ret;
} }
static void unpin_sb_for_writeback(struct super_block **psb) static void unpin_sb_for_writeback(struct super_block *sb)
{ {
struct super_block *sb = *psb; up_read(&sb->s_umount);
put_super(sb);
if (sb) {
up_read(&sb->s_umount);
put_super(sb);
*psb = NULL;
}
} }
/*
 * Result of pin_sb_for_writeback(); tells the caller whether it has to
 * call unpin_sb_for_writeback() afterwards.
 */
enum sb_pin_state {
/* s_count was raised and s_umount taken for read; caller must unpin. */
SB_PINNED,
/* WB_SYNC_ALL writeback: nothing was taken here, so nothing to unpin. */
SB_NOT_PINNED,
/* s_umount could not be taken or sb->s_root was NULL; skip this sb. */
SB_PIN_FAILED
};
/* /*
* For WB_SYNC_NONE writeback, the caller does not have the sb pinned * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
* before calling writeback. So make sure that we do pin it, so it doesn't * before calling writeback. So make sure that we do pin it, so it doesn't
* go away while we are writing inodes from it. * go away while we are writing inodes from it.
*
* Returns 0 if the super was successfully pinned (or pinning wasn't needed),
* 1 if we failed.
*/ */
static int pin_sb_for_writeback(struct writeback_control *wbc, static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
struct inode *inode, struct super_block **psb) struct super_block *sb)
{ {
struct super_block *sb = inode->i_sb;
/*
* If this sb is already pinned, nothing more to do. If not and
* *psb is non-NULL, unpin the old one first
*/
if (sb == *psb)
return 0;
else if (*psb)
unpin_sb_for_writeback(psb);
/* /*
* Caller must already hold the ref for this * Caller must already hold the ref for this
*/ */
if (wbc->sync_mode == WB_SYNC_ALL) { if (wbc->sync_mode == WB_SYNC_ALL) {
WARN_ON(!rwsem_is_locked(&sb->s_umount)); WARN_ON(!rwsem_is_locked(&sb->s_umount));
return 0; return SB_NOT_PINNED;
} }
spin_lock(&sb_lock); spin_lock(&sb_lock);
sb->s_count++; sb->s_count++;
if (down_read_trylock(&sb->s_umount)) { if (down_read_trylock(&sb->s_umount)) {
if (sb->s_root) { if (sb->s_root) {
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
goto pinned; return SB_PINNED;
} }
/* /*
* umounted, drop rwsem again and fall through to failure * umounted, drop rwsem again and fall through to failure
*/ */
up_read(&sb->s_umount); up_read(&sb->s_umount);
} }
sb->s_count--; sb->s_count--;
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
return 1; return SB_PIN_FAILED;
pinned:
*psb = sb;
return 0;
} }
static void writeback_inodes_wb(struct bdi_writeback *wb, /*
struct writeback_control *wbc) * Write a portion of b_io inodes which belong to @sb.
* If @wbc->sb != NULL, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
static int writeback_sb_inodes(struct super_block *sb,
struct bdi_writeback *wb,
struct writeback_control *wbc)
{ {
struct super_block *sb = wbc->sb, *pin_sb = NULL;
const unsigned long start = jiffies; /* livelock avoidance */
spin_lock(&inode_lock);
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
while (!list_empty(&wb->b_io)) { while (!list_empty(&wb->b_io)) {
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
long pages_skipped; long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
/* struct inode, i_list);
* super block given and doesn't match, skip this inode if (wbc->sb && sb != inode->i_sb) {
*/ /* super block given and doesn't
if (sb && sb != inode->i_sb) { match, skip this inode */
redirty_tail(inode); redirty_tail(inode);
continue; continue;
} }
if (sb != inode->i_sb)
/* finish with this superblock */
return 0;
if (inode->i_state & (I_NEW | I_WILL_FREE)) { if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode); requeue_io(inode);
continue; continue;
} }
/* /*
* Was this inode dirtied after sync_sb_inodes was called? * Was this inode dirtied after sync_sb_inodes was called?
* This keeps sync from extra jobs and livelock. * This keeps sync from extra jobs and livelock.
*/ */
if (inode_dirtied_after(inode, start)) if (inode_dirtied_after(inode, wbc->wb_start))
break; return 1;
if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
requeue_io(inode);
continue;
}
BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
__iget(inode); __iget(inode);
...@@ -674,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, ...@@ -674,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
spin_lock(&inode_lock); spin_lock(&inode_lock);
if (wbc->nr_to_write <= 0) { if (wbc->nr_to_write <= 0) {
wbc->more_io = 1; wbc->more_io = 1;
break; return 1;
} }
if (!list_empty(&wb->b_more_io)) if (!list_empty(&wb->b_more_io))
wbc->more_io = 1; wbc->more_io = 1;
} }
/* b_io is empty */
return 1;
}
static void writeback_inodes_wb(struct bdi_writeback *wb,
struct writeback_control *wbc)
{
int ret = 0;
unpin_sb_for_writeback(&pin_sb); wbc->wb_start = jiffies; /* livelock avoidance */
spin_lock(&inode_lock);
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
while (!list_empty(&wb->b_io)) {
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
enum sb_pin_state state;
if (wbc->sb && sb != wbc->sb) {
/* super block given and doesn't
match, skip this inode */
redirty_tail(inode);
continue;
}
state = pin_sb_for_writeback(wbc, sb);
if (state == SB_PIN_FAILED) {
requeue_io(inode);
continue;
}
ret = writeback_sb_inodes(sb, wb, wbc);
if (state == SB_PINNED)
unpin_sb_for_writeback(sb);
if (ret)
break;
}
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
/* Leave any unwritten inodes on b_io */ /* Leave any unwritten inodes on b_io */
} }
......
...@@ -158,7 +158,6 @@ enum rq_flag_bits { ...@@ -158,7 +158,6 @@ enum rq_flag_bits {
struct request { struct request {
struct list_head queuelist; struct list_head queuelist;
struct call_single_data csd; struct call_single_data csd;
int cpu;
struct request_queue *q; struct request_queue *q;
...@@ -166,9 +165,11 @@ struct request { ...@@ -166,9 +165,11 @@ struct request {
enum rq_cmd_type_bits cmd_type; enum rq_cmd_type_bits cmd_type;
unsigned long atomic_flags; unsigned long atomic_flags;
int cpu;
/* the following two fields are internal, NEVER access directly */ /* the following two fields are internal, NEVER access directly */
sector_t __sector; /* sector cursor */
unsigned int __data_len; /* total data len */ unsigned int __data_len; /* total data len */
sector_t __sector; /* sector cursor */
struct bio *bio; struct bio *bio;
struct bio *biotail; struct bio *biotail;
...@@ -201,20 +202,20 @@ struct request { ...@@ -201,20 +202,20 @@ struct request {
unsigned short ioprio; unsigned short ioprio;
int ref_count;
void *special; /* opaque pointer available for LLD use */ void *special; /* opaque pointer available for LLD use */
char *buffer; /* kaddr of the current segment if available */ char *buffer; /* kaddr of the current segment if available */
int tag; int tag;
int errors; int errors;
int ref_count;
/* /*
* when request is used as a packet command carrier * when request is used as a packet command carrier
*/ */
unsigned short cmd_len;
unsigned char __cmd[BLK_MAX_CDB]; unsigned char __cmd[BLK_MAX_CDB];
unsigned char *cmd; unsigned char *cmd;
unsigned short cmd_len;
unsigned int extra_len; /* length of alignment and padding */ unsigned int extra_len; /* length of alignment and padding */
unsigned int sense_len; unsigned int sense_len;
...@@ -921,26 +922,7 @@ extern void blk_cleanup_queue(struct request_queue *); ...@@ -921,26 +922,7 @@ extern void blk_cleanup_queue(struct request_queue *);
extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_bounce_limit(struct request_queue *, u64);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
/* Temporary compatibility wrapper */
static inline void blk_queue_max_sectors(struct request_queue *q, unsigned int max)
{
blk_queue_max_hw_sectors(q, max);
}
extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segments(struct request_queue *, unsigned short);
static inline void blk_queue_max_phys_segments(struct request_queue *q, unsigned short max)
{
blk_queue_max_segments(q, max);
}
static inline void blk_queue_max_hw_segments(struct request_queue *q, unsigned short max)
{
blk_queue_max_segments(q, max);
}
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q, extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors); unsigned int max_discard_sectors);
...@@ -1030,11 +1012,6 @@ static inline int sb_issue_discard(struct super_block *sb, ...@@ -1030,11 +1012,6 @@ static inline int sb_issue_discard(struct super_block *sb,
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
#define MAX_SEGMENT_SIZE 65536
enum blk_default_limits { enum blk_default_limits {
BLK_MAX_SEGMENTS = 128, BLK_MAX_SEGMENTS = 128,
BLK_SAFE_MAX_SECTORS = 255, BLK_SAFE_MAX_SECTORS = 255,
......
...@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); ...@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.7" #define REL_VERSION "8.3.7"
#define API_VERSION 88 #define API_VERSION 88
#define PRO_VERSION_MIN 86 #define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 91 #define PRO_VERSION_MAX 92
enum drbd_io_error_p { enum drbd_io_error_p {
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#endif #endif
NL_PACKET(primary, 1, NL_PACKET(primary, 1,
NL_BIT( 1, T_MAY_IGNORE, overwrite_peer) NL_BIT( 1, T_MAY_IGNORE, primary_force)
) )
NL_PACKET(secondary, 2, ) NL_PACKET(secondary, 2, )
...@@ -63,6 +63,7 @@ NL_PACKET(net_conf, 5, ...@@ -63,6 +63,7 @@ NL_PACKET(net_conf, 5,
NL_BIT( 41, T_MAY_IGNORE, always_asbp) NL_BIT( 41, T_MAY_IGNORE, always_asbp)
NL_BIT( 61, T_MAY_IGNORE, no_cork) NL_BIT( 61, T_MAY_IGNORE, no_cork)
NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
NL_BIT( 70, T_MANDATORY, dry_run)
) )
NL_PACKET(disconnect, 6, ) NL_PACKET(disconnect, 6, )
......
...@@ -109,7 +109,7 @@ struct hd_struct { ...@@ -109,7 +109,7 @@ struct hd_struct {
}; };
#define GENHD_FL_REMOVABLE 1 #define GENHD_FL_REMOVABLE 1
#define GENHD_FL_DRIVERFS 2 /* 2 is unused */
#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4 #define GENHD_FL_MEDIA_CHANGE_NOTIFY 4
#define GENHD_FL_CD 8 #define GENHD_FL_CD 8
#define GENHD_FL_UP 16 #define GENHD_FL_UP 16
......
...@@ -782,7 +782,6 @@ extern int i2o_exec_lct_get(struct i2o_controller *); ...@@ -782,7 +782,6 @@ extern int i2o_exec_lct_get(struct i2o_controller *);
#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver) #define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver)
#define to_i2o_device(dev) container_of(dev, struct i2o_device, device) #define to_i2o_device(dev) container_of(dev, struct i2o_device, device)
#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device) #define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device)
#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj))
/** /**
* i2o_out_to_virt - Turn an I2O message to a virtual address * i2o_out_to_virt - Turn an I2O message to a virtual address
......
#ifndef _LCM_H
#define _LCM_H
#include <linux/compiler.h>
/*
 * lcm() - presumably the least common multiple of @a and @b; the
 * implementation is not visible here, so confirm against its definition.
 * Declared __attribute_const__: the result depends only on the arguments.
 */
unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__;
#endif /* _LCM_H */
...@@ -34,6 +34,9 @@ struct writeback_control { ...@@ -34,6 +34,9 @@ struct writeback_control {
enum writeback_sync_modes sync_mode; enum writeback_sync_modes sync_mode;
unsigned long *older_than_this; /* If !NULL, only write back inodes unsigned long *older_than_this; /* If !NULL, only write back inodes
older than this */ older than this */
unsigned long wb_start; /* Time writeback_inodes_wb was
called. This is needed to avoid
extra jobs and livelock */
long nr_to_write; /* Write this many pages, and decrement long nr_to_write; /* Write this many pages, and decrement
this for each page written */ this for each page written */
long pages_skipped; /* Pages which were not written */ long pages_skipped; /* Pages which were not written */
......
...@@ -40,6 +40,16 @@ DECLARE_EVENT_CLASS(block_rq_with_error, ...@@ -40,6 +40,16 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
__entry->nr_sector, __entry->errors) __entry->nr_sector, __entry->errors)
); );
/**
* block_rq_abort - abort block operation request
* @q: queue containing the block operation request
* @rq: block IO operation request
*
* Called immediately after pending block IO operation request @rq in
* queue @q is aborted. The fields in the operation request @rq
* can be examined to determine which device and sectors the pending
* operation would access.
*/
DEFINE_EVENT(block_rq_with_error, block_rq_abort, DEFINE_EVENT(block_rq_with_error, block_rq_abort,
TP_PROTO(struct request_queue *q, struct request *rq), TP_PROTO(struct request_queue *q, struct request *rq),
...@@ -47,6 +57,15 @@ DEFINE_EVENT(block_rq_with_error, block_rq_abort, ...@@ -47,6 +57,15 @@ DEFINE_EVENT(block_rq_with_error, block_rq_abort,
TP_ARGS(q, rq) TP_ARGS(q, rq)
); );
/**
* block_rq_requeue - place block IO request back on a queue
* @q: queue holding operation
* @rq: block IO operation request
*
* The block operation request @rq is being placed back into queue
* @q. For some reason the request was not completed and needs to be
* put back in the queue.
*/
DEFINE_EVENT(block_rq_with_error, block_rq_requeue, DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
TP_PROTO(struct request_queue *q, struct request *rq), TP_PROTO(struct request_queue *q, struct request *rq),
...@@ -54,6 +73,17 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, ...@@ -54,6 +73,17 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
TP_ARGS(q, rq) TP_ARGS(q, rq)
); );
/**
* block_rq_complete - block IO operation completed by device driver
* @q: queue containing the block operation request
* @rq: block operations request
*
* The block_rq_complete tracepoint event indicates that some portion
* of the operation request has been completed by the device driver. If
* the @rq->bio is %NULL, then there is absolutely no additional work to
* do for the request. If @rq->bio is non-NULL then there is
* additional work required to complete the request.
*/
DEFINE_EVENT(block_rq_with_error, block_rq_complete, DEFINE_EVENT(block_rq_with_error, block_rq_complete,
TP_PROTO(struct request_queue *q, struct request *rq), TP_PROTO(struct request_queue *q, struct request *rq),
...@@ -95,6 +125,16 @@ DECLARE_EVENT_CLASS(block_rq, ...@@ -95,6 +125,16 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->nr_sector, __entry->comm) __entry->nr_sector, __entry->comm)
); );
/**
* block_rq_insert - insert block operation request into queue
* @q: target queue
* @rq: block IO operation request
*
* Called immediately before block operation request @rq is inserted
* into queue @q. The fields in the operation request @rq struct can
* be examined to determine which device and sectors the pending
* operation would access.
*/
DEFINE_EVENT(block_rq, block_rq_insert, DEFINE_EVENT(block_rq, block_rq_insert,
TP_PROTO(struct request_queue *q, struct request *rq), TP_PROTO(struct request_queue *q, struct request *rq),
...@@ -102,6 +142,14 @@ DEFINE_EVENT(block_rq, block_rq_insert, ...@@ -102,6 +142,14 @@ DEFINE_EVENT(block_rq, block_rq_insert,
TP_ARGS(q, rq) TP_ARGS(q, rq)
); );
/**
* block_rq_issue - issue pending block IO request operation to device driver
* @q: queue holding operation
* @rq: block IO operation request
*
* Called when block operation request @rq from queue @q is sent to a
* device driver for processing.
*/
DEFINE_EVENT(block_rq, block_rq_issue, DEFINE_EVENT(block_rq, block_rq_issue,
TP_PROTO(struct request_queue *q, struct request *rq), TP_PROTO(struct request_queue *q, struct request *rq),
...@@ -109,6 +157,17 @@ DEFINE_EVENT(block_rq, block_rq_issue, ...@@ -109,6 +157,17 @@ DEFINE_EVENT(block_rq, block_rq_issue,
TP_ARGS(q, rq) TP_ARGS(q, rq)
); );
/**
* block_bio_bounce - used bounce buffer when processing block operation
* @q: queue holding the block operation
* @bio: block operation
*
* A bounce buffer was used to handle the block operation @bio in @q.
* This occurs when hardware limitations prevent a direct transfer of
* data between the @bio data memory area and the IO device. Use of a
* bounce buffer requires extra copying of data and decreases
* performance.
*/
TRACE_EVENT(block_bio_bounce, TRACE_EVENT(block_bio_bounce,
TP_PROTO(struct request_queue *q, struct bio *bio), TP_PROTO(struct request_queue *q, struct bio *bio),
...@@ -138,6 +197,14 @@ TRACE_EVENT(block_bio_bounce, ...@@ -138,6 +197,14 @@ TRACE_EVENT(block_bio_bounce,
__entry->nr_sector, __entry->comm) __entry->nr_sector, __entry->comm)
); );
/**
* block_bio_complete - completed all work on the block operation
* @q: queue holding the block operation
* @bio: block operation completed
*
* This tracepoint indicates there is no further work to do on this
* block IO operation @bio.
*/
TRACE_EVENT(block_bio_complete, TRACE_EVENT(block_bio_complete,
TP_PROTO(struct request_queue *q, struct bio *bio), TP_PROTO(struct request_queue *q, struct bio *bio),
...@@ -193,6 +260,14 @@ DECLARE_EVENT_CLASS(block_bio, ...@@ -193,6 +260,14 @@ DECLARE_EVENT_CLASS(block_bio,
__entry->nr_sector, __entry->comm) __entry->nr_sector, __entry->comm)
); );
/**
* block_bio_backmerge - merging block operation to the end of an existing operation
* @q: queue holding operation
* @bio: new block operation to merge
*
* Merging block request @bio to the end of an existing block request
* in queue @q.
*/
DEFINE_EVENT(block_bio, block_bio_backmerge, DEFINE_EVENT(block_bio, block_bio_backmerge,
TP_PROTO(struct request_queue *q, struct bio *bio), TP_PROTO(struct request_queue *q, struct bio *bio),
...@@ -200,6 +275,14 @@ DEFINE_EVENT(block_bio, block_bio_backmerge, ...@@ -200,6 +275,14 @@ DEFINE_EVENT(block_bio, block_bio_backmerge,
TP_ARGS(q, bio) TP_ARGS(q, bio)
); );
/**
* block_bio_frontmerge - merging block operation to the beginning of an existing operation
* @q: queue holding operation
* @bio: new block operation to merge
*
* Merging block IO operation @bio to the beginning of an existing block
* operation in queue @q.
*/
DEFINE_EVENT(block_bio, block_bio_frontmerge, DEFINE_EVENT(block_bio, block_bio_frontmerge,
TP_PROTO(struct request_queue *q, struct bio *bio), TP_PROTO(struct request_queue *q, struct bio *bio),
...@@ -207,6 +290,13 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge, ...@@ -207,6 +290,13 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge,
TP_ARGS(q, bio) TP_ARGS(q, bio)
); );
/**
* block_bio_queue - putting new block IO operation in queue
* @q: queue holding operation
* @bio: new block operation
*
* About to place the block IO operation @bio into queue @q.
*/
DEFINE_EVENT(block_bio, block_bio_queue, DEFINE_EVENT(block_bio, block_bio_queue,
TP_PROTO(struct request_queue *q, struct bio *bio), TP_PROTO(struct request_queue *q, struct bio *bio),
...@@ -243,6 +333,15 @@ DECLARE_EVENT_CLASS(block_get_rq, ...@@ -243,6 +333,15 @@ DECLARE_EVENT_CLASS(block_get_rq,
__entry->nr_sector, __entry->comm) __entry->nr_sector, __entry->comm)
); );
/**
* block_getrq - get a free request entry in queue for block IO operations
* @q: queue for operations
* @bio: pending block IO operation
* @rw: low bit indicates a read (%0) or a write (%1)
*
* A request struct for queue @q has been allocated to handle the
* block IO operation @bio.
*/
DEFINE_EVENT(block_get_rq, block_getrq, DEFINE_EVENT(block_get_rq, block_getrq,
TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
...@@ -250,6 +349,17 @@ DEFINE_EVENT(block_get_rq, block_getrq, ...@@ -250,6 +349,17 @@ DEFINE_EVENT(block_get_rq, block_getrq,
TP_ARGS(q, bio, rw) TP_ARGS(q, bio, rw)
); );
/**
* block_sleeprq - waiting to get a free request entry in queue for block IO operation
* @q: queue for operation
* @bio: pending block IO operation
* @rw: low bit indicates a read (%0) or a write (%1)
*
* In the case where a request struct cannot be provided for queue @q
* the process needs to wait for a request struct to become
* available. This tracepoint event is generated each time the
* process goes to sleep waiting for a request struct to become available.
*/
DEFINE_EVENT(block_get_rq, block_sleeprq, DEFINE_EVENT(block_get_rq, block_sleeprq,
TP_PROTO(struct request_queue *q, struct bio *bio, int rw), TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
...@@ -257,6 +367,14 @@ DEFINE_EVENT(block_get_rq, block_sleeprq, ...@@ -257,6 +367,14 @@ DEFINE_EVENT(block_get_rq, block_sleeprq,
TP_ARGS(q, bio, rw) TP_ARGS(q, bio, rw)
); );
/**
* block_plug - keep operations requests in request queue
* @q: request queue to plug
*
* Plug the request queue @q. Do not allow block operation requests
* to be sent to the device driver. Instead, accumulate requests in
* the queue to improve throughput performance of the block device.
*/
TRACE_EVENT(block_plug, TRACE_EVENT(block_plug,
TP_PROTO(struct request_queue *q), TP_PROTO(struct request_queue *q),
...@@ -293,6 +411,13 @@ DECLARE_EVENT_CLASS(block_unplug, ...@@ -293,6 +411,13 @@ DECLARE_EVENT_CLASS(block_unplug,
TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
); );
/**
* block_unplug_timer - timed release of operations requests in queue to device driver
* @q: request queue to unplug
*
* Unplug the request queue @q because a timer expired and allow block
* operation requests to be sent to the device driver.
*/
DEFINE_EVENT(block_unplug, block_unplug_timer, DEFINE_EVENT(block_unplug, block_unplug_timer,
TP_PROTO(struct request_queue *q), TP_PROTO(struct request_queue *q),
...@@ -300,6 +425,13 @@ DEFINE_EVENT(block_unplug, block_unplug_timer, ...@@ -300,6 +425,13 @@ DEFINE_EVENT(block_unplug, block_unplug_timer,
TP_ARGS(q) TP_ARGS(q)
); );
/**
* block_unplug_io - release of operations requests in request queue
* @q: request queue to unplug
*
* Unplug request queue @q because device driver is scheduled to work
* on elements in the request queue.
*/
DEFINE_EVENT(block_unplug, block_unplug_io, DEFINE_EVENT(block_unplug, block_unplug_io,
TP_PROTO(struct request_queue *q), TP_PROTO(struct request_queue *q),
...@@ -307,6 +439,17 @@ DEFINE_EVENT(block_unplug, block_unplug_io, ...@@ -307,6 +439,17 @@ DEFINE_EVENT(block_unplug, block_unplug_io,
TP_ARGS(q) TP_ARGS(q)
); );
/**
* block_split - split a single bio struct into two bio structs
* @q: queue containing the bio
* @bio: block operation being split
* @new_sector: The starting sector for the new bio
*
* The bio request @bio in request queue @q needs to be split into two
* bio requests. The newly created @bio request starts at
* @new_sector. This split may be required due to a hardware limitation,
* such as an operation crossing device boundaries in a RAID system.
*/
TRACE_EVENT(block_split, TRACE_EVENT(block_split,
TP_PROTO(struct request_queue *q, struct bio *bio, TP_PROTO(struct request_queue *q, struct bio *bio,
...@@ -337,6 +480,16 @@ TRACE_EVENT(block_split, ...@@ -337,6 +480,16 @@ TRACE_EVENT(block_split,
__entry->comm) __entry->comm)
); );
/**
* block_remap - map request for a partition to the raw device
* @q: queue holding the operation
* @bio: revised operation
* @dev: device for the operation
* @from: original sector for the operation
*
* An operation for a partition on a block device has been mapped to the
* raw block device.
*/
TRACE_EVENT(block_remap, TRACE_EVENT(block_remap,
TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
...@@ -370,6 +523,17 @@ TRACE_EVENT(block_remap, ...@@ -370,6 +523,17 @@ TRACE_EVENT(block_remap,
(unsigned long long)__entry->old_sector) (unsigned long long)__entry->old_sector)
); );
/**
* block_rq_remap - map request for a block operation request
* @q: queue holding the operation
* @rq: block IO operation request
* @dev: device for the operation
* @from: original sector for the operation
*
* The block operation request @rq in @q has been remapped. The block
* operation request @rq holds the current information and @from holds
* the original sector.
*/
TRACE_EVENT(block_rq_remap, TRACE_EVENT(block_rq_remap,
TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
......
...@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o ...@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
string_helpers.o gcd.o list_sort.o string_helpers.o gcd.o lcm.o list_sort.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y) ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject.o += -DDEBUG
......
#include <linux/kernel.h>
#include <linux/gcd.h>
#include <linux/module.h>
/**
 * lcm - lowest common multiple of two values
 * @a: first value
 * @b: second value
 *
 * Returns the lowest common multiple of @a and @b when both are
 * non-zero. If exactly one of them is zero the other one is returned,
 * and zero is returned only when both are zero.
 */
unsigned long lcm(unsigned long a, unsigned long b)
{
	/*
	 * Divide before multiplying: the product a * b can wrap around
	 * unsigned long even when the final result would fit. gcd() is
	 * expected to return a divisor of @a, so the division is exact
	 * and the result is unchanged whenever a * b did not wrap.
	 */
	if (a && b)
		return (a / gcd(a, b)) * b;
	else if (b)
		return b;

	return a;
}
EXPORT_SYMBOL_GPL(lcm);
...@@ -227,6 +227,9 @@ static struct device_attribute bdi_dev_attrs[] = { ...@@ -227,6 +227,9 @@ static struct device_attribute bdi_dev_attrs[] = {
static __init int bdi_class_init(void) static __init int bdi_class_init(void)
{ {
bdi_class = class_create(THIS_MODULE, "bdi"); bdi_class = class_create(THIS_MODULE, "bdi");
if (IS_ERR(bdi_class))
return PTR_ERR(bdi_class);
bdi_class->dev_attrs = bdi_dev_attrs; bdi_class->dev_attrs = bdi_dev_attrs;
bdi_debug_init(); bdi_debug_init();
return 0; return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册