提交 8abfc6e7 编写于 作者: L Linus Torvalds

Merge branch 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block: (95 commits)
  cciss: fix PCI IDs for new Smart Array controllers
  drbd: add race-breaker to drbd_go_diskless
  drbd: use dynamic_dev_dbg to optionally log uuid changes
  dynamic_debug.h: Fix dynamic_dev_dbg() macro if CONFIG_DYNAMIC_DEBUG not set
  drbd: cleanup: change "<= 0" to "== 0"
  drbd: relax the grace period of the md_sync timer again
  drbd: add some more explicit drbd_md_sync
  drbd: drop wrong debug asserts, fix recently introduced race
  drbd: cleanup useless leftover warn/error printk's
  drbd: add explicit drbd_md_sync to drbd_resync_finished
  drbd: Do not log an ASSERT for P_OV_REQUEST packets while C_CONNECTED
  drbd: fix for possible deadlock on IO error during resync
  drbd: fix unlikely access after free and list corruption
  drbd: fix for spurious fullsync (uuids rotated too fast)
  drbd: allow for explicit resync-finished notifications
  drbd: preparation commit, using full state in receive_state()
  drbd: drbd_send_ack_dp must not rely on header information
  drbd: Fix regression in recv_bm_rle_bits (compressed bitmap)
  drbd: Fixed a stupid copy and paste error
  drbd: Allow larger values for c-fill-target.
  ...

Fix up trivial conflict in drivers/block/ataflop.c due to BKL removal
......@@ -115,8 +115,6 @@ static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it does
module_param(fd_def_df0, ulong, 0);
MODULE_LICENSE("GPL");
static struct request_queue *floppy_queue;
/*
* Macros
*/
......@@ -165,6 +163,7 @@ static volatile int selected = -1; /* currently selected drive */
static int writepending;
static int writefromint;
static char *raw_buf;
static int fdc_queue;
static DEFINE_SPINLOCK(amiflop_lock);
......@@ -1335,6 +1334,42 @@ static int get_track(int drive, int track)
return -1;
}
/*
* Round-robin between our available drives, doing one request from each
*/
static struct request *set_next_request(void)
{
struct request_queue *q;
int cnt = FD_MAX_UNITS;
struct request *rq;
/* Find next queue we can dispatch from */
fdc_queue = fdc_queue + 1;
if (fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) {
if (unit[fdc_queue].type->code == FD_NODRIVE) {
if (++fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
continue;
}
q = unit[fdc_queue].gendisk->queue;
if (q) {
rq = blk_fetch_request(q);
if (rq)
break;
}
if (++fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
}
return rq;
}
static void redo_fd_request(void)
{
struct request *rq;
......@@ -1346,7 +1381,7 @@ static void redo_fd_request(void)
int err;
next_req:
rq = blk_fetch_request(floppy_queue);
rq = set_next_request();
if (!rq) {
/* Nothing left to do */
return;
......@@ -1683,6 +1718,13 @@ static int __init fd_probe_drives(void)
continue;
}
unit[drive].gendisk = disk;
disk->queue = blk_init_queue(do_fd_request, &amiflop_lock);
if (!disk->queue) {
unit[drive].type->code = FD_NODRIVE;
continue;
}
drives++;
if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) {
printk("no mem for ");
......@@ -1696,7 +1738,6 @@ static int __init fd_probe_drives(void)
disk->fops = &floppy_fops;
sprintf(disk->disk_name, "fd%d", drive);
disk->private_data = &unit[drive];
disk->queue = floppy_queue;
set_capacity(disk, 880*2);
add_disk(disk);
}
......@@ -1744,11 +1785,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
goto out_irq2;
}
ret = -ENOMEM;
floppy_queue = blk_init_queue(do_fd_request, &amiflop_lock);
if (!floppy_queue)
goto out_queue;
ret = -ENODEV;
if (fd_probe_drives() < 1) /* No usable drives */
goto out_probe;
......@@ -1792,8 +1828,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
return 0;
out_probe:
blk_cleanup_queue(floppy_queue);
out_queue:
free_irq(IRQ_AMIGA_CIAA_TB, NULL);
out_irq2:
free_irq(IRQ_AMIGA_DSKBLK, NULL);
......@@ -1811,9 +1845,12 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
for( i = 0; i < FD_MAX_UNITS; i++) {
if (unit[i].type->code != FD_NODRIVE) {
struct request_queue *q = unit[i].gendisk->queue;
del_gendisk(unit[i].gendisk);
put_disk(unit[i].gendisk);
kfree(unit[i].trackbuf);
if (q)
blk_cleanup_queue(q);
}
}
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
......@@ -1821,7 +1858,6 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
free_irq(IRQ_AMIGA_DSKBLK, NULL);
custom.dmacon = DMAF_DISK; /* disable DMA */
amiga_chip_free(raw_buf);
blk_cleanup_queue(floppy_queue);
unregister_blkdev(FLOPPY_MAJOR, "fd");
}
#endif
......
......@@ -80,8 +80,8 @@
#undef DEBUG
static DEFINE_MUTEX(ataflop_mutex);
static struct request_queue *floppy_queue;
static struct request *fd_request;
static int fdc_queue;
/* Disk types: DD, HD, ED */
static struct atari_disk_type {
......@@ -1392,6 +1392,29 @@ static void setup_req_params( int drive )
ReqTrack, ReqSector, (unsigned long)ReqData ));
}
/*
* Round-robin between our available drives, doing one request from each
*/
static struct request *set_next_request(void)
{
struct request_queue *q;
int old_pos = fdc_queue;
struct request *rq;
do {
q = unit[fdc_queue].disk->queue;
if (++fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
if (q) {
rq = blk_fetch_request(q);
if (rq)
break;
}
} while (fdc_queue != old_pos);
return rq;
}
static void redo_fd_request(void)
{
......@@ -1406,7 +1429,7 @@ static void redo_fd_request(void)
repeat:
if (!fd_request) {
fd_request = blk_fetch_request(floppy_queue);
fd_request = set_next_request();
if (!fd_request)
goto the_end;
}
......@@ -1933,10 +1956,6 @@ static int __init atari_floppy_init (void)
PhysTrackBuffer = virt_to_phys(TrackBuffer);
BufferDrive = BufferSide = BufferTrack = -1;
floppy_queue = blk_init_queue(do_fd_request, &ataflop_lock);
if (!floppy_queue)
goto Enomem;
for (i = 0; i < FD_MAX_UNITS; i++) {
unit[i].track = -1;
unit[i].flags = 0;
......@@ -1945,7 +1964,10 @@ static int __init atari_floppy_init (void)
sprintf(unit[i].disk->disk_name, "fd%d", i);
unit[i].disk->fops = &floppy_fops;
unit[i].disk->private_data = &unit[i];
unit[i].disk->queue = floppy_queue;
unit[i].disk->queue = blk_init_queue(do_fd_request,
&ataflop_lock);
if (!unit[i].disk->queue)
goto Enomem;
set_capacity(unit[i].disk, MAX_DISK_SIZE * 2);
add_disk(unit[i].disk);
}
......@@ -1960,10 +1982,14 @@ static int __init atari_floppy_init (void)
return 0;
Enomem:
while (i--)
while (i--) {
struct request_queue *q = unit[i].disk->queue;
put_disk(unit[i].disk);
if (floppy_queue)
blk_cleanup_queue(floppy_queue);
if (q)
blk_cleanup_queue(q);
}
unregister_blkdev(FLOPPY_MAJOR, "fd");
return -ENOMEM;
}
......@@ -2012,12 +2038,14 @@ static void __exit atari_floppy_exit(void)
int i;
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
for (i = 0; i < FD_MAX_UNITS; i++) {
struct request_queue *q = unit[i].disk->queue;
del_gendisk(unit[i].disk);
put_disk(unit[i].disk);
blk_cleanup_queue(q);
}
unregister_blkdev(FLOPPY_MAJOR, "fd");
blk_cleanup_queue(floppy_queue);
del_timer_sync(&fd_timer);
atari_stram_free( DMABuffer );
}
......
此差异已折叠。
......@@ -965,29 +965,30 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
* ok, (capacity & 7) != 0 sometimes, but who cares...
* we count rs_{total,left} in bits, not sectors.
*/
spin_lock_irqsave(&mdev->al_lock, flags);
count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
if (count) {
/* we need the lock for drbd_try_clear_on_disk_bm */
if (jiffies - mdev->rs_mark_time > HZ*10) {
/* should be rolling marks,
* but we estimate only anyways. */
if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) &&
if (count && get_ldev(mdev)) {
unsigned long now = jiffies;
unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
unsigned long tw = drbd_bm_total_weight(mdev);
if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
mdev->state.conn != C_PAUSED_SYNC_T &&
mdev->state.conn != C_PAUSED_SYNC_S) {
mdev->rs_mark_time = jiffies;
mdev->rs_mark_left = drbd_bm_total_weight(mdev);
mdev->rs_mark_time[next] = now;
mdev->rs_mark_left[next] = tw;
mdev->rs_last_mark = next;
}
}
if (get_ldev(mdev)) {
drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
put_ldev(mdev);
}
spin_lock_irqsave(&mdev->al_lock, flags);
drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
spin_unlock_irqrestore(&mdev->al_lock, flags);
/* just wake_up unconditional now, various lc_chaged(),
* lc_put() in drbd_try_clear_on_disk_bm(). */
wake_up = 1;
put_ldev(mdev);
}
spin_unlock_irqrestore(&mdev->al_lock, flags);
if (wake_up)
wake_up(&mdev->al_wait);
}
......@@ -1118,7 +1119,7 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
* @mdev: DRBD device.
* @sector: The sector number.
*
* This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted.
* This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
*/
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
......@@ -1129,10 +1130,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
sig = wait_event_interruptible(mdev->al_wait,
(bm_ext = _bme_get(mdev, enr)));
if (sig)
return 0;
return -EINTR;
if (test_bit(BME_LOCKED, &bm_ext->flags))
return 1;
return 0;
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(mdev->al_wait,
......@@ -1145,13 +1146,11 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
wake_up(&mdev->al_wait);
}
spin_unlock_irq(&mdev->al_lock);
return 0;
return -EINTR;
}
}
set_bit(BME_LOCKED, &bm_ext->flags);
return 1;
return 0;
}
/**
......
......@@ -569,7 +569,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
*
* maybe bm_set should be atomic_t ?
*/
static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long s;
......
此差异已折叠。
此差异已折叠。
......@@ -33,10 +33,13 @@
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_wrappers.h"
#include <asm/unaligned.h>
#include <linux/drbd_tag_magic.h>
#include <linux/drbd_limits.h>
#include <linux/compiler.h>
#include <linux/kthread.h>
static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
......@@ -169,6 +172,10 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
put_net_conf(mdev);
}
/* The helper may take some time.
* write out any unsynced meta data changes now */
drbd_md_sync(mdev);
dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
drbd_bcast_ev_helper(mdev, cmd);
......@@ -202,12 +209,10 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
put_ldev(mdev);
} else {
dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
return mdev->state.pdsk;
nps = mdev->state.pdsk;
goto out;
}
if (fp == FP_STONITH)
_drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE);
r = drbd_khelper(mdev, "fence-peer");
switch ((r>>8) & 0xff) {
......@@ -252,9 +257,36 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
dev_info(DEV, "fence-peer helper returned %d (%s)\n",
(r>>8) & 0xff, ex_to_string);
out:
if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
/* The handler was not successful... unfreeze here, the
state engine can not unfreeze... */
_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
}
return nps;
}
static int _try_outdate_peer_async(void *data)
{
struct drbd_conf *mdev = (struct drbd_conf *)data;
enum drbd_disk_state nps;
nps = drbd_try_outdate_peer(mdev);
drbd_request_state(mdev, NS(pdsk, nps));
return 0;
}
void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
{
struct task_struct *opa;
opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
if (IS_ERR(opa))
dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
}
int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
{
......@@ -394,6 +426,39 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
return r;
}
static struct drbd_conf *ensure_mdev(int minor, int create)
{
struct drbd_conf *mdev;
if (minor >= minor_count)
return NULL;
mdev = minor_to_mdev(minor);
if (!mdev && create) {
struct gendisk *disk = NULL;
mdev = drbd_new_device(minor);
spin_lock_irq(&drbd_pp_lock);
if (minor_table[minor] == NULL) {
minor_table[minor] = mdev;
disk = mdev->vdisk;
mdev = NULL;
} /* else: we lost the race */
spin_unlock_irq(&drbd_pp_lock);
if (disk) /* we won the race above */
/* in case we ever add a drbd_delete_device(),
* don't forget the del_gendisk! */
add_disk(disk);
else /* we lost the race above */
drbd_free_mdev(mdev);
mdev = minor_to_mdev(minor);
}
return mdev;
}
static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
......@@ -494,6 +559,8 @@ char *ppsize(char *buf, unsigned long long size)
void drbd_suspend_io(struct drbd_conf *mdev)
{
set_bit(SUSPEND_IO, &mdev->flags);
if (is_susp(mdev->state))
return;
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
}
......@@ -713,9 +780,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
blk_queue_segment_boundary(q, PAGE_SIZE-1);
blk_stack_limits(&q->limits, &b->limits, 0);
if (b->merge_bvec_fn)
dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n",
b->merge_bvec_fn);
dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
......@@ -729,14 +793,16 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
/* serialize deconfig (worker exiting, doing cleanup)
* and reconfig (drbdsetup disk, drbdsetup net)
*
* wait for a potentially exiting worker, then restart it,
* or start a new one.
* Wait for a potentially exiting worker, then restart it,
* or start a new one. Flush any pending work, there may still be an
* after_state_change queued.
*/
static void drbd_reconfig_start(struct drbd_conf *mdev)
{
wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
drbd_thread_start(&mdev->worker);
drbd_flush_workqueue(mdev);
}
/* if still unconfigured, stops worker again.
......@@ -756,6 +822,29 @@ static void drbd_reconfig_done(struct drbd_conf *mdev)
wake_up(&mdev->state_wait);
}
/* Make sure IO is suspended before calling this function(). */
static void drbd_suspend_al(struct drbd_conf *mdev)
{
int s = 0;
if (lc_try_lock(mdev->act_log)) {
drbd_al_shrink(mdev);
lc_unlock(mdev->act_log);
} else {
dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
return;
}
spin_lock_irq(&mdev->req_lock);
if (mdev->state.conn < C_CONNECTED)
s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
spin_unlock_irq(&mdev->req_lock);
if (s)
dev_info(DEV, "Suspended AL updates\n");
}
/* does always return 0;
* interesting return code is in reply->ret_code */
static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
......@@ -769,6 +858,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
struct inode *inode, *inode2;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
unsigned int max_seg_s;
int rv;
int cp_discovered = 0;
int logical_block_size;
......@@ -803,6 +893,15 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
goto fail;
}
if (get_net_conf(mdev)) {
int prot = mdev->net_conf->wire_protocol;
put_net_conf(mdev);
if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
retcode = ERR_STONITH_AND_PROT_A;
goto fail;
}
}
nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
if (IS_ERR(nbc->lo_file)) {
dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
......@@ -924,7 +1023,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
drbd_suspend_io(mdev);
/* also wait for the last barrier ack. */
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt));
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
/* and for any other previously queued work */
drbd_flush_workqueue(mdev);
......@@ -1021,7 +1120,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
else
clear_bit(CRASHED_PRIMARY, &mdev->flags);
if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) {
if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
!(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
set_bit(CRASHED_PRIMARY, &mdev->flags);
cp_discovered = 1;
}
......@@ -1031,7 +1131,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->read_cnt = 0;
mdev->writ_cnt = 0;
drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE);
max_seg_s = DRBD_MAX_SEGMENT_SIZE;
if (mdev->state.conn == C_CONNECTED) {
/* We are Primary, Connected, and now attach a new local
* backing store. We must not increase the user visible maximum
* bio size on this device to something the peer may not be
* able to handle. */
if (mdev->agreed_pro_version < 94)
max_seg_s = queue_max_segment_size(mdev->rq_queue);
else if (mdev->agreed_pro_version == 94)
max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
/* else: drbd 8.3.9 and later, stay with default */
}
drbd_setup_queue_param(mdev, max_seg_s);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
......@@ -1079,6 +1192,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
drbd_al_to_on_disk_bm(mdev);
}
if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
drbd_suspend_al(mdev); /* IO is still suspended here... */
spin_lock_irq(&mdev->req_lock);
os = mdev->state;
ns.i = os.i;
......@@ -1235,7 +1351,16 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
&& (new_conf->wire_protocol != DRBD_PROT_C)) {
retcode = ERR_NOT_PROTO_C;
goto fail;
};
}
if (get_ldev(mdev)) {
enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
put_ldev(mdev);
if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
retcode = ERR_STONITH_AND_PROT_A;
goto fail;
}
}
if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
retcode = ERR_DISCARD;
......@@ -1350,6 +1475,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
}
}
drbd_flush_workqueue(mdev);
spin_lock_irq(&mdev->req_lock);
if (mdev->net_conf != NULL) {
retcode = ERR_NET_CONFIGURED;
......@@ -1388,10 +1514,9 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
mdev->int_dig_out=int_dig_out;
mdev->int_dig_in=int_dig_in;
mdev->int_dig_vv=int_dig_vv;
retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
spin_unlock_irq(&mdev->req_lock);
retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE);
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
reply->ret_code = retcode;
drbd_reconfig_done(mdev);
......@@ -1546,6 +1671,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
struct crypto_hash *csums_tfm = NULL;
struct syncer_conf sc;
cpumask_var_t new_cpu_mask;
int *rs_plan_s = NULL;
int fifo_size;
if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
retcode = ERR_NOMEM;
......@@ -1557,6 +1684,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
sc.rate = DRBD_RATE_DEF;
sc.after = DRBD_AFTER_DEF;
sc.al_extents = DRBD_AL_EXTENTS_DEF;
sc.on_no_data = DRBD_ON_NO_DATA_DEF;
sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
} else
memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
......@@ -1634,6 +1767,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
}
#undef AL_MAX
/* to avoid spurious errors when configuring minors before configuring
* the minors they depend on: if necessary, first create the minor we
* depend on */
if (sc.after >= 0)
ensure_mdev(sc.after, 1);
/* most sanity checks done, try to assign the new sync-after
* dependency. need to hold the global lock in there,
* to avoid a race in the dependency loop check. */
......@@ -1641,6 +1780,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
if (retcode != NO_ERROR)
goto fail;
fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
if (!rs_plan_s) {
dev_err(DEV, "kmalloc of fifo_buffer failed");
retcode = ERR_NOMEM;
goto fail;
}
}
/* ok, assign the rest of it as well.
* lock against receive_SyncParam() */
spin_lock(&mdev->peer_seq_lock);
......@@ -1657,6 +1806,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
mdev->verify_tfm = verify_tfm;
verify_tfm = NULL;
}
if (fifo_size != mdev->rs_plan_s.size) {
kfree(mdev->rs_plan_s.values);
mdev->rs_plan_s.values = rs_plan_s;
mdev->rs_plan_s.size = fifo_size;
mdev->rs_planed = 0;
rs_plan_s = NULL;
}
spin_unlock(&mdev->peer_seq_lock);
if (get_ldev(mdev)) {
......@@ -1688,6 +1846,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
fail:
kfree(rs_plan_s);
free_cpumask_var(new_cpu_mask);
crypto_free_hash(csums_tfm);
crypto_free_hash(verify_tfm);
......@@ -1721,12 +1880,38 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
return 0;
}
static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
{
int rv;
rv = drbd_bmio_set_n_write(mdev);
drbd_suspend_al(mdev);
return rv;
}
static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
int retcode;
reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
if (retcode < SS_SUCCESS) {
if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
/* The peer will get a resync upon connect anyways. Just make that
into a full resync. */
retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
if (retcode >= SS_SUCCESS) {
/* open coded drbd_bitmap_io() */
if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
"set_n_write from invalidate_peer"))
retcode = ERR_IO_MD_DISK;
}
} else
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
}
reply->ret_code = retcode;
return 0;
}
......@@ -1765,7 +1950,21 @@ static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
drbd_uuid_new_current(mdev);
clear_bit(NEW_CUR_UUID, &mdev->flags);
drbd_md_sync(mdev);
}
drbd_suspend_io(mdev);
reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
if (reply->ret_code == SS_SUCCESS) {
if (mdev->state.conn < C_CONNECTED)
tl_clear(mdev);
if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
tl_restart(mdev, fail_frozen_disk_io);
}
drbd_resume_io(mdev);
return 0;
}
......@@ -1941,40 +2140,6 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
return 0;
}
static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
{
struct drbd_conf *mdev;
if (nlp->drbd_minor >= minor_count)
return NULL;
mdev = minor_to_mdev(nlp->drbd_minor);
if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
struct gendisk *disk = NULL;
mdev = drbd_new_device(nlp->drbd_minor);
spin_lock_irq(&drbd_pp_lock);
if (minor_table[nlp->drbd_minor] == NULL) {
minor_table[nlp->drbd_minor] = mdev;
disk = mdev->vdisk;
mdev = NULL;
} /* else: we lost the race */
spin_unlock_irq(&drbd_pp_lock);
if (disk) /* we won the race above */
/* in case we ever add a drbd_delete_device(),
* don't forget the del_gendisk! */
add_disk(disk);
else /* we lost the race above */
drbd_free_mdev(mdev);
mdev = minor_to_mdev(nlp->drbd_minor);
}
return mdev;
}
struct cn_handler_struct {
int (*function)(struct drbd_conf *,
struct drbd_nl_cfg_req *,
......@@ -2035,7 +2200,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
goto fail;
}
mdev = ensure_mdev(nlp);
mdev = ensure_mdev(nlp->drbd_minor,
(nlp->flags & DRBD_NL_CREATE_DEVICE));
if (!mdev) {
retcode = ERR_MINOR_INVALID;
goto fail;
......
......@@ -57,6 +57,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
unsigned long db, dt, dbdt, rt, rs_left;
unsigned int res;
int i, x, y;
int stalled = 0;
drbd_get_syncer_progress(mdev, &rs_left, &res);
......@@ -90,18 +91,17 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
* db: blocks written from mark until now
* rt: remaining time
*/
dt = (jiffies - mdev->rs_mark_time) / HZ;
if (dt > 20) {
/* if we made no update to rs_mark_time for too long,
* we are stalled. show that. */
seq_printf(seq, "stalled\n");
return;
}
/* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
* at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
* least DRBD_SYNC_MARK_STEP time before it will be modified. */
i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
stalled = 1;
if (!dt)
dt++;
db = mdev->rs_mark_left - rs_left;
db = mdev->rs_mark_left[i] - rs_left;
rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
seq_printf(seq, "finish: %lu:%02lu:%02lu",
......@@ -118,7 +118,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
/* mean speed since syncer started
* we do account for PausedSync periods */
dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
if (dt <= 0)
if (dt == 0)
dt = 1;
db = mdev->rs_total - rs_left;
dbdt = Bit2KB(db/dt);
......@@ -128,7 +128,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
else
seq_printf(seq, " (%ld)", dbdt);
seq_printf(seq, " K/sec\n");
if (mdev->state.conn == C_SYNC_TARGET) {
if (mdev->c_sync_rate > 1000)
seq_printf(seq, " want: %d,%03d",
mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
else
seq_printf(seq, " want: %d", mdev->c_sync_rate);
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
}
static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
......@@ -196,7 +203,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
} else {
seq_printf(seq,
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n"
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
"lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
i, sn,
......@@ -206,11 +213,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
drbd_disk_str(mdev->state.pdsk),
(mdev->net_conf == NULL ? ' ' :
(mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
mdev->state.susp ? 's' : 'r',
is_susp(mdev->state) ? 's' : 'r',
mdev->state.aftr_isp ? 'a' : '-',
mdev->state.peer_isp ? 'p' : '-',
mdev->state.user_isp ? 'u' : '-',
mdev->congestion_reason ?: '-',
test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
mdev->send_cnt/2,
mdev->recv_cnt/2,
mdev->writ_cnt/2,
......
此差异已折叠。
......@@ -59,17 +59,19 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
{
const unsigned long s = req->rq_state;
/* remove it from the transfer log.
* well, only if it had been there in the first
* place... if it had not (local only or conflicting
* and never sent), it should still be "empty" as
* initialized in drbd_req_new(), so we can list_del() it
* here unconditionally */
list_del(&req->tl_requests);
/* if it was a write, we may have to set the corresponding
* bit(s) out-of-sync first. If it had a local part, we need to
* release the reference to the activity log. */
if (rw == WRITE) {
/* remove it from the transfer log.
* well, only if it had been there in the first
* place... if it had not (local only or conflicting
* and never sent), it should still be "empty" as
* initialized in drbd_req_new(), so we can list_del() it
* here unconditionally */
list_del(&req->tl_requests);
/* Set out-of-sync unless both OK flags are set
* (local only or remote failed).
* Other places where we set out-of-sync:
......@@ -92,7 +94,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
*/
if (s & RQ_LOCAL_MASK) {
if (get_ldev_if_state(mdev, D_FAILED)) {
drbd_al_complete_io(mdev, req->sector);
if (s & RQ_IN_ACT_LOG)
drbd_al_complete_io(mdev, req->sector);
put_ldev(mdev);
} else if (__ratelimit(&drbd_ratelimit_state)) {
dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
......@@ -280,6 +283,14 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
* protocol A or B, barrier ack still pending... */
}
static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
{
struct drbd_conf *mdev = req->mdev;
if (!is_susp(mdev->state))
_req_may_be_done(req, m);
}
/*
* checks whether there was an overlapping request
* or ee already registered.
......@@ -380,10 +391,11 @@ static int _req_conflicts(struct drbd_request *req)
* and it enforces that we have to think in a very structured manner
* about the "events" that may happen to a request during its life time ...
*/
void __req_mod(struct drbd_request *req, enum drbd_req_event what,
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m)
{
struct drbd_conf *mdev = req->mdev;
int rv = 0;
m->bio = NULL;
switch (what) {
......@@ -420,7 +432,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
req->rq_state &= ~RQ_LOCAL_PENDING;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
......@@ -429,7 +441,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~RQ_LOCAL_PENDING;
__drbd_chk_io_error(mdev, FALSE);
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
......@@ -437,7 +449,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* it is legal to fail READA */
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
......@@ -455,7 +467,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* no point in retrying if there is no good remote data,
* or we have no connection. */
if (mdev->state.pdsk != D_UP_TO_DATE) {
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
}
......@@ -517,11 +529,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
req->epoch = mdev->newest_tle->br_number;
list_add_tail(&req->tl_requests,
&mdev->newest_tle->requests);
/* increment size of current epoch */
mdev->newest_tle->n_req++;
mdev->newest_tle->n_writes++;
/* queue work item to send data */
D_ASSERT(req->rq_state & RQ_NET_PENDING);
......@@ -530,7 +540,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
drbd_queue_work(&mdev->data.work, &req->w);
/* close the epoch, in case it outgrew the limit */
if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size)
if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size)
queue_barrier(mdev);
break;
......@@ -543,7 +553,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~RQ_NET_QUEUED;
/* if we did it right, tl_clear should be scheduled only after
* this, so this should not be necessary! */
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
case handed_over_to_network:
......@@ -568,7 +578,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
* "completed_ok" events came in, once we return from
* _drbd_send_zc_bio (drbd_send_dblock), we have to check
* whether it is done already, and end it. */
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
case read_retry_remote_canceled:
......@@ -584,7 +594,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* if it is still queued, we may not complete it here.
* it will be canceled soon. */
if (!(req->rq_state & RQ_NET_QUEUED))
_req_may_be_done(req, m);
_req_may_be_done(req, m); /* Allowed while state.susp */
break;
case write_acked_by_peer_and_sis:
......@@ -619,7 +629,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(req->rq_state & RQ_NET_PENDING);
dec_ap_pending(mdev);
req->rq_state &= ~RQ_NET_PENDING;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
case neg_acked:
......@@ -629,11 +639,50 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
req->rq_state |= RQ_NET_DONE;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
/* else: done by handed_over_to_network */
break;
case fail_frozen_disk_io:
if (!(req->rq_state & RQ_LOCAL_COMPLETED))
break;
_req_may_be_done(req, m); /* Allowed while state.susp */
break;
case restart_frozen_disk_io:
if (!(req->rq_state & RQ_LOCAL_COMPLETED))
break;
req->rq_state &= ~RQ_LOCAL_COMPLETED;
rv = MR_READ;
if (bio_data_dir(req->master_bio) == WRITE)
rv = MR_WRITE;
get_ldev(mdev);
req->w.cb = w_restart_disk_io;
drbd_queue_work(&mdev->data.work, &req->w);
break;
case resend:
/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
before the connection loss (B&C only); only P_BARRIER_ACK was missing.
Trowing them out of the TL here by pretending we got a BARRIER_ACK
We ensure that the peer was not rebooted */
if (!(req->rq_state & RQ_NET_OK)) {
if (req->w.cb) {
drbd_queue_work(&mdev->data.work, &req->w);
rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
}
break;
}
/* else, fall through to barrier_acked */
case barrier_acked:
if (!(req->rq_state & RQ_WRITE))
break;
if (req->rq_state & RQ_NET_PENDING) {
/* barrier came in before all requests have been acked.
* this is bad, because if the connection is lost now,
......@@ -643,7 +692,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
}
D_ASSERT(req->rq_state & RQ_NET_SENT);
req->rq_state |= RQ_NET_DONE;
_req_may_be_done(req, m);
_req_may_be_done(req, m); /* Allowed while state.susp */
break;
case data_received:
......@@ -651,9 +700,11 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
dec_ap_pending(mdev);
req->rq_state &= ~RQ_NET_PENDING;
req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
};
return rv;
}
/* we may do a local read if:
......@@ -752,14 +803,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
* resync extent to finish, and, if necessary, pulls in the target
* extent into the activity log, which involves further disk io because
* of transactional on-disk meta data updates. */
if (rw == WRITE && local)
if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
req->rq_state |= RQ_IN_ACT_LOG;
drbd_al_begin_io(mdev, sector);
}
remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
if (!(local || remote) && !mdev->state.susp) {
if (!(local || remote) && !is_susp(mdev->state)) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
goto fail_free_complete;
}
......@@ -785,7 +838,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
/* GOOD, everything prepared, grab the spin_lock */
spin_lock_irq(&mdev->req_lock);
if (mdev->state.susp) {
if (is_susp(mdev->state)) {
/* If we got suspended, use the retry mechanism of
generic_make_request() to restart processing of this
bio. In the next call to drbd_make_request_26
......@@ -867,30 +920,10 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
/* check this request on the collision detection hash tables.
* if we have a conflict, just complete it here.
* THINK do we want to check reads, too? (I don't think so...) */
if (rw == WRITE && _req_conflicts(req)) {
/* this is a conflicting request.
* even though it may have been only _partially_
* overlapping with one of the currently pending requests,
* without even submitting or sending it, we will
* pretend that it was successfully served right now.
*/
if (local) {
bio_put(req->private_bio);
req->private_bio = NULL;
drbd_al_complete_io(mdev, req->sector);
put_ldev(mdev);
local = 0;
}
if (remote)
dec_ap_pending(mdev);
_drbd_end_io_acct(mdev, req);
/* THINK: do we want to fail it (-EIO), or pretend success? */
bio_endio(req->master_bio, 0);
req->master_bio = NULL;
dec_ap_bio(mdev);
drbd_req_free(req);
remote = 0;
}
if (rw == WRITE && _req_conflicts(req))
goto fail_conflicting;
list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
/* NOTE remote first: to get the concurrent write detection right,
* we must register the request before start of local IO. */
......@@ -923,6 +956,21 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
return 0;
fail_conflicting:
/* this is a conflicting request.
* even though it may have been only _partially_
* overlapping with one of the currently pending requests,
* without even submitting or sending it, we will
* pretend that it was successfully served right now.
*/
_drbd_end_io_acct(mdev, req);
spin_unlock_irq(&mdev->req_lock);
if (remote)
dec_ap_pending(mdev);
/* THINK: do we want to fail it (-EIO), or pretend success?
* this pretends success. */
err = 0;
fail_free_complete:
if (rw == WRITE && local)
drbd_al_complete_io(mdev, sector);
......@@ -961,21 +1009,6 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
return 1;
}
/*
* Paranoia: we might have been primary, but sync target, or
* even diskless, then lost the connection.
* This should have been handled (panic? suspend?) somewhere
* else. But maybe it was not, so check again here.
* Caution: as long as we do not have a read/write lock on mdev,
* to serialize state changes, this is racy, since we may lose
* the connection *after* we test for the cstate.
*/
if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Sorry, I have no access to good data anymore.\n");
return 1;
}
return 0;
}
......
......@@ -104,6 +104,9 @@ enum drbd_req_event {
read_ahead_completed_with_error,
write_completed_with_error,
completed_ok,
resend,
fail_frozen_disk_io,
restart_frozen_disk_io,
nothing, /* for tracing only */
};
......@@ -183,6 +186,12 @@ enum drbd_req_state_bits {
/* keep this last, its for the RQ_NET_MASK */
__RQ_NET_MAX,
/* Set when this is a write, clear for a read */
__RQ_WRITE,
/* Should call drbd_al_complete_io() for this request... */
__RQ_IN_ACT_LOG,
};
#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
......@@ -201,6 +210,16 @@ enum drbd_req_state_bits {
/* 0x1f8 */
#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
#define RQ_WRITE (1UL << __RQ_WRITE)
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
/* For waking up the frozen transfer log mod_req() has to return if the request
should be counted in the epoch object*/
#define MR_WRITE_SHIFT 0
#define MR_WRITE (1 << MR_WRITE_SHIFT)
#define MR_READ_SHIFT 1
#define MR_READ (1 << MR_READ_SHIFT)
/* epoch entries */
static inline
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
......@@ -244,30 +263,36 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
return NULL;
}
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
{
struct bio *bio;
bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
req->private_bio = bio;
bio->bi_private = req;
bio->bi_end_io = drbd_endio_pri;
bio->bi_next = NULL;
}
static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
struct bio *bio_src)
{
struct bio *bio;
struct drbd_request *req =
mempool_alloc(drbd_request_mempool, GFP_NOIO);
if (likely(req)) {
bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
drbd_req_make_private_bio(req, bio_src);
req->rq_state = 0;
req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
req->mdev = mdev;
req->master_bio = bio_src;
req->private_bio = bio;
req->epoch = 0;
req->sector = bio->bi_sector;
req->size = bio->bi_size;
req->sector = bio_src->bi_sector;
req->size = bio_src->bi_size;
req->start_time = jiffies;
INIT_HLIST_NODE(&req->colision);
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
bio->bi_private = req;
bio->bi_end_io = drbd_endio_pri;
bio->bi_next = NULL;
}
return req;
}
......@@ -292,36 +317,43 @@ struct bio_and_error {
extern void _req_may_be_done(struct drbd_request *req,
struct bio_and_error *m);
extern void __req_mod(struct drbd_request *req, enum drbd_req_event what,
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m);
extern void complete_master_bio(struct drbd_conf *mdev,
struct bio_and_error *m);
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock, e.g. when walking some list on cleanup. */
static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what)
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
{
struct drbd_conf *mdev = req->mdev;
struct bio_and_error m;
int rv;
/* __req_mod possibly frees req, do not touch req after that! */
__req_mod(req, what, &m);
rv = __req_mod(req, what, &m);
if (m.bio)
complete_master_bio(mdev, &m);
return rv;
}
/* completion of master bio is outside of spinlock.
* If you need it irqsave, do it your self! */
static inline void req_mod(struct drbd_request *req,
static inline int req_mod(struct drbd_request *req,
enum drbd_req_event what)
{
struct drbd_conf *mdev = req->mdev;
struct bio_and_error m;
int rv;
spin_lock_irq(&mdev->req_lock);
__req_mod(req, what, &m);
rv = __req_mod(req, what, &m);
spin_unlock_irq(&mdev->req_lock);
if (m.bio)
complete_master_bio(mdev, &m);
return rv;
}
#endif
......@@ -39,8 +39,6 @@
#include "drbd_int.h"
#include "drbd_req.h"
#define SLEEP_TIME (HZ/10)
static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
......@@ -217,10 +215,8 @@ void drbd_endio_sec(struct bio *bio, int error)
*/
void drbd_endio_pri(struct bio *bio, int error)
{
unsigned long flags;
struct drbd_request *req = bio->bi_private;
struct drbd_conf *mdev = req->mdev;
struct bio_and_error m;
enum drbd_req_event what;
int uptodate = bio_flagged(bio, BIO_UPTODATE);
......@@ -246,12 +242,7 @@ void drbd_endio_pri(struct bio *bio, int error)
bio_put(req->private_bio);
req->private_bio = ERR_PTR(error);
spin_lock_irqsave(&mdev->req_lock, flags);
__req_mod(req, what, &m);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (m.bio)
complete_master_bio(mdev, &m);
req_mod(req, what);
}
int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
......@@ -376,54 +367,145 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
struct drbd_epoch_entry *e;
if (!get_ldev(mdev))
return 0;
return -EIO;
if (drbd_rs_should_slow_down(mdev))
goto defer;
/* GFP_TRY, because if there is no memory available right now, this may
* be rescheduled for later. It is "only" background resync, after all. */
e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
if (!e)
goto fail;
goto defer;
e->w.cb = w_e_send_csum;
spin_lock_irq(&mdev->req_lock);
list_add(&e->w.list, &mdev->read_ee);
spin_unlock_irq(&mdev->req_lock);
e->w.cb = w_e_send_csum;
atomic_add(size >> 9, &mdev->rs_sect_ev);
if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
return 1;
return 0;
/* drbd_submit_ee currently fails for one reason only:
* not being able to allocate enough bios.
* Is dropping the connection going to help? */
spin_lock_irq(&mdev->req_lock);
list_del(&e->w.list);
spin_unlock_irq(&mdev->req_lock);
drbd_free_ee(mdev, e);
fail:
defer:
put_ldev(mdev);
return 2;
return -EAGAIN;
}
void resync_timer_fn(unsigned long data)
{
unsigned long flags;
struct drbd_conf *mdev = (struct drbd_conf *) data;
int queue;
spin_lock_irqsave(&mdev->req_lock, flags);
if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) {
queue = 1;
if (mdev->state.conn == C_VERIFY_S)
mdev->resync_work.cb = w_make_ov_request;
else
mdev->resync_work.cb = w_make_resync_request;
} else {
queue = 1;
switch (mdev->state.conn) {
case C_VERIFY_S:
mdev->resync_work.cb = w_make_ov_request;
break;
case C_SYNC_TARGET:
mdev->resync_work.cb = w_make_resync_request;
break;
default:
queue = 0;
mdev->resync_work.cb = w_resync_inactive;
}
spin_unlock_irqrestore(&mdev->req_lock, flags);
/* harmless race: list_empty outside data.work.q_lock */
if (list_empty(&mdev->resync_work.list) && queue)
drbd_queue_work(&mdev->data.work, &mdev->resync_work);
}
static void fifo_set(struct fifo_buffer *fb, int value)
{
int i;
for (i = 0; i < fb->size; i++)
fb->values[i] = value;
}
static int fifo_push(struct fifo_buffer *fb, int value)
{
int ov;
ov = fb->values[fb->head_index];
fb->values[fb->head_index++] = value;
if (fb->head_index >= fb->size)
fb->head_index = 0;
return ov;
}
static void fifo_add_val(struct fifo_buffer *fb, int value)
{
int i;
for (i = 0; i < fb->size; i++)
fb->values[i] += value;
}
int drbd_rs_controller(struct drbd_conf *mdev)
{
unsigned int sect_in; /* Number of sectors that came in since the last turn */
unsigned int want; /* The number of sectors we want in the proxy */
int req_sect; /* Number of sectors to request in this turn */
int correction; /* Number of sectors more we need in the proxy*/
int cps; /* correction per invocation of drbd_rs_controller() */
int steps; /* Number of time steps to plan ahead */
int curr_corr;
int max_sect;
sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
mdev->rs_in_flight -= sect_in;
spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
} else { /* normal path */
want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
}
correction = want - mdev->rs_in_flight - mdev->rs_planed;
/* Plan ahead */
cps = correction / steps;
fifo_add_val(&mdev->rs_plan_s, cps);
mdev->rs_planed += cps * steps;
/* What we do in this step */
curr_corr = fifo_push(&mdev->rs_plan_s, 0);
spin_unlock(&mdev->peer_seq_lock);
mdev->rs_planed -= curr_corr;
req_sect = sect_in + curr_corr;
if (req_sect < 0)
req_sect = 0;
max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
if (req_sect > max_sect)
req_sect = max_sect;
/*
dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
sect_in, mdev->rs_in_flight, want, correction,
steps, cps, mdev->rs_planed, curr_corr, req_sect);
*/
return req_sect;
}
int w_make_resync_request(struct drbd_conf *mdev,
struct drbd_work *w, int cancel)
{
......@@ -431,8 +513,9 @@ int w_make_resync_request(struct drbd_conf *mdev,
sector_t sector;
const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
int max_segment_size;
int number, i, size, pe, mx;
int number, rollback_i, size, pe, mx;
int align, queued, sndbuf;
int i = 0;
if (unlikely(cancel))
return 1;
......@@ -446,6 +529,12 @@ int w_make_resync_request(struct drbd_conf *mdev,
dev_err(DEV, "%s in w_make_resync_request\n",
drbd_conn_str(mdev->state.conn));
if (mdev->rs_total == 0) {
/* empty resync? */
drbd_resync_finished(mdev);
return 1;
}
if (!get_ldev(mdev)) {
/* Since we only need to access mdev->rsync a
get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
......@@ -458,11 +547,25 @@ int w_make_resync_request(struct drbd_conf *mdev,
/* starting with drbd 8.3.8, we can handle multi-bio EEs,
* if it should be necessary */
max_segment_size = mdev->agreed_pro_version < 94 ?
queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
max_segment_size =
mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;
number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ);
pe = atomic_read(&mdev->rs_pending_cnt);
if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
} else {
mdev->c_sync_rate = mdev->sync_conf.rate;
number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
}
/* Throttle resync on lower level disk activity, which may also be
* caused by application IO on Primary/SyncTarget.
* Keep this after the call to drbd_rs_controller, as that assumes
* to be called as precisely as possible every SLEEP_TIME,
* and would be confused otherwise. */
if (drbd_rs_should_slow_down(mdev))
goto requeue;
mutex_lock(&mdev->data.mutex);
if (mdev->data.socket)
......@@ -476,6 +579,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
mx = number;
/* Limit the number of pending RS requests to no more than the peer's receive buffer */
pe = atomic_read(&mdev->rs_pending_cnt);
if ((pe + number) > mx) {
number = mx - pe;
}
......@@ -526,6 +630,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
* be prepared for all stripe sizes of software RAIDs.
*/
align = 1;
rollback_i = i;
for (;;) {
if (size + BM_BLOCK_SIZE > max_segment_size)
break;
......@@ -561,14 +666,19 @@ int w_make_resync_request(struct drbd_conf *mdev,
size = (capacity-sector)<<9;
if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
switch (read_for_csum(mdev, sector, size)) {
case 0: /* Disk failure*/
case -EIO: /* Disk failure */
put_ldev(mdev);
return 0;
case 2: /* Allocation failed */
case -EAGAIN: /* allocation failed, or ldev busy */
drbd_rs_complete_io(mdev, sector);
mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
i = rollback_i;
goto requeue;
/* case 1: everything ok */
case 0:
/* everything ok */
break;
default:
BUG();
}
} else {
inc_rs_pending(mdev);
......@@ -595,6 +705,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
}
requeue:
mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
put_ldev(mdev);
return 1;
......@@ -670,6 +781,14 @@ static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int ca
return 1;
}
static void ping_peer(struct drbd_conf *mdev)
{
clear_bit(GOT_PING_ACK, &mdev->flags);
request_ping(mdev);
wait_event(mdev->misc_wait,
test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
}
int drbd_resync_finished(struct drbd_conf *mdev)
{
unsigned long db, dt, dbdt;
......@@ -709,6 +828,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
if (!get_ldev(mdev))
goto out;
ping_peer(mdev);
spin_lock_irq(&mdev->req_lock);
os = mdev->state;
......@@ -801,6 +922,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
mdev->rs_paused = 0;
mdev->ov_start_sector = 0;
drbd_md_sync(mdev);
if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
......@@ -817,9 +940,13 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent
{
if (drbd_ee_has_active_page(e)) {
/* This might happen if sendpage() has not finished */
int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT;
atomic_add(i, &mdev->pp_in_use_by_net);
atomic_sub(i, &mdev->pp_in_use);
spin_lock_irq(&mdev->req_lock);
list_add_tail(&e->w.list, &mdev->net_ee);
spin_unlock_irq(&mdev->req_lock);
wake_up(&drbd_pp_wait);
} else
drbd_free_ee(mdev, e);
}
......@@ -926,9 +1053,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 1;
}
drbd_rs_complete_io(mdev, e->sector);
if (get_ldev(mdev)) {
drbd_rs_complete_io(mdev, e->sector);
put_ldev(mdev);
}
di = (struct digest_info *)(unsigned long)e->block_id;
di = e->digest;
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
/* quick hack to try to avoid a race against reconfiguration.
......@@ -952,7 +1082,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
} else {
inc_rs_pending(mdev);
e->block_id = ID_SYNCER;
e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */
kfree(di);
ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
}
} else {
......@@ -962,9 +1094,6 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
dec_unacked(mdev);
kfree(di);
move_to_net_ee_or_free(mdev, e);
if (unlikely(!ok))
......@@ -1034,9 +1163,12 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
* the resync lru has been cleaned up already */
drbd_rs_complete_io(mdev, e->sector);
if (get_ldev(mdev)) {
drbd_rs_complete_io(mdev, e->sector);
put_ldev(mdev);
}
di = (struct digest_info *)(unsigned long)e->block_id;
di = e->digest;
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
digest_size = crypto_hash_digestsize(mdev->verify_tfm);
......@@ -1055,9 +1187,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
dec_unacked(mdev);
kfree(di);
if (!eq)
drbd_ov_oos_found(mdev, e->sector, e->size);
else
......@@ -1108,7 +1237,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
* dec_ap_pending will be done in got_BarrierAck
* or (on connection loss) in w_clear_epoch. */
ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
(struct p_header *)p, sizeof(*p), 0);
(struct p_header80 *)p, sizeof(*p), 0);
drbd_put_data_sock(mdev);
return ok;
......@@ -1173,6 +1302,24 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return ok;
}
int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
drbd_al_begin_io(mdev, req->sector);
/* Calling drbd_al_begin_io() out of the worker might deadlocks
theoretically. Practically it can not deadlock, since this is
only used when unfreezing IOs. All the extents of the requests
that made it into the TL are already active */
drbd_req_make_private_bio(req, req->master_bio);
req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
generic_make_request(req->private_bio);
return 1;
}
static int _drbd_may_sync_now(struct drbd_conf *mdev)
{
struct drbd_conf *odev = mdev;
......@@ -1298,14 +1445,6 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
return retcode;
}
static void ping_peer(struct drbd_conf *mdev)
{
clear_bit(GOT_PING_ACK, &mdev->flags);
request_ping(mdev);
wait_event(mdev->misc_wait,
test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
}
/**
* drbd_start_resync() - Start the resync process
* @mdev: DRBD device.
......@@ -1379,13 +1518,21 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
r = SS_UNKNOWN_ERROR;
if (r == SS_SUCCESS) {
mdev->rs_total =
mdev->rs_mark_left = drbd_bm_total_weight(mdev);
unsigned long tw = drbd_bm_total_weight(mdev);
unsigned long now = jiffies;
int i;
mdev->rs_failed = 0;
mdev->rs_paused = 0;
mdev->rs_start =
mdev->rs_mark_time = jiffies;
mdev->rs_same_csum = 0;
mdev->rs_last_events = 0;
mdev->rs_last_sect_ev = 0;
mdev->rs_total = tw;
mdev->rs_start = now;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
mdev->rs_mark_left[i] = tw;
mdev->rs_mark_time[i] = now;
}
_drbd_pause_after(mdev);
}
write_unlock_irq(&global_state_lock);
......@@ -1397,12 +1544,31 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
(unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
(unsigned long) mdev->rs_total);
if (mdev->rs_total == 0) {
/* Peer still reachable? Beware of failing before-resync-target handlers! */
ping_peer(mdev);
if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
/* This still has a race (about when exactly the peers
* detect connection loss) that can lead to a full sync
* on next handshake. In 8.3.9 we fixed this with explicit
* resync-finished notifications, but the fix
* introduces a protocol change. Sleeping for some
* time longer than the ping interval + timeout on the
* SyncSource, to give the SyncTarget the chance to
* detect connection loss, then waiting for a ping
* response (implicit in drbd_resync_finished) reduces
* the race considerably, but does not solve it. */
if (side == C_SYNC_SOURCE)
schedule_timeout_interruptible(
mdev->net_conf->ping_int * HZ +
mdev->net_conf->ping_timeo*HZ/9);
drbd_resync_finished(mdev);
}
atomic_set(&mdev->rs_sect_in, 0);
atomic_set(&mdev->rs_sect_ev, 0);
mdev->rs_in_flight = 0;
mdev->rs_planed = 0;
spin_lock(&mdev->peer_seq_lock);
fifo_set(&mdev->rs_plan_s, 0);
spin_unlock(&mdev->peer_seq_lock);
/* ns.conn may already be != mdev->state.conn,
* we may have been paused in between, or become paused until
* the timer triggers.
......
......@@ -258,8 +258,8 @@ static int irqdma_allocated;
#include <linux/completion.h>
static struct request *current_req;
static struct request_queue *floppy_queue;
static void do_fd_request(struct request_queue *q);
static int set_next_request(void);
#ifndef fd_get_dma_residue
#define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA)
......@@ -413,6 +413,7 @@ static struct gendisk *disks[N_DRIVE];
static struct block_device *opened_bdev[N_DRIVE];
static DEFINE_MUTEX(open_lock);
static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
static int fdc_queue;
/*
* This struct defines the different floppy types.
......@@ -890,8 +891,8 @@ static void unlock_fdc(void)
del_timer(&fd_timeout);
cont = NULL;
clear_bit(0, &fdc_busy);
if (current_req || blk_peek_request(floppy_queue))
do_fd_request(floppy_queue);
if (current_req || set_next_request())
do_fd_request(current_req->q);
spin_unlock_irqrestore(&floppy_lock, flags);
wake_up(&fdc_wait);
}
......@@ -2243,8 +2244,8 @@ static void floppy_end_request(struct request *req, int error)
* logical buffer */
static void request_done(int uptodate)
{
struct request_queue *q = floppy_queue;
struct request *req = current_req;
struct request_queue *q;
unsigned long flags;
int block;
char msg[sizeof("request done ") + sizeof(int) * 3];
......@@ -2258,6 +2259,8 @@ static void request_done(int uptodate)
return;
}
q = req->q;
if (uptodate) {
/* maintain values for invalidation on geometry
* change */
......@@ -2811,6 +2814,28 @@ static int make_raw_rw_request(void)
return 2;
}
/*
* Round-robin between our available drives, doing one request from each
*/
static int set_next_request(void)
{
struct request_queue *q;
int old_pos = fdc_queue;
do {
q = disks[fdc_queue]->queue;
if (++fdc_queue == N_DRIVE)
fdc_queue = 0;
if (q) {
current_req = blk_fetch_request(q);
if (current_req)
break;
}
} while (fdc_queue != old_pos);
return current_req != NULL;
}
static void redo_fd_request(void)
{
int drive;
......@@ -2822,17 +2847,17 @@ static void redo_fd_request(void)
do_request:
if (!current_req) {
struct request *req;
int pending;
spin_lock_irq(&floppy_lock);
pending = set_next_request();
spin_unlock_irq(&floppy_lock);
spin_lock_irq(floppy_queue->queue_lock);
req = blk_fetch_request(floppy_queue);
spin_unlock_irq(floppy_queue->queue_lock);
if (!req) {
if (!pending) {
do_floppy = NULL;
unlock_fdc();
return;
}
current_req = req;
}
drive = (long)current_req->rq_disk->private_data;
set_fdc(drive);
......@@ -4165,6 +4190,13 @@ static int __init floppy_init(void)
goto out_put_disk;
}
disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);
if (!disks[dr]->queue) {
err = -ENOMEM;
goto out_put_disk;
}
blk_queue_max_hw_sectors(disks[dr]->queue, 64);
disks[dr]->major = FLOPPY_MAJOR;
disks[dr]->first_minor = TOMINOR(dr);
disks[dr]->fops = &floppy_fops;
......@@ -4183,13 +4215,6 @@ static int __init floppy_init(void)
if (err)
goto out_unreg_blkdev;
floppy_queue = blk_init_queue(do_fd_request, &floppy_lock);
if (!floppy_queue) {
err = -ENOMEM;
goto out_unreg_driver;
}
blk_queue_max_hw_sectors(floppy_queue, 64);
blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
floppy_find, NULL, NULL);
......@@ -4317,7 +4342,6 @@ static int __init floppy_init(void)
/* to be cleaned up... */
disks[drive]->private_data = (void *)(long)drive;
disks[drive]->queue = floppy_queue;
disks[drive]->flags |= GENHD_FL_REMOVABLE;
disks[drive]->driverfs_dev = &floppy_device[drive].dev;
add_disk(disks[drive]);
......@@ -4333,8 +4357,6 @@ static int __init floppy_init(void)
floppy_release_irq_and_dma();
out_unreg_region:
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
blk_cleanup_queue(floppy_queue);
out_unreg_driver:
platform_driver_unregister(&floppy_driver);
out_unreg_blkdev:
unregister_blkdev(FLOPPY_MAJOR, "fd");
......@@ -4342,6 +4364,8 @@ static int __init floppy_init(void)
while (dr--) {
del_timer(&motor_off_timer[dr]);
put_disk(disks[dr]);
if (disks[dr]->queue)
blk_cleanup_queue(disks[dr]->queue);
}
return err;
}
......@@ -4550,11 +4574,11 @@ static void __exit floppy_module_exit(void)
platform_device_unregister(&floppy_device[drive]);
}
put_disk(disks[drive]);
blk_cleanup_queue(disks[drive]->queue);
}
del_timer_sync(&fd_timeout);
del_timer_sync(&fd_timer);
blk_cleanup_queue(floppy_queue);
if (atomic_read(&usage_count))
floppy_release_irq_and_dma();
......
......@@ -74,6 +74,7 @@
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/splice.h>
#include <linux/sysfs.h>
#include <asm/uaccess.h>
......@@ -738,6 +739,103 @@ static inline int is_loop_device(struct file *file)
return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
}
/* loop sysfs attributes */
static ssize_t loop_attr_show(struct device *dev, char *page,
ssize_t (*callback)(struct loop_device *, char *))
{
struct loop_device *l, *lo = NULL;
mutex_lock(&loop_devices_mutex);
list_for_each_entry(l, &loop_devices, lo_list)
if (disk_to_dev(l->lo_disk) == dev) {
lo = l;
break;
}
mutex_unlock(&loop_devices_mutex);
return lo ? callback(lo, page) : -EIO;
}
#define LOOP_ATTR_RO(_name) \
static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \
static ssize_t loop_attr_do_show_##_name(struct device *d, \
struct device_attribute *attr, char *b) \
{ \
return loop_attr_show(d, b, loop_attr_##_name##_show); \
} \
static struct device_attribute loop_attr_##_name = \
__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
{
ssize_t ret;
char *p = NULL;
mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_backing_file)
p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
mutex_unlock(&lo->lo_ctl_mutex);
if (IS_ERR_OR_NULL(p))
ret = PTR_ERR(p);
else {
ret = strlen(p);
memmove(buf, p, ret);
buf[ret++] = '\n';
buf[ret] = 0;
}
return ret;
}
static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}
static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}
static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
return sprintf(buf, "%s\n", autoclear ? "1" : "0");
}
LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);
static struct attribute *loop_attrs[] = {
&loop_attr_backing_file.attr,
&loop_attr_offset.attr,
&loop_attr_sizelimit.attr,
&loop_attr_autoclear.attr,
NULL,
};
static struct attribute_group loop_attribute_group = {
.name = "loop",
.attrs= loop_attrs,
};
static int loop_sysfs_init(struct loop_device *lo)
{
return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
&loop_attribute_group);
}
static void loop_sysfs_exit(struct loop_device *lo)
{
sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
&loop_attribute_group);
}
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, unsigned int arg)
{
......@@ -837,6 +935,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
......@@ -855,6 +954,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
return 0;
out_clr:
loop_sysfs_exit(lo);
lo->lo_thread = NULL;
lo->lo_device = NULL;
lo->lo_backing_file = NULL;
......@@ -951,6 +1051,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
set_capacity(lo->lo_disk, 0);
if (bdev) {
bd_set_size(bdev, 0);
loop_sysfs_exit(lo);
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
}
......
......@@ -53,10 +53,10 @@
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.8.1"
#define REL_VERSION "8.3.9rc2"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 94
#define PRO_VERSION_MAX 95
enum drbd_io_error_p {
......@@ -91,6 +91,11 @@ enum drbd_after_sb_p {
ASB_VIOLENTLY
};
enum drbd_on_no_data {
OND_IO_ERROR,
OND_SUSPEND_IO
};
/* KEEP the order, do not delete or insert. Only append. */
enum drbd_ret_codes {
ERR_CODE_BASE = 100,
......@@ -140,6 +145,7 @@ enum drbd_ret_codes {
ERR_CONNECTED = 151, /* DRBD 8.3 only */
ERR_PERM = 152,
ERR_NEED_APV_93 = 153,
ERR_STONITH_AND_PROT_A = 154,
/* insert new ones above this line */
AFTER_LAST_ERR_CODE
......@@ -226,13 +232,17 @@ union drbd_state {
unsigned conn:5 ; /* 17/32 cstates */
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned susp:1 ; /* 2/2 IO suspended no/yes */
unsigned susp:1 ; /* 2/2 IO suspended no/yes (by user) */
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
unsigned peer_isp:1 ;
unsigned user_isp:1 ;
unsigned _pad:11; /* 0 unused */
unsigned susp_nod:1 ; /* IO suspended because no data */
unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/
unsigned _pad:9; /* 0 unused */
#elif defined(__BIG_ENDIAN_BITFIELD)
unsigned _pad:11; /* 0 unused */
unsigned _pad:9;
unsigned susp_fen:1 ;
unsigned susp_nod:1 ;
unsigned user_isp:1 ;
unsigned peer_isp:1 ;
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
......@@ -312,6 +322,8 @@ enum drbd_timeout_flag {
#define DRBD_MAGIC 0x83740267
#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
#define DRBD_MAGIC_BIG 0x835a
#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
/* these are of type "int" */
#define DRBD_MD_INDEX_INTERNAL -1
......
......@@ -128,26 +128,31 @@
#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
#define DRBD_MAX_BIO_BVECS_MIN 0
#define DRBD_MAX_BIO_BVECS_MAX 128
#define DRBD_MAX_BIO_BVECS_DEF 0
#define DRBD_DP_VOLUME_MIN 4
#define DRBD_DP_VOLUME_MAX 1048576
#define DRBD_DP_VOLUME_DEF 16384
#define DRBD_C_PLAN_AHEAD_MIN 0
#define DRBD_C_PLAN_AHEAD_MAX 300
#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */
#define DRBD_DP_INTERVAL_MIN 1
#define DRBD_DP_INTERVAL_MAX 600
#define DRBD_DP_INTERVAL_DEF 5
#define DRBD_C_DELAY_TARGET_MIN 1
#define DRBD_C_DELAY_TARGET_MAX 100
#define DRBD_C_DELAY_TARGET_DEF 10
#define DRBD_RS_THROTTLE_TH_MIN 1
#define DRBD_RS_THROTTLE_TH_MAX 600
#define DRBD_RS_THROTTLE_TH_DEF 20
#define DRBD_C_FILL_TARGET_MIN 0
#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
#define DRBD_RS_HOLD_OFF_TH_MIN 1
#define DRBD_RS_HOLD_OFF_TH_MAX 6000
#define DRBD_RS_HOLD_OFF_TH_DEF 100
#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */
#define DRBD_C_MAX_RATE_MAX (4 << 20)
#define DRBD_C_MAX_RATE_DEF 102400
#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */
#define DRBD_C_MIN_RATE_MAX (4 << 20)
#define DRBD_C_MIN_RATE_DEF 4096
#undef RANGE
#endif
......@@ -87,6 +87,12 @@ NL_PACKET(syncer_conf, 8,
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
NL_BIT( 65, T_MAY_IGNORE, use_rle)
NL_INTEGER( 75, T_MAY_IGNORE, on_no_data)
NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead)
NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target)
NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target)
NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate)
NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate)
)
NL_PACKET(invalidate, 9, )
......
......@@ -83,7 +83,7 @@ static inline int ddebug_remove_module(const char *mod)
#define dynamic_pr_debug(fmt, ...) \
do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
#define dynamic_dev_dbg(dev, format, ...) \
#define dynamic_dev_dbg(dev, fmt, ...) \
do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0)
#endif
......
......@@ -743,6 +743,7 @@
#define PCI_DEVICE_ID_HP_CISSC 0x3230
#define PCI_DEVICE_ID_HP_CISSD 0x3238
#define PCI_DEVICE_ID_HP_CISSE 0x323a
#define PCI_DEVICE_ID_HP_CISSF 0x323b
#define PCI_DEVICE_ID_HP_ZX2_IOC 0x4031
#define PCI_VENDOR_ID_PCTECH 0x1042
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册