提交 f91ab628 编写于 作者: P Philipp Reisner

drbd: Implemented side-stepping in drbd_rs_begin_io()

Before:
  drbd_rs_begin_io() locked app-IO out of an RS extent, and
  then waited until all previous app-IO in that area had finished.
  (Not merely until the disk-IO was finished, but until the
   barrier/epoch ack for it came in, i.e. at least one round-trip time of latency.)

After:
  As soon as new app-IO wants to start on that
  RS extent, drbd_rs_begin_io() steps aside (clearing the
  BME_NO_WRITES flag again). It retries after 100ms.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
上级 9d77a5fe
...@@ -176,14 +176,17 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) ...@@ -176,14 +176,17 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
struct lc_element *al_ext; struct lc_element *al_ext;
struct lc_element *tmp; struct lc_element *tmp;
unsigned long al_flags = 0; unsigned long al_flags = 0;
int wake;
spin_lock_irq(&mdev->al_lock); spin_lock_irq(&mdev->al_lock);
tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
if (unlikely(tmp != NULL)) { if (unlikely(tmp != NULL)) {
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
set_bit(BME_PRIORITY, &bm_ext->flags); wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
spin_unlock_irq(&mdev->al_lock); spin_unlock_irq(&mdev->al_lock);
if (wake)
wake_up(&mdev->al_wait);
return NULL; return NULL;
} }
} }
...@@ -1135,7 +1138,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) ...@@ -1135,7 +1138,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
unsigned int enr = BM_SECT_TO_EXT(sector); unsigned int enr = BM_SECT_TO_EXT(sector);
struct bm_extent *bm_ext; struct bm_extent *bm_ext;
int i, sig; int i, sig;
int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
200 times -> 20 seconds. */
retry:
sig = wait_event_interruptible(mdev->al_wait, sig = wait_event_interruptible(mdev->al_wait,
(bm_ext = _bme_get(mdev, enr))); (bm_ext = _bme_get(mdev, enr)));
if (sig) if (sig)
...@@ -1146,16 +1152,24 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) ...@@ -1146,16 +1152,24 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(mdev->al_wait, sig = wait_event_interruptible(mdev->al_wait,
!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i)); !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
if (sig) { (test_bit(BME_PRIORITY, &bm_ext->flags) && sa));
if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
spin_lock_irq(&mdev->al_lock); spin_lock_irq(&mdev->al_lock);
if (lc_put(mdev->resync, &bm_ext->lce) == 0) { if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
clear_bit(BME_NO_WRITES, &bm_ext->flags); bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
mdev->resync_locked--; mdev->resync_locked--;
wake_up(&mdev->al_wait); wake_up(&mdev->al_wait);
} }
spin_unlock_irq(&mdev->al_lock); spin_unlock_irq(&mdev->al_lock);
return -EINTR; if (sig)
return -EINTR;
if (schedule_timeout_interruptible(HZ/10))
return -EINTR;
if (--sa == 0)
dev_warn(DEV,"drbd_rs_begin_io() no longer stepping aside.\n");
goto retry;
} }
} }
set_bit(BME_LOCKED, &bm_ext->flags); set_bit(BME_LOCKED, &bm_ext->flags);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册