diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 125fe1481ca256ffe7efbfed022a64b174730223..277c69c9465b946b9a987e195f4823223710c558 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -831,7 +831,8 @@ enum drbd_flag {
				   once no more io in flight, start bitmap io */
 	BITMAP_IO_QUEUED,       /* Started bitmap IO */
 	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
-	WAS_IO_ERROR,		/* Local disk failed returned IO error */
+	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
+	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
 	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
 	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
 	NET_CONGESTED,		/* The data socket is congested */
@@ -1879,30 +1880,53 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
 }
 
 enum drbd_force_detach_flags {
-	DRBD_IO_ERROR,
+	DRBD_READ_ERROR,
+	DRBD_WRITE_ERROR,
 	DRBD_META_IO_ERROR,
 	DRBD_FORCE_DETACH,
 };
 
 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
 static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
-	enum drbd_force_detach_flags forcedetach,
+	enum drbd_force_detach_flags df,
 	const char *where)
 {
 	switch (mdev->ldev->dc.on_io_error) {
 	case EP_PASS_ON:
-		if (forcedetach == DRBD_IO_ERROR) {
+		if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
 			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s.\n", where);
 			if (mdev->state.disk > D_INCONSISTENT)
 				_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
 			break;
 		}
-		/* NOTE fall through to detach case if forcedetach set */
+		/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
 	case EP_DETACH:
 	case EP_CALL_HELPER:
+		/* Remember whether we saw a READ or WRITE error.
+		 *
+		 * Recovery of the affected area for WRITE failure is covered
+		 * by the activity log.
+		 * READ errors may fall outside that area though. Certain READ
+		 * errors can be "healed" by writing good data to the affected
+		 * blocks, which triggers block re-allocation in lower layers.
+		 *
+		 * If we can not write the bitmap after a READ error,
+		 * we may need to trigger a full sync (see w_go_diskless()).
+		 *
+		 * Force-detach is not really an IO error, but rather a
+		 * desperate measure to try to deal with a completely
+		 * unresponsive lower level IO stack.
+		 * Still it should be treated as a WRITE error.
+		 *
+		 * Meta IO error is always WRITE error:
+		 * we read meta data only once during attach,
+		 * which will fail in case of errors.
+		 */
 		drbd_set_flag(mdev, WAS_IO_ERROR);
-		if (forcedetach == DRBD_FORCE_DETACH)
+		if (df == DRBD_READ_ERROR)
+			drbd_set_flag(mdev, WAS_READ_ERROR);
+		if (df == DRBD_FORCE_DETACH)
 			drbd_set_flag(mdev, FORCE_DETACH);
 		if (mdev->state.disk > D_FAILED) {
 			_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index d8ba5c42670f1268cac30963816237ccae069bd2..9b833e0fb4409d4dd03ad25bbb7495aae7a928d0 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* first half of local IO error, failure to attach,
 	 * or administrative detach */
 	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-		enum drbd_io_error_p eh = EP_PASS_ON;
-		int was_io_error = 0;
 		/* corresponding get_ldev was in __drbd_set_state, to serialize
 		 * our cleanup here with the transition to D_DISKLESS.
-		 * But is is still not save to dreference ldev here, since
-		 * we might come from an failed Attach before ldev was set. */
+		 * But it is still not safe to dereference ldev here, we may end
+		 * up here from a failed attach, before ldev was even set. */
 		if (mdev->ldev) {
-			eh = mdev->ldev->dc.on_io_error;
-			was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);
-
-			if (was_io_error && eh == EP_CALL_HELPER)
+			enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;
+
+			/* In some setups, this handler triggers a suicide,
+			 * basically mapping IO error to node failure, to
+			 * reduce the number of different failure scenarios.
+			 *
+			 * This handler intentionally runs before we abort IO,
+			 * notify the peer, or try to update our meta data. */
+			if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
 				drbd_khelper(mdev, "local-io-error");
 
 		/* Immediately allow completion of all application IO,
@@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		 * So aborting local requests may cause crashes,
 		 * or even worse, silent data corruption.
 		 */
-		if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))
+		if (drbd_test_flag(mdev, FORCE_DETACH))
 			tl_abort_disk_io(mdev);
 
 		/* current state still has to be D_FAILED,
@@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused
 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
 	 * the protected members anymore, though, so once put_ldev reaches zero
 	 * again, it will be safe to free them. */
+
+	/* Try to write changed bitmap pages, read errors may have just
+	 * set some bits outside the area covered by the activity log.
+	 *
+	 * If we have an IO error during the bitmap writeout,
+	 * we will want a full sync next time, just in case.
+	 * (Do we want a specific meta data flag for this?)
+	 *
+	 * If that does not make it to stable storage either,
+	 * we cannot do anything about that anymore. */
+	if (mdev->bitmap) {
+		if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+					"detach", BM_LOCKED_MASK)) {
+			if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
+				drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+				drbd_md_sync(mdev);
+			}
+		}
+	}
+
 	drbd_force_state(mdev, NS(disk, D_DISKLESS));
 	return 1;
 }
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 42d172877aea825543db7da7bd72425d99a7fe61..c8dda4e8dfce0303d787bccad29fbe193bf8e43f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -959,6 +959,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	/* make sure there is no leftover from previous force-detach attempts */
 	drbd_clear_flag(mdev, FORCE_DETACH);
+	drbd_clear_flag(mdev, WAS_IO_ERROR);
+	drbd_clear_flag(mdev, WAS_READ_ERROR);
 
 	/* and no leftover from previously aborted resync or verify, either */
 	mdev->rs_total = 0;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9220d9f9d6cda097c589107891d971e424fdbdf4..d9e5962a9a8c0aa9ec7b81e3629e83fe3e54ed6c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
 		_req_may_be_done_not_susp(req, m);
 		break;
 
@@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 			break;
 		}
 
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 
 	goto_queue_for_net_read:
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index acb614ac9fe1f4973a30cf1d2254a1327174305e..7cd32e73b0165bcbcf054ba637db7623f2befdf8 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
 	if (list_empty(&mdev->read_ee))
 		wake_up(&mdev->ee_wait);
 	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
 
 	drbd_queue_work(&mdev->data.work, &e->w);
@@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
 		: list_empty(&mdev->active_ee);
 
 	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
 
 	if (is_syncer_req)
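
For reference, the behaviour this patch introduces can be condensed into a small stand-alone sketch. It only restates the decision logic of __drbd_chk_io_error_() and the bitmap-writeout fallback in w_go_diskless(); the enum values and flag names are taken from the patch, while the flags struct and the main() harness are illustrative assumptions, not driver code.

/* Condensed sketch of the error classification added by this patch.
 * It mirrors __drbd_chk_io_error_(): every error kind marks WAS_IO_ERROR,
 * READ errors additionally mark WAS_READ_ERROR, and only a force-detach
 * marks FORCE_DETACH.  w_go_diskless() then uses WAS_READ_ERROR to decide
 * whether a failed bitmap writeout must escalate to a full sync.
 * The stand-alone types and main() are illustrative, not part of the driver. */
#include <stdbool.h>
#include <stdio.h>

enum drbd_force_detach_flags {		/* as introduced by the patch */
	DRBD_READ_ERROR,
	DRBD_WRITE_ERROR,
	DRBD_META_IO_ERROR,
	DRBD_FORCE_DETACH,
};

struct flags {				/* stand-ins for the mdev flag bits */
	bool was_io_error;		/* WAS_IO_ERROR */
	bool was_read_error;		/* WAS_READ_ERROR */
	bool force_detach;		/* FORCE_DETACH */
};

/* Mirrors the EP_DETACH/EP_CALL_HELPER branch of __drbd_chk_io_error_(). */
static void classify_io_error(struct flags *f, enum drbd_force_detach_flags df)
{
	f->was_io_error = true;			/* every path sets WAS_IO_ERROR */
	if (df == DRBD_READ_ERROR)
		f->was_read_error = true;	/* remember READ errors separately */
	if (df == DRBD_FORCE_DETACH)
		f->force_detach = true;		/* force-detach treated as WRITE error */
}

/* Mirrors the bitmap writeout fallback in w_go_diskless(): if the bitmap
 * cannot be written after a READ error, request a full sync next time. */
static bool need_full_sync(const struct flags *f, bool bitmap_write_failed)
{
	return bitmap_write_failed && f->was_read_error;
}

int main(void)
{
	struct flags f = { 0 };

	classify_io_error(&f, DRBD_READ_ERROR);
	printf("was_io_error=%d was_read_error=%d force_detach=%d\n",
	       f.was_io_error, f.was_read_error, f.force_detach);
	printf("full sync needed if bitmap writeout fails: %d\n",
	       need_full_sync(&f, true));
	return 0;
}

The point the sketch captures is that WAS_READ_ERROR is set in addition to WAS_IO_ERROR, so existing consumers of WAS_IO_ERROR keep working unchanged while w_go_diskless() gains the extra bit it needs to decide between a plain detach and flagging MDF_FULL_SYNC.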