Merge branch 'for-jens' of git://git.drbd.org/linux-drbd into for-linus

79df9b40 · Jens Axboe · 4004e90c · d1aa4d04 · 79df9b40 · 79df9b40
5 changed file
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -889,6 +889,7 @@ struct bm_aio_ctx {
 	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
+#define BM_WRITE_ALL_PAGES	2
 	int error;
 	struct kref kref;
 };
@@ -1059,7 +1060,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
 			break;
 		if (rw & WRITE) {
-			if (bm_test_page_unchanged(b->bm_pages[i])) {
+			if (!(flags & BM_WRITE_ALL_PAGES) &&
+			    bm_test_page_unchanged(b->bm_pages[i])) {
 				dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
 				continue;
 			}
@@ -1140,6 +1142,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
 	return bm_rw(mdev, WRITE, 0, 0);
 }

+/**
+ * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
+ * @mdev:	DRBD device.
+ *
+ * Will write all pages.
+ */
+int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0);
+}
+
 /**
 * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
 * @mdev:	DRBD device.

--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1469,6 +1469,7 @@ extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
 extern int  drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
 extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
 extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local);
 extern int  drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
 extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
 		unsigned long al_enr);

--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
 static void md_sync_timer_fn(unsigned long data);
 static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
 static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void _tl_clear(struct drbd_conf *mdev);

 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 	      "Lars Ellenberg <lars@linbit.com>");
@@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)

 	/* Actions operating on the disk state, also want to work on
 	   requests that got barrier acked. */
-	switch (what) {
-	case fail_frozen_disk_io:
-	case restart_frozen_disk_io:
-		list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			_req_mod(req, what);
-		}

-	case connection_lost_while_pending:
-	case resend:
-		break;
-	default:
-		dev_err(DEV, "what = %d in _tl_restart()\n", what);
+	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
+		req = list_entry(le, struct drbd_request, tl_requests);
+		_req_mod(req, what);
 	}
 }

@@ -458,12 +450,17 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 * receiver thread and the worker thread.
 */
 void tl_clear(struct drbd_conf *mdev)
+{
+	spin_lock_irq(&mdev->req_lock);
+	_tl_clear(mdev);
+	spin_unlock_irq(&mdev->req_lock);
+}
+
+static void _tl_clear(struct drbd_conf *mdev)
 {
 	struct list_head *le, *tle;
 	struct drbd_request *r;

-	spin_lock_irq(&mdev->req_lock);
-
 	_tl_restart(mdev, connection_lost_while_pending);

 	/* we expect this list to be empty. */
@@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev)

 	memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));

-	spin_unlock_irq(&mdev->req_lock);
 }

 void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
@@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	if (ns.susp_fen) {
 		/* case1: The outdate peer handler is successful: */
 		if (os.pdsk > D_OUTDATED  && ns.pdsk <= D_OUTDATED) {
-			tl_clear(mdev);
 			if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
 				drbd_uuid_new_current(mdev);
 				clear_bit(NEW_CUR_UUID, &mdev->flags);
 			}
 			spin_lock_irq(&mdev->req_lock);
+			_tl_clear(mdev);
 			_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
 			spin_unlock_irq(&mdev->req_lock);
 		}

--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -674,8 +674,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, &drbd_bm_write,
-				"size changed", BM_LOCKED_MASK);
+		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+				     "size changed", BM_LOCKED_MASK);
 		if (err) {
 			rv = dev_size_error;
 			goto out;

--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -695,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		break;

 	case resend:
+		/* Simply complete (local only) READs. */
+		if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
+			_req_may_be_done(req, m);
+			break;
+		}
+
 		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
 		   before the connection loss (B&C only); only P_BARRIER_ACK was missing.
 		   Trowing them out of the TL here by pretending we got a BARRIER_ACK
@@ -834,7 +840,15 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 		req->private_bio = NULL;
 	}
 	if (rw == WRITE) {
-		remote = 1;
+		/* Need to replicate writes.  Unless it is an empty flush,
+		 * which is better mapped to a DRBD P_BARRIER packet,
+		 * also for drbd wire protocol compatibility reasons. */
+		if (unlikely(size == 0)) {
+			/* The only size==0 bios we expect are empty flushes. */
+			D_ASSERT(bio->bi_rw & REQ_FLUSH);
+			remote = 0;
+		} else
+			remote = 1;
 	} else {
 		/* READ || READA */
 		if (local) {
@@ -870,8 +884,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 	 * extent.  This waits for any resync activity in the corresponding
 	 * resync extent to finish, and, if necessary, pulls in the target
 	 * extent into the activity log, which involves further disk io because
-	 * of transactional on-disk meta data updates. */
-	if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+	 * of transactional on-disk meta data updates.
+	 * Empty flushes don't need to go into the activity log, they can only
+	 * flush data for pending writes which are already in there. */
+	if (rw == WRITE && local && size
+	&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
 		req->rq_state |= RQ_IN_ACT_LOG;
 		drbd_al_begin_io(mdev, sector);
 	}
@@ -994,7 +1011,10 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 	if (rw == WRITE && _req_conflicts(req))
 		goto fail_conflicting;

-	list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
+	/* no point in adding empty flushes to the transfer log,
+	 * they are mapped to drbd barriers already. */
+	if (likely(size!=0))
+		list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);

 	/* NOTE remote first: to get the concurrent write detection right,
 	 * we must register the request before start of local IO.  */
@@ -1014,6 +1034,14 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 	    mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96)
 		maybe_pull_ahead(mdev);

+	/* If this was a flush, queue a drbd barrier/start a new epoch.
+	 * Unless the current epoch was empty anyways, or we are not currently
+	 * replicating, in which case there is no point. */
+	if (unlikely(bio->bi_rw & REQ_FLUSH)
+		&& mdev->newest_tle->n_writes
+		&& drbd_should_do_remote(mdev->state))
+		queue_barrier(mdev);
+
 	spin_unlock_irq(&mdev->req_lock);
 	kfree(b); /* if someone else has beaten us to it... */