drbd: factored tl_restart() out of tl_clear().

If IO was frozen for a temporary network outage, resend the content of the transfer-log into the newly established connection. Signed-off-by: N Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: N Lars Ellenberg <lars.ellenberg@linbit.com>

drbd: factored tl_restart() out of tl_clear().
If IO was frozen for a temporary network outage, resend the content of the transfer-log into the newly established connection. Signed-off-by: N Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: N Lars Ellenberg <lars.ellenberg@linbit.com>
11b58e73 · Philipp Reisner · 2a80699f · 11b58e73 · 11b58e73 · 11b58e73
5 changed files
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1138,6 +1138,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
 extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
 		       unsigned int set_size);
 extern void tl_clear(struct drbd_conf *mdev);
+enum drbd_req_event;
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
 extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
 extern void drbd_free_sock(struct drbd_conf *mdev);
 extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,

--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -333,59 +333,94 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
 	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 }

-
 /**
- * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @mdev:	DRBD device.
+ * @what:       The action/event to perform with all request objects
 *
- * This is called after the connection to the peer was lost. The storage covered
- * by the requests on the transfer gets marked as our of sync. Called from the
- * receiver thread and the worker thread.
+ * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
+ * restart_frozen_disk_io.
 */
-void tl_clear(struct drbd_conf *mdev)
+static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 {
-	struct drbd_tl_epoch *b, *tmp;
+	struct drbd_tl_epoch *b, *tmp, **pn;
 	struct list_head *le, *tle;
-	struct drbd_request *r;
-	int new_initial_bnr = net_random();
-
-	spin_lock_irq(&mdev->req_lock);
+	struct drbd_request *req;
+	int rv, n_writes, n_reads;

 	b = mdev->oldest_tle;
+	pn = &mdev->oldest_tle;
 	while (b) {
+		n_writes = 0;
+		n_reads = 0;
 		list_for_each_safe(le, tle, &b->requests) {
-			r = list_entry(le, struct drbd_request, tl_requests);
-			/* It would be nice to complete outside of spinlock.
-			 * But this is easier for now. */
-			_req_mod(r, connection_lost_while_pending);
+			req = list_entry(le, struct drbd_request, tl_requests);
+			rv = _req_mod(req, what);
+
+			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
+			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
 		}
 		tmp = b->next;

-		/* there could still be requests on that ring list,
-		 * in case local io is still pending */
-		list_del(&b->requests);
-
-		/* dec_ap_pending corresponding to queue_barrier.
-		 * the newest barrier may not have been queued yet,
-		 * in which case w.cb is still NULL. */
-		if (b->w.cb != NULL)
-			dec_ap_pending(mdev);
-
-		if (b == mdev->newest_tle) {
-			/* recycle, but reinit! */
-			D_ASSERT(tmp == NULL);
-			INIT_LIST_HEAD(&b->requests);
-			INIT_LIST_HEAD(&b->w.list);
-			b->w.cb = NULL;
-			b->br_number = new_initial_bnr;
-			b->n_writes = 0;
-
-			mdev->oldest_tle = b;
-			break;
+		if (n_writes + n_reads) {
+			if (what == resend) {
+				b->n_writes = n_writes;
+				if (b->w.cb == NULL) {
+					b->w.cb = w_send_barrier;
+					inc_ap_pending(mdev);
+					set_bit(CREATE_BARRIER, &mdev->flags);
+				}
+
+				drbd_queue_work(&mdev->data.work, &b->w);
+			}
+			pn = &b->next;
+		} else {
+			/* there could still be requests on that ring list,
+			 * in case local io is still pending */
+			list_del(&b->requests);
+
+			/* dec_ap_pending corresponding to queue_barrier.
+			 * the newest barrier may not have been queued yet,
+			 * in which case w.cb is still NULL. */
+			if (b->w.cb != NULL)
+				dec_ap_pending(mdev);
+
+			if (b == mdev->newest_tle) {
+				/* recycle, but reinit! */
+				D_ASSERT(tmp == NULL);
+				INIT_LIST_HEAD(&b->requests);
+				INIT_LIST_HEAD(&b->w.list);
+				b->w.cb = NULL;
+				b->br_number = net_random();
+				b->n_writes = 0;
+
+				*pn = b;
+				break;
+			}
+			*pn = tmp;
+			kfree(b);
 		}
-		kfree(b);
 		b = tmp;
 	}
+}
+
+
+/**
+ * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * @mdev:	DRBD device.
+ *
+ * This is called after the connection to the peer was lost. The storage
+ * covered by the requests in the transfer log gets marked as out of sync.
+ * Called from the receiver thread and the worker thread.
+ */
+void tl_clear(struct drbd_conf *mdev)
+{
+	struct list_head *le, *tle;
+	struct drbd_request *r;
+
+	spin_lock_irq(&mdev->req_lock);
+
+	_tl_restart(mdev, connection_lost_while_pending);

 	/* we expect this list to be empty. */
 	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
@@ -406,6 +441,13 @@ void tl_clear(struct drbd_conf *mdev)
 	spin_unlock_irq(&mdev->req_lock);
 }

+void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+	spin_lock_irq(&mdev->req_lock);
+	_tl_restart(mdev, what);
+	spin_unlock_irq(&mdev->req_lock);
+}
+
 /**
 * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
 * @mdev:	DRBD device.

--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -776,9 +776,6 @@ static int drbd_connect(struct drbd_conf *mdev)

 	D_ASSERT(!mdev->data.socket);

-	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags))
-		dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n");
-
 	if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
 		return -2;


--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -634,6 +634,20 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		/* else: done by handed_over_to_network */
 		break;

+	case resend:
+		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
+		   before the connection loss; only P_BARRIER_ACK was missing.
+		   Throwing them out of the TL here by pretending we got a BARRIER_ACK.
+		   TODO: Either resync them, or ensure peer was not rebooted. */
+		if (!(req->rq_state & RQ_NET_OK)) {
+			if (req->w.cb) {
+				drbd_queue_work(&mdev->data.work, &req->w);
+				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
+			}
+			break;
+		}
+		/* else, fall through to barrier_acked */
+
 	case barrier_acked:
 		if (!(req->rq_state & RQ_WRITE))
 			break;

--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -104,6 +104,7 @@ enum drbd_req_event {
 	read_ahead_completed_with_error,
 	write_completed_with_error,
 	completed_ok,
+	resend,
 	nothing, /* for tracing only */
 };

@@ -206,6 +207,13 @@ enum drbd_req_state_bits {

 #define RQ_WRITE           (1UL << __RQ_WRITE)

+/* For waking up the frozen transfer log, _req_mod() has to return whether the
+   request should be counted in the epoch object. */
+#define MR_WRITE_SHIFT 0
+#define MR_WRITE       (1 << MR_WRITE_SHIFT)
+#define MR_READ_SHIFT  1
+#define MR_READ        (1 << MR_READ_SHIFT)
+
 /* epoch entries */
 static inline
 struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)