提交 d9cc6e23 编写于 作者: L Lars Ellenberg 提交者: Philipp Reisner

drbd: fix various disconnecting races

If an admin requests disconnect at a time when the state handling
already disconnects/reconnects, there have been some races.

Make sure to always really stop the network threads before
returning success for disconnect. Do not report a successful
forced disconnect if the state handling returned an error.

Return success from drbd_adm_down() only after all threads are finished.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
上级 5ee743e9
...@@ -2075,10 +2075,9 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for ...@@ -2075,10 +2075,9 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for
enum drbd_state_rv rv; enum drbd_state_rv rv;
if (force) { if (force) {
spin_lock_irq(&tconn->req_lock); spin_lock_irq(&tconn->req_lock);
if (tconn->cstate >= C_WF_CONNECTION) rv = _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
_conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
spin_unlock_irq(&tconn->req_lock); spin_unlock_irq(&tconn->req_lock);
return SS_SUCCESS; return rv;
} }
rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
...@@ -2137,10 +2136,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) ...@@ -2137,10 +2136,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
if (rv < SS_SUCCESS) if (rv < SS_SUCCESS)
goto fail; goto fail;
/* No one else can reconfigure the network while I am here.
* The state handling only uses drbd_thread_stop_nowait(),
* we want to really wait here until the receiver is no more. */
drbd_thread_stop(&tconn->receiver);
if (wait_event_interruptible(tconn->ping_wait, if (wait_event_interruptible(tconn->ping_wait,
tconn->cstate != C_DISCONNECTING)) { tconn->cstate == C_STANDALONE)) {
/* Do not test for mdev->state.conn == C_STANDALONE, since
someone else might connect us in the mean time! */
retcode = ERR_INTR; retcode = ERR_INTR;
goto fail; goto fail;
} }
...@@ -3043,6 +3044,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) ...@@ -3043,6 +3044,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
goto out_unlock; goto out_unlock;
} }
/* Make sure the network threads have actually stopped,
* state handling only does drbd_thread_stop_nowait(). */
drbd_thread_stop(&adm_ctx.tconn->receiver);
/* detach */ /* detach */
idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
rv = adm_detach(mdev); rv = adm_detach(mdev);
...@@ -3066,11 +3071,9 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) ...@@ -3066,11 +3071,9 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
} }
} }
/* stop all threads */
conn_reconfig_done(adm_ctx.tconn);
/* delete connection */ /* delete connection */
if (conn_lowest_minor(adm_ctx.tconn) < 0) { if (conn_lowest_minor(adm_ctx.tconn) < 0) {
drbd_thread_stop(&adm_ctx.tconn->worker);
list_del(&adm_ctx.tconn->all_tconn); list_del(&adm_ctx.tconn->all_tconn);
kref_put(&adm_ctx.tconn->kref, &conn_destroy); kref_put(&adm_ctx.tconn->kref, &conn_destroy);
......
...@@ -4226,7 +4226,7 @@ static void drbd_disconnect(struct drbd_tconn *tconn) ...@@ -4226,7 +4226,7 @@ static void drbd_disconnect(struct drbd_tconn *tconn)
synchronize_rcu(); synchronize_rcu();
kfree(old_conf); kfree(old_conf);
conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE); conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
} }
} }
......
...@@ -604,21 +604,27 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) ...@@ -604,21 +604,27 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns)
static enum drbd_state_rv static enum drbd_state_rv
is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc) is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
{ {
enum drbd_state_rv rv = SS_SUCCESS; /* no change -> nothing to do, at least for the connection part */
if (oc == nc)
return SS_NOTHING_TO_DO;
/* Disallow Network errors to configure a device's network part */ /* disconnect of an unconfigured connection does not make sense */
if ((nc >= C_TIMEOUT && nc <= C_TEAR_DOWN) && oc <= C_DISCONNECTING) if (oc == C_STANDALONE && nc == C_DISCONNECTING)
rv = SS_NEED_CONNECTION; return SS_ALREADY_STANDALONE;
/* from C_STANDALONE, we start with C_UNCONNECTED */
if (oc == C_STANDALONE && nc != C_UNCONNECTED)
return SS_NEED_CONNECTION;
/* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */ /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING) if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
rv = SS_IN_TRANSIENT_STATE; return SS_IN_TRANSIENT_STATE;
/* After C_DISCONNECTING only C_STANDALONE may follow */ /* After C_DISCONNECTING only C_STANDALONE may follow */
if (oc == C_DISCONNECTING && nc != C_STANDALONE) if (oc == C_DISCONNECTING && nc != C_STANDALONE)
rv = SS_IN_TRANSIENT_STATE; return SS_IN_TRANSIENT_STATE;
return rv; return SS_SUCCESS;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册