提交 b4868b44 编写于 作者: B Benjamin Coddington 提交者: Anna Schumaker

NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE

Since commit 0e0cb35b ("NFSv4: Handle NFS4ERR_OLD_STATEID in
CLOSE/OPEN_DOWNGRADE") the following livelock may occur if a CLOSE races
with the update of the nfs_state:

Process 1           Process 2           Server
=========           =========           ========
 OPEN file
                    OPEN file
                                        Reply OPEN (1)
                                        Reply OPEN (2)
 Update state (1)
 CLOSE file (1)
                                        Reply OLD_STATEID (1)
 CLOSE file (2)
                                        Reply CLOSE (-1)
                    Update state (2)
                    wait for state change
 OPEN file
                    wake
 CLOSE file
 OPEN file
                    wake
 CLOSE file
 ...
                    ...

We can avoid this situation by not issuing an immediate retry with a bumped
seqid when CLOSE/OPEN_DOWNGRADE receives NFS4ERR_OLD_STATEID.  Instead,
take the same approach used by OPEN and wait up to 5 seconds for
outstanding stateid updates to complete if we can detect that we're out of
sequence.

Note that after this change it is still possible (though unlikely) that
CLOSE waits a full 5 seconds, bumps the seqid, and retries -- and that
attempt races with another OPEN at the same time.  In order to avoid this
race (which would result in the livelock), update
nfs_need_update_open_stateid() to handle the case where:
 - the state is NFS_OPEN_STATE, and
 - the stateid doesn't match the current open stateid

Finally, nfs_need_update_open_stateid() is modified to be idempotent and
renamed to better suit the purpose of signaling that the stateid passed
is the next stateid in sequence.

Fixes: 0e0cb35b ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE")
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
上级 fb08334b
...@@ -599,6 +599,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat ...@@ -599,6 +599,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
} }
static inline bool nfs4_stateid_is_next(const nfs4_stateid *s1, const nfs4_stateid *s2)
{
u32 seq1 = be32_to_cpu(s1->seqid);
u32 seq2 = be32_to_cpu(s2->seqid);
return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU);
}
static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src) static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src)
{ {
return nfs4_stateid_match_other(dst, src) && return nfs4_stateid_match_other(dst, src) &&
......
...@@ -1550,19 +1550,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state) ...@@ -1550,19 +1550,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state)
wake_up_all(&state->waitq); wake_up_all(&state->waitq);
} }
/*
 * Compare the seqid of an incoming @stateid with the seqid currently
 * stored in @state->open_stateid (both converted with be32_to_cpu()).
 *
 * If the incoming seqid is the stored seqid plus one, or the stored seqid
 * is 0xffffffff and the incoming one is 1, call
 * nfs_state_log_update_open_stateid() on @state. Otherwise set the
 * NFS_STATE_CHANGE_WAIT bit in @state->flags.
 */
static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state,
		const nfs4_stateid *stateid)
{
	const u32 cur = be32_to_cpu(state->open_stateid.seqid);
	const u32 incoming = be32_to_cpu(stateid->seqid);

	/* Not the immediate successor: flag the state as waiting on a change */
	if (incoming != cur + 1U &&
	    !(cur == 0xffffffffU && incoming == 1U)) {
		set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
		return;
	}

	nfs_state_log_update_open_stateid(state);
}
static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
{ {
struct nfs_client *clp = state->owner->so_server->nfs_client; struct nfs_client *clp = state->owner->so_server->nfs_client;
...@@ -1588,20 +1575,18 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) ...@@ -1588,20 +1575,18 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
* i.e. The stateid seqids have to be initialised to 1, and * i.e. The stateid seqids have to be initialised to 1, and
* are then incremented on every state transition. * are then incremented on every state transition.
*/ */
static bool nfs_need_update_open_stateid(struct nfs4_state *state, static bool nfs_stateid_is_sequential(struct nfs4_state *state,
const nfs4_stateid *stateid) const nfs4_stateid *stateid)
{ {
if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || if (test_bit(NFS_OPEN_STATE, &state->flags)) {
!nfs4_stateid_match_other(stateid, &state->open_stateid)) { /* The common case - we're updating to a new sequence number */
if (stateid->seqid == cpu_to_be32(1)) if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
nfs_state_log_update_open_stateid(state); nfs4_stateid_is_next(&state->open_stateid, stateid)) {
else
set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
return true; return true;
} }
} else {
if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { /* This is the first OPEN in this generation */
nfs_state_log_out_of_order_open_stateid(state, stateid); if (stateid->seqid == cpu_to_be32(1))
return true; return true;
} }
return false; return false;
...@@ -1676,16 +1661,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, ...@@ -1676,16 +1661,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
int status = 0; int status = 0;
for (;;) { for (;;) {
if (!nfs_need_update_open_stateid(state, stateid)) if (nfs_stateid_is_sequential(state, stateid))
return;
if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
break; break;
if (status) if (status)
break; break;
/* Rely on seqids for serialisation with NFSv4.0 */ /* Rely on seqids for serialisation with NFSv4.0 */
if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client))
break; break;
set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
/* /*
* Ensure we process the state changes in the same order * Ensure we process the state changes in the same order
...@@ -1696,6 +1681,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, ...@@ -1696,6 +1681,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
spin_unlock(&state->owner->so_lock); spin_unlock(&state->owner->so_lock);
rcu_read_unlock(); rcu_read_unlock();
trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
if (!signal_pending(current)) { if (!signal_pending(current)) {
if (schedule_timeout(5*HZ) == 0) if (schedule_timeout(5*HZ) == 0)
status = -EAGAIN; status = -EAGAIN;
...@@ -3438,7 +3424,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, ...@@ -3438,7 +3424,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
__be32 seqid_open; __be32 seqid_open;
u32 dst_seqid; u32 dst_seqid;
bool ret; bool ret;
int seq; int seq, status = -EAGAIN;
DEFINE_WAIT(wait);
for (;;) { for (;;) {
ret = false; ret = false;
...@@ -3450,15 +3437,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, ...@@ -3450,15 +3437,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
continue; continue;
break; break;
} }
write_seqlock(&state->seqlock);
seqid_open = state->open_stateid.seqid; seqid_open = state->open_stateid.seqid;
if (read_seqretry(&state->seqlock, seq))
continue;
dst_seqid = be32_to_cpu(dst->seqid); dst_seqid = be32_to_cpu(dst->seqid);
if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0)
dst->seqid = cpu_to_be32(dst_seqid + 1); /* Did another OPEN bump the state's seqid? try again: */
else if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) {
dst->seqid = seqid_open; dst->seqid = seqid_open;
write_sequnlock(&state->seqlock);
ret = true;
break;
}
/* server says we're behind but we haven't seen the update yet */
set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
write_sequnlock(&state->seqlock);
trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
if (signal_pending(current))
status = -EINTR;
else
if (schedule_timeout(5*HZ) != 0)
status = 0;
finish_wait(&state->waitq, &wait);
if (!status)
continue;
if (status == -EINTR)
break;
/* we slept the whole 5 seconds, we must have lost a seqid */
dst->seqid = cpu_to_be32(dst_seqid + 1);
ret = true; ret = true;
break; break;
} }
......
...@@ -1511,6 +1511,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); ...@@ -1511,6 +1511,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait);
DECLARE_EVENT_CLASS(nfs4_getattr_event, DECLARE_EVENT_CLASS(nfs4_getattr_event,
TP_PROTO( TP_PROTO(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册