syncCommit.c 5.5 KB
Newer Older
M
Minghao Li 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

M
Minghao Li 已提交
16
#include "syncCommit.h"
M
Minghao Li 已提交
17
#include "syncIndexMgr.h"
M
Minghao Li 已提交
18
#include "syncInt.h"
M
Minghao Li 已提交
19
#include "syncRaftCfg.h"
M
Minghao Li 已提交
20
#include "syncRaftLog.h"
M
Minghao Li 已提交
21
#include "syncRaftStore.h"
M
Minghao Li 已提交
22
#include "syncUtil.h"
M
Minghao Li 已提交
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46

// \* Leader i advances its commitIndex.
// \* This is done as a separate step from handling AppendEntries responses,
// \* in part to minimize atomic regions, and in part so that leaders of
// \* single-server clusters are able to mark entries committed.
// AdvanceCommitIndex(i) ==
//     /\ state[i] = Leader
//     /\ LET \* The set of servers that agree up through index.
//            Agree(index) == {i} \cup {k \in Server :
//                                          matchIndex[i][k] >= index}
//            \* The maximum indexes for which a quorum agrees
//            agreeIndexes == {index \in 1..Len(log[i]) :
//                                 Agree(index) \in Quorum}
//            \* New value for commitIndex'[i]
//            newCommitIndex ==
//               IF /\ agreeIndexes /= {}
//                  /\ log[i][Max(agreeIndexes)].term = currentTerm[i]
//               THEN
//                   Max(agreeIndexes)
//               ELSE
//                   commitIndex[i]
//        IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex]
//     /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
//
M
Minghao Li 已提交
47
/*
 * Try to move pSyncNode->commitIndex forward.
 *
 * Step 1: ask the FSM for its snapshot; if snapshot.lastApplyIndex is positive
 *         and ahead of commitIndex, commitIndex jumps straight to it.
 * Step 2: walk from the last log index down to (but excluding) commitIndex and
 *         pick the first index that syncAgree() accepts AND whose log entry
 *         carries the current term. Quorum agreement alone is not enough.
 * Step 3: if such an index was found, store it as the new commitIndex, pass it
 *         to the log store via updateCommitIndex(), and, when an FSM is
 *         attached, hand the range [old commitIndex + 1, new commitIndex] to
 *         syncNodeCommit().
 *
 * pSyncNode must be non-NULL. NOTE(review): the spec comment above requires
 * the node to be Leader; this function does not check the state itself, so
 * callers are presumably expected to do so - confirm.
 */
void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
  syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pNextIndex", pSyncNode->pNextIndex);
  syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pMatchIndex", pSyncNode->pMatchIndex);

  // advance commit index to snapshot first
  // Zero-initialised so that lastApplyIndex reads as 0 (i.e. "no snapshot")
  // when the callback is unavailable or leaves the struct untouched.
  SSnapshot snapshot = {0};
  // The FSM is treated as optional further down, so it must not be
  // dereferenced unconditionally here either.
  if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpGetSnapshot != NULL) {
    pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot);
  }

  if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) {
    // Remember the old value before overwriting it, so the log line below
    // reports the real "from" index instead of the already-updated one.
    SyncIndex commitBegin = pSyncNode->commitIndex;
    SyncIndex commitEnd = snapshot.lastApplyIndex;
    pSyncNode->commitIndex = commitEnd;

    sDebug("vgId:%d sync event %s commitIndex:%ld currentTerm:%lu commit by snapshot from index:%ld to index:%ld",
           pSyncNode->vgId, syncUtilState2String(pSyncNode->state), pSyncNode->commitIndex,
           pSyncNode->pRaftStore->currentTerm, commitBegin, commitEnd);
  }

  // search (highest index first) for a new commit index
  SyncIndex newCommitIndex = pSyncNode->commitIndex;
  for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) {
    bool agree = syncAgree(pSyncNode, index);

    if (gRaftDetailLog) {
      sTrace("syncMaybeAdvanceCommitIndex syncAgree:%d, index:%ld, pSyncNode->commitIndex:%ld", agree, index,
             pSyncNode->commitIndex);
    }

    if (!agree) {
      continue;
    }

    // The entry is fetched only to inspect its term and is released below.
    SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, index);
    assert(pEntry != NULL);

    // cannot commit, even if quorum agree. need check term!
    bool termMatches = (pEntry->term == pSyncNode->pRaftStore->currentTerm);
    if (termMatches) {
      newCommitIndex = index;

      if (gRaftDetailLog) {
        sTrace("syncMaybeAdvanceCommitIndex maybe to update, newCommitIndex:%ld commit, pSyncNode->commitIndex:%ld",
               newCommitIndex, pSyncNode->commitIndex);
      }
    } else if (gRaftDetailLog) {
      sTrace(
          "syncMaybeAdvanceCommitIndex can not commit due to term not equal, pEntry->term:%lu, "
          "pSyncNode->pRaftStore->currentTerm:%lu",
          pEntry->term, pSyncNode->pRaftStore->currentTerm);
    }

    // Single release point for the fetched entry, whichever branch was taken.
    syncEntryDestory(pEntry);

    if (termMatches) {
      // Highest committable index found; lower indexes need no inspection.
      break;
    }
  }

  if (newCommitIndex <= pSyncNode->commitIndex) {
    // Nothing new to commit.
    return;
  }

  SyncIndex beginIndex = pSyncNode->commitIndex + 1;
  SyncIndex endIndex = newCommitIndex;

  if (gRaftDetailLog) {
    sTrace("syncMaybeAdvanceCommitIndex sync commit %ld", newCommitIndex);
  }

  // update commit index
  pSyncNode->commitIndex = newCommitIndex;

  // call back Wal
  pSyncNode->pLogStore->updateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);

  // execute fsm
  if (pSyncNode->pFsm != NULL) {
    int32_t code = syncNodeCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
    ASSERT(code == 0);
  }
}

/*
 * Tell whether the replica identified by pRaftId counts as agreeing on `index`.
 *
 * Returns true when pRaftId is this node's own id and this node is in
 * TAOS_SYNC_STATE_LEADER, or when the match index recorded for pRaftId in
 * pSyncNode->pMatchIndex is at least `index`. Returns false otherwise.
 */
bool syncAgreeIndex(SSyncNode* pSyncNode, SRaftId* pRaftId, SyncIndex index) {
  // The leader always agrees with itself.
  bool isSelf = syncUtilSameId(pRaftId, &(pSyncNode->myRaftId));
  if (isSelf && pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
    return true;
  }

  // Any other case is decided by the recorded match index for that id.
  return syncIndexMgrGetIndex(pSyncNode->pMatchIndex, pRaftId) >= index;
}

/*
 * Tell whether a quorum of replicas agrees on `index`.
 *
 * Visits pSyncNode->replicasId in order, counting the entries for which
 * syncAgreeIndex() returns true, and returns true as soon as the running
 * count reaches pSyncNode->quorum. Returns false if every replica has been
 * visited without reaching the quorum (including when replicaNum is 0).
 */
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
  int votes = 0;
  int replica = 0;

  while (replica < pSyncNode->replicaNum) {
    votes += syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[replica]), index) ? 1 : 0;

    // Checked after every replica so the scan stops as early as possible.
    if (votes >= pSyncNode->quorum) {
      return true;
    }
    ++replica;
  }

  return false;
}