mndSync.c 5.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
S
Shengliang Guan 已提交
17
#include "mndSync.h"
18
#include "mndTrans.h"
19

20
/*
 * Open the mnode write-ahead log and store its handle in pMnode->syncMgmt.pWal.
 *
 * The WAL directory is pMnode->path joined with TD_DIRSEP and "wal". The WAL
 * configuration is fixed: vgId 1, fsync period 0, level TAOS_WAL_FSYNC, and
 * roll/segment/retention limits all set to -1 (presumably "disabled" -- confirm
 * against the WAL module).
 *
 * Returns 0 when walOpen() yields a handle, -1 when it returns NULL.
 */
static int32_t mndInitWal(SMnode *pMnode) {
  SWalCfg walCfg = {
      .vgId = 1,
      .fsyncPeriod = 0,
      .rollPeriod = -1,
      .segSize = -1,
      .retentionPeriod = -1,
      .retentionSize = -1,
      .level = TAOS_WAL_FSYNC,
  };

  char walDir[PATH_MAX] = {0};
  snprintf(walDir, sizeof(walDir), "%s%swal", pMnode->path, TD_DIRSEP);

  // The handle is stored even when NULL so the close path can test it.
  SWal *pWal = walOpen(walDir, &walCfg);
  pMnode->syncMgmt.pWal = pWal;

  return (pWal != NULL) ? 0 : -1;
}

/*
 * Close the mnode WAL if it is open and clear the stored handle.
 * Does nothing when pMnode->syncMgmt.pWal is already NULL, so it can be
 * called more than once.
 */
static void mndCloseWal(SMnode *pMnode) {
  SSyncMgmt *pSync = &pMnode->syncMgmt;

  if (pSync->pWal == NULL) {
    return;
  }

  walClose(pSync->pWal);
  pSync->pWal = NULL;
}

/*
 * Replay WAL entries into the sdb, starting right after the sdb's current
 * version, then persist the result if anything was replayed.
 *
 * Steps:
 *   1. Read the current sdb version (sdbUpdateVer with a delta of 0 is used
 *      throughout this function as a plain "get version").
 *   2. For each WAL version from max(sdbVer + 1, walFirst) up to walLast:
 *      read the entry, require that it is exactly sdbVer + 1, write its body
 *      into the sdb, and bump the sdb version by one.
 *   3. Call mndTransPullup().
 *   4. If the sdb version moved, write the sdb file, commit the WAL at that
 *      version and take a begin/end WAL snapshot at that version.
 *
 * Returns 0 on success and -1 on any failure. The read handle is always
 * closed before returning once it has been opened.
 */
static int32_t mndRestoreWal(SMnode *pMnode) {
  SSdb   *pSdb = pMnode->pSdb;
  SWal   *pWal = pMnode->syncMgmt.pWal;
  int64_t startSdbVer = sdbUpdateVer(pSdb, 0);
  int32_t code = -1;

  SWalReadHandle *pReader = walOpenReadHandle(pWal);
  if (pReader == NULL) {
    return -1;
  }

  int64_t walFirst = walGetFirstVer(pWal);
  int64_t walLast = walGetLastVer(pWal);
  mDebug("start to restore sdb wal, sdb ver:%" PRId64 ", wal first:%" PRId64 " last:%" PRId64, startSdbVer, walFirst,
         walLast);

  // Skip WAL entries that the sdb already contains.
  int64_t ver = TMAX(startSdbVer + 1, walFirst);
  while (ver >= 0 && ver <= walLast) {
    if (walReadWithHandle(pReader, ver) < 0) {
      mError("failed to read by wal handle since %s, ver:%" PRId64, terrstr(), ver);
      goto restoreOver;
    }

    SWalHead *pEntry = pReader->pHead;

    // Entries must be applied strictly in sequence: no gaps, no repeats.
    int64_t curSdbVer = sdbUpdateVer(pSdb, 0);
    if (ver != curSdbVer + 1) {
      terrno = TSDB_CODE_SDB_INVALID_WAl_VER;
      mError("failed to read wal from sdb, sdbVer:%" PRId64 " inconsistent with ver:%" PRId64, curSdbVer, ver);
      goto restoreOver;
    }

    mTrace("wal:%" PRId64 ", will be restored, content:%p", ver, pEntry->head.body);
    if (sdbWriteNotFree(pSdb, (void *)pEntry->head.body) < 0) {
      mError("failed to read wal from sdb since %s, ver:%" PRId64, terrstr(), ver);
      goto restoreOver;
    }

    sdbUpdateVer(pSdb, 1);
    mDebug("wal:%" PRId64 ", is restored", ver);

    ++ver;
  }

  int64_t endSdbVer = sdbUpdateVer(pSdb, 0);
  mDebug("restore sdb wal finished, sdb ver:%" PRId64, endSdbVer);

  mndTransPullup(pMnode);

  if (endSdbVer != startSdbVer) {
    mInfo("sdb restored from %" PRId64 " to %" PRId64 ", write file", startSdbVer, endSdbVer);

    // Short-circuit evaluation keeps the order: write file, commit, begin
    // snapshot, end snapshot; the first failing step aborts the rest.
    if (sdbWriteFile(pSdb) != 0 || walCommit(pWal, endSdbVer) != 0 || walBeginSnapshot(pWal, endSdbVer) < 0 ||
        walEndSnapshot(pWal) < 0) {
      goto restoreOver;
    }
  }

  code = 0;

restoreOver:
  walCloseReadHandle(pReader);
  return code;
}

/*
 * Initialise the mnode sync state: create the sync semaphore, open the WAL
 * and replay it into the sdb.
 *
 * On success the node is marked TAOS_SYNC_STATE_LEADER with no sync node
 * attached, and 0 is returned. If opening or restoring the WAL fails, the
 * failure is logged and -1 is returned; nothing is torn down here.
 */
int32_t mndInitSync(SMnode *pMnode) {
  SSyncMgmt *pSync = &pMnode->syncMgmt;
  int32_t    code = 0;

  tsem_init(&pSync->syncSem, 0, 0);

  if (mndInitWal(pMnode) < 0) {
    mError("failed to open wal since %s", terrstr());
    code = -1;
  } else if (mndRestoreWal(pMnode) < 0) {
    mError("failed to restore wal since %s", terrstr());
    code = -1;
  } else {
    pSync->state = TAOS_SYNC_STATE_LEADER;
    pSync->pSyncNode = NULL;
  }

  return code;
}

/*
 * Release the sync resources of the mnode: destroy the sync semaphore, then
 * close the WAL.
 */
void mndCleanupSync(SMnode *pMnode) {
  tsem_destroy(&pMnode->syncMgmt.syncSem);
  mndCloseWal(pMnode);
}

/*
 * Apply callback with the SSyncFSM callback signature.
 *
 * pData is treated as the SMnode the proposal belongs to. The callback clears
 * the stored error code and posts syncSem, which is the semaphore the
 * (currently compiled-out) replicated branch of mndSyncPropose() waits on.
 * fsm, index and buf are not used. Always returns 0.
 */
static int32_t mndSyncApplyCb(struct SSyncFSM *fsm, SyncIndex index, const SSyncBuffer *buf, void *pData) {
  SSyncMgmt *pSync = &((SMnode *)pData)->syncMgmt;

  pSync->errCode = 0;
  tsem_post(&pSync->syncSem);

  return 0;
}
150 151

/*
 * Append one raw sdb record to the mnode WAL.
 *
 * The sdb version is bumped first and used as the WAL version of the record.
 * If walWrite() fails, the bump is undone, the error is logged and -1 is
 * returned. Otherwise the WAL is committed at that version and fsynced, and
 * 0 is returned. The results of walCommit() and walFsync() are not checked.
 *
 * The replicated path (syncPropose() followed by a wait on syncSem) is
 * compiled out by the `#if 1` below.
 */
int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) {
  SSdb *pSdb = pMnode->pSdb;
  SWal *pWal = pMnode->syncMgmt.pWal;

  int64_t walVer = sdbUpdateVer(pSdb, 1);
  if (walWrite(pWal, walVer, 1, pRaw, sdbGetRawTotalSize(pRaw)) < 0) {
    // Undo the version bump so the next proposal reuses this version.
    sdbUpdateVer(pSdb, -1);
    mError("failed to write raw:%p since %s, ver:%" PRId64, pRaw, terrstr(), walVer);
    return -1;
  }

  mTrace("raw:%p, write to wal, ver:%" PRId64, pRaw, walVer);
  walCommit(pWal, walVer);
  walFsync(pWal, true);

#if 1
  return 0;
#else
  // Single-replica deployments have nothing to replicate.
  if (pMnode->replica == 1) return 0;

  SSyncMgmt *pSync = &pMnode->syncMgmt;
  pSync->errCode = 0;

  SSyncBuffer syncBuf = {.data = pRaw, .len = sdbGetRawTotalSize(pRaw)};

  bool    isWeak = false;
  int32_t code = syncPropose(pSync->pSyncNode, &syncBuf, pMnode, isWeak);

  if (code != 0) return code;

  // Block until the apply callback posts the semaphore.
  tsem_wait(&pSync->syncSem);
  return pSync->errCode;
#endif
}

S
Shengliang Guan 已提交
186
/*
 * Report whether this mnode's sync state is TAOS_SYNC_STATE_LEADER.
 */
bool mndIsMaster(SMnode *pMnode) {
  return pMnode->syncMgmt.state == TAOS_SYNC_STATE_LEADER;
}