mndSync.c 5.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
S
Shengliang Guan 已提交
17
#include "mndSync.h"
18
#include "mndTrans.h"
19

20
/*
 * Open the mnode write-ahead log.
 *
 * The WAL directory is "<pMnode->path><TD_DIRSEP>wal". The handle returned by
 * walOpen() is stored in pMnode->syncMgmt.pWal, including when it is NULL.
 *
 * Returns 0 when a handle was obtained, -1 when walOpen() returned NULL.
 */
static int32_t mndInitWal(SMnode *pMnode) {
  char walPath[PATH_MAX] = {0};
  snprintf(walPath, sizeof(walPath), "%s%swal", pMnode->path, TD_DIRSEP);

  SWalCfg walCfg = {
      .vgId = 1,
      .fsyncPeriod = 0,
      .rollPeriod = -1,
      .segSize = -1,
      .retentionPeriod = -1,
      .retentionSize = -1,
      .level = TAOS_WAL_FSYNC,
  };

  SSyncMgmt *pSync = &pMnode->syncMgmt;
  pSync->pWal = walOpen(walPath, &walCfg);

  return (pSync->pWal != NULL) ? 0 : -1;
}

/*
 * Close the mnode WAL if it is open.
 *
 * The stored handle is reset to NULL after closing, so calling this again is
 * a no-op.
 */
static void mndCloseWal(SMnode *pMnode) {
  SSyncMgmt *pSync = &pMnode->syncMgmt;
  if (pSync->pWal == NULL) return;

  walClose(pSync->pWal);
  pSync->pWal = NULL;
}

/*
 * Replay WAL entries into the sdb and persist the result.
 *
 * Steps, in order:
 *   1. Read every WAL version from max(sdb version + 1, first WAL version) up
 *      to the last WAL version and write its body into the sdb. Each entry
 *      must be exactly one version ahead of the sdb, otherwise terrno is set
 *      to TSDB_CODE_SDB_INVALID_WAl_VER and the restore fails.
 *   2. Call mndTransPullup().
 *   3. If the sdb version differs from the value observed on entry, write the
 *      sdb file, commit the WAL at that version and take a WAL snapshot
 *      (begin + end).
 *
 * sdbUpdateVer(pSdb, 0) is used throughout to read the current sdb version
 * (presumably a zero delta leaves it unchanged -- confirm against sdb).
 *
 * Returns 0 on success, -1 on any failure. The read handle is always closed
 * once it has been opened.
 */
static int32_t mndRestoreWal(SMnode *pMnode) {
  SSdb *pSdb = pMnode->pSdb;
  SWal *pWal = pMnode->syncMgmt.pWal;

  const int64_t startSdbVer = sdbUpdateVer(pSdb, 0);
  int64_t       sdbVer = 0;
  int32_t       code = -1;

  SWalReadHandle *pReader = walOpenReadHandle(pWal);
  if (pReader == NULL) return -1;

  const int64_t walFirst = walGetFirstVer(pWal);
  const int64_t walLast = walGetLastVer(pWal);
  mDebug("start to restore wal, sdbver:%" PRId64 ", first:%" PRId64 " last:%" PRId64, startSdbVer, walFirst, walLast);

  // Entries at or below the current sdb version are skipped.
  const int64_t replayFrom = TMAX(startSdbVer + 1, walFirst);

  for (int64_t walVer = replayFrom; walVer >= 0 && walVer <= walLast; ++walVer) {
    if (walReadWithHandle(pReader, walVer) < 0) {
      mError("ver:%" PRId64 ", failed to read from wal since %s", walVer, terrstr());
      goto _OVER;
    }

    SWalHead *pHead = pReader->pHead;

    // The entry must directly follow the version the sdb is currently at.
    const int64_t curSdbVer = sdbUpdateVer(pSdb, 0);
    if (walVer != curSdbVer + 1) {
      terrno = TSDB_CODE_SDB_INVALID_WAl_VER;
      mError("ver:%" PRId64 ", failed to write to sdb, since inconsistent with sdbver:%" PRId64, walVer, curSdbVer);
      goto _OVER;
    }

    mTrace("ver:%" PRId64 ", will be restored, content:%p", walVer, pHead->head.body);
    if (sdbWriteWithoutFree(pSdb, (void *)pHead->head.body) < 0) {
      mError("ver:%" PRId64 ", failed to write to sdb since %s", walVer, terrstr());
      goto _OVER;
    }

    sdbUpdateVer(pSdb, 1);
    mDebug("ver:%" PRId64 ", is restored", walVer);
  }

  sdbVer = sdbUpdateVer(pSdb, 0);
  mDebug("restore wal finished, sdbver:%" PRId64, sdbVer);

  mndTransPullup(pMnode);
  sdbVer = sdbUpdateVer(pSdb, 0);
  mDebug("pullup trans finished, sdbver:%" PRId64, sdbVer);

  if (sdbVer != startSdbVer) {
    mInfo("sdb restored from %" PRId64 " to %" PRId64 ", write file", startSdbVer, sdbVer);

    // Short-circuit evaluation keeps the original order and stops at the
    // first failing step.
    const bool persisted = sdbWriteFile(pSdb) == 0 &&
                           walCommit(pWal, sdbVer) == 0 &&
                           walBeginSnapshot(pWal, sdbVer) >= 0 &&
                           walEndSnapshot(pWal) >= 0;
    if (!persisted) goto _OVER;
  }

  code = 0;

_OVER:
  walCloseReadHandle(pReader);
  return code;
}

/*
 * Initialise the sync management state of an mnode.
 *
 * Initialises the sync semaphore with an initial count of 0, opens the WAL
 * and replays it into the sdb. When pMnode->selfId is 1 the sync state is set
 * to TAOS_SYNC_STATE_LEADER; otherwise the state field is not touched here.
 * pSyncNode is always reset to NULL.
 *
 * Returns 0 on success, -1 if opening or restoring the WAL failed. Nothing is
 * released here on failure.
 */
int32_t mndInitSync(SMnode *pMnode) {
  SSyncMgmt *pSync = &pMnode->syncMgmt;

  tsem_init(&pSync->syncSem, 0, 0);

  if (mndInitWal(pMnode) < 0) {
    mError("failed to open wal since %s", terrstr());
    return -1;
  }

  if (mndRestoreWal(pMnode) < 0) {
    mError("failed to restore wal since %s", terrstr());
    return -1;
  }

  if (pMnode->selfId == 1) {
    pSync->state = TAOS_SYNC_STATE_LEADER;
  }

  pSync->pSyncNode = NULL;
  return 0;
}

/*
 * Release the resources held by the mnode sync management state: destroy the
 * sync semaphore, then close the WAL.
 */
void mndCleanupSync(SMnode *pMnode) {
  tsem_destroy(&pMnode->syncMgmt.syncSem);
  mndCloseWal(pMnode);
}

/*
 * Sync FSM apply callback.
 *
 * pData is the SMnode that issued the proposal. The callback clears the
 * stored error code and posts the sync semaphore. fsm, index and buf are not
 * used.
 *
 * Always returns 0.
 */
static int32_t mndSyncApplyCb(struct SSyncFSM *fsm, SyncIndex index, const SSyncBuffer *buf, void *pData) {
  SMnode *pMnode = pData;

  pMnode->syncMgmt.errCode = 0;
  tsem_post(&pMnode->syncMgmt.syncSem);

  return 0;
}
155 156

/*
 * Append a raw sdb record to the WAL under the next sdb version.
 *
 * The sdb version is advanced by one first; if the WAL write fails the
 * version is moved back by one and -1 is returned. On a successful write the
 * WAL is committed at that version and fsync'ed.
 *
 * NOTE(review): the return values of walCommit() and walFsync() are ignored
 * here, whereas mndRestoreWal() treats a walCommit() failure as fatal --
 * confirm this is intentional.
 *
 * The replication path through syncPropose() is compiled out (#if 1).
 *
 * Returns 0 on success, -1 if the WAL write failed.
 */
int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) {
  SSdb *pSdb = pMnode->pSdb;
  SWal *pWal = pMnode->syncMgmt.pWal;

  const int64_t ver = sdbUpdateVer(pSdb, 1);

  const bool written = walWrite(pWal, ver, 1, pRaw, sdbGetRawTotalSize(pRaw)) >= 0;
  if (!written) {
    // Undo the version bump taken above.
    sdbUpdateVer(pSdb, -1);
    mError("ver:%" PRId64 ", failed to write raw:%p to wal since %s", ver, pRaw, terrstr());
    return -1;
  }

  mTrace("ver:%" PRId64 ", write to wal, raw:%p", ver, pRaw);
  walCommit(pWal, ver);
  walFsync(pWal, true);

#if 1
  return 0;
#else
  if (pMnode->replica == 1) return 0;

  SSyncMgmt *pMgmt = &pMnode->syncMgmt;
  pMgmt->errCode = 0;

  SSyncBuffer buf = {.data = pRaw, .len = sdbGetRawTotalSize(pRaw)};

  bool    isWeak = false;
  int32_t code = syncPropose(pMgmt->pSyncNode, &buf, pMnode, isWeak);

  if (code != 0) return code;

  tsem_wait(&pMgmt->syncSem);
  return pMgmt->errCode;
#endif
}

S
Shengliang Guan 已提交
191
/* Report whether this mnode's sync state is TAOS_SYNC_STATE_LEADER. */
bool mndIsMaster(SMnode *pMnode) {
  return pMnode->syncMgmt.state == TAOS_SYNC_STATE_LEADER;
}