dmEps.c 17.8 KB
Newer Older
S
shm  
Shengliang Guan 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
S
Shengliang Guan 已提交
17
#include "dmUtil.h"
18
#include "tjson.h"
H
Haojun Liao 已提交
19
#include "tmisce.h"
S
shm  
Shengliang Guan 已提交
20

21 22 23 24 25 26 27 28 29 30 31 32
// One endpoint rename record: the endpoint a dnode used to have and the one
// that replaces it. Entries are filled in by dmDecodeEpPairs from the items of
// the "dnodes" JSON array.
typedef struct {
  int32_t  id;                      // dnode id, decoded from JSON key "id"
  uint16_t oldPort;                 // decoded from JSON key "port"
  uint16_t newPort;                 // decoded from JSON key "new_port"
  char     oldFqdn[TSDB_FQDN_LEN];  // decoded from JSON key "fqdn"
  char     newFqdn[TSDB_FQDN_LEN];  // decoded from JSON key "new_fqdn"
} SDnodeEpPair;

// Forward declarations of file-local helpers that are defined further down
// but called before their definitions.
static int32_t dmReadDnodePairs(SDnodeData *pData);
static void    dmResetEps(SDnodeData *pData, SArray *dnodeEps);
static bool    dmIsEpChanged(SDnodeData *pData, int32_t dnodeId, const char *ep);
static void    dmPrintEps(SDnodeData *pData);
S
shm  
Shengliang Guan 已提交
33

S
Shengliang Guan 已提交
34
// Looks up dnodeId in pData->dnodeHash and copies out the endpoint parts the
// caller asked for. Each output pointer is optional (NULL means "skip"):
//   pEp   - receives "fqdn:port", written with a TSDB_EP_LEN bound
//   pFqdn - receives the fqdn, written with a TSDB_FQDN_LEN bound
//   pPort - receives the port
// When the id is not found, no output is touched. The lookup and the copies
// happen between the Rdlock/Unlock calls on pData->lock.
static void dmGetDnodeEp(SDnodeData *pData, int32_t dnodeId, char *pEp, char *pFqdn, uint16_t *pPort) {
  taosThreadRwlockRdlock(&pData->lock);

  SDnodeEp *pFound = taosHashGet(pData->dnodeHash, &dnodeId, sizeof(int32_t));
  if (pFound == NULL) {
    taosThreadRwlockUnlock(&pData->lock);
    return;
  }

  if (pPort != NULL) {
    *pPort = pFound->ep.port;
  }
  if (pFqdn != NULL) {
    tstrncpy(pFqdn, pFound->ep.fqdn, TSDB_FQDN_LEN);
  }
  if (pEp != NULL) {
    snprintf(pEp, TSDB_EP_LEN, "%s:%u", pFound->ep.fqdn, pFound->ep.port);
  }

  taosThreadRwlockUnlock(&pData->lock);
}

S
Shengliang Guan 已提交
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
// Fills pData from a parsed dnode JSON document.
//
// Reads the scalar keys "dnodeId", "dnodeVer", "clusterId" and "dropped" into
// the matching pData fields, then appends one SDnodeEp to pData->dnodeEps for
// every item of the "dnodes" array (keys "id", "fqdn", "port", "isMnode").
// A missing "dnodes" key is not an error.
//
// Returns 0 on success, -1 as soon as any field fails to decode or an entry
// cannot be appended. Entries appended before the failure are left in place.
// NOTE(review): "fqdn" is copied into a fixed TSDB_FQDN_LEN buffer by
// tjsonGetStringValue; whether that helper bounds the copy is not visible
// here - confirm.
static int32_t dmDecodeEps(SJson *pJson, SDnodeData *pData) {
  int32_t code = 0;

  tjsonGetInt32ValueFromDouble(pJson, "dnodeId", pData->dnodeId, code);
  if (code < 0) return -1;

  tjsonGetNumberValue(pJson, "dnodeVer", pData->dnodeVer, code);
  if (code < 0) return -1;

  tjsonGetNumberValue(pJson, "clusterId", pData->clusterId, code);
  if (code < 0) return -1;

  tjsonGetInt32ValueFromDouble(pJson, "dropped", pData->dropped, code);
  if (code < 0) return -1;

  SJson *pList = tjsonGetObjectItem(pJson, "dnodes");
  if (pList == NULL) return 0;

  int32_t total = tjsonGetArraySize(pList);
  int32_t idx = 0;
  while (idx < total) {
    SJson *pItem = tjsonGetArrayItem(pList, idx);
    if (pItem == NULL) return -1;

    SDnodeEp entry = {0};

    tjsonGetInt32ValueFromDouble(pItem, "id", entry.id, code);
    if (code < 0) return -1;

    code = tjsonGetStringValue(pItem, "fqdn", entry.ep.fqdn);
    if (code < 0) return -1;

    tjsonGetUInt16ValueFromDouble(pItem, "port", entry.ep.port, code);
    if (code < 0) return -1;

    tjsonGetInt8ValueFromDouble(pItem, "isMnode", entry.isMnode, code);
    if (code < 0) return -1;

    if (taosArrayPush(pData->dnodeEps, &entry) == NULL) return -1;

    ++idx;
  }

  return 0;
}

S
Shengliang Guan 已提交
89
// Loads the dnode endpoint list from "<tsDataDir>/dnode/dnode.json" into pData.
//
// Steps:
//   1. Allocate pData->dnodeEps.
//   2. If the file exists, read it fully, parse it as JSON and decode it with
//      dmDecodeEps. A missing file is not an error.
//   3. If the list is still empty, seed it with one mnode entry built from
//      tsFirst.
//   4. Apply endpoint renames via dmReadDnodePairs, then rebuild the derived
//      mnode ep set / hash via dmResetEps.
//   5. Unless an endpoint rename table (pData->oldDnodeEps) was loaded, refuse
//      to start when tsLocalEp differs from the endpoint stored for this
//      dnode id.
//
// Returns 0 on success and a non-zero value (-1) on failure; on failure paths
// in step 2 terrno is set before returning.
int32_t dmReadEps(SDnodeData *pData) {
  int32_t   code = -1;
  TdFilePtr pFile = NULL;
  char     *content = NULL;
  SJson    *pJson = NULL;
  int64_t   size = 0;
  char      file[PATH_MAX] = {0};
  snprintf(file, sizeof(file), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP);

  pData->dnodeEps = taosArrayInit(1, sizeof(SDnodeEp));
  if (pData->dnodeEps == NULL) {
    dError("failed to calloc dnodeEp array since %s", strerror(errno));
    goto _OVER;
  }

  if (taosStatFile(file, NULL, NULL) < 0) {
    // First start: nothing persisted yet, continue with an empty list.
    dInfo("dnode file:%s not exist", file);
    code = 0;
    goto _OVER;
  }

  pFile = taosOpenFile(file, TD_FILE_READ);
  if (pFile == NULL) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to open dnode file:%s since %s", file, terrstr());
    goto _OVER;
  }

  if (taosFStatFile(pFile, &size, NULL) < 0) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to fstat dnode file:%s since %s", file, terrstr());
    goto _OVER;
  }

  // One extra byte for the terminating NUL required by the JSON parser input.
  content = taosMemoryMalloc(size + 1);
  if (content == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _OVER;
  }

  if (taosReadFile(pFile, content, size) != size) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to read dnode file:%s since %s", file, terrstr());
    goto _OVER;
  }

  content[size] = '\0';

  pJson = tjsonParse(content);
  if (pJson == NULL) {
    terrno = TSDB_CODE_INVALID_JSON_FORMAT;
    goto _OVER;
  }

  if (dmDecodeEps(pJson, pData) < 0) {
    terrno = TSDB_CODE_INVALID_JSON_FORMAT;
    goto _OVER;
  }

  code = 0;
  dInfo("succceed to read dnode file %s", file);

_OVER:
  if (content != NULL) taosMemoryFree(content);
  if (pJson != NULL) tjsonDelete(pJson);
  if (pFile != NULL) taosCloseFile(&pFile);

  if (code != 0) {
    dError("failed to read dnode file:%s since %s", file, terrstr());
    return code;
  }

  if (taosArrayGetSize(pData->dnodeEps) == 0) {
    // Nothing was loaded: fall back to the configured first endpoint as mnode.
    SDnodeEp dnodeEp = {0};
    dnodeEp.isMnode = 1;
    taosGetFqdnPortFromEp(tsFirst, &dnodeEp.ep);
    if (taosArrayPush(pData->dnodeEps, &dnodeEp) == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      dError("failed to read dnode file:%s since %s", file, terrstr());
      return -1;
    }
  }

  if (dmReadDnodePairs(pData) != 0) {
    return -1;
  }

  dDebug("reset dnode list on startup");
  dmResetEps(pData, pData->dnodeEps);

  // pData->dnodeEps is always non-NULL here, so testing it for NULL would make
  // this check unreachable. The check is skipped only when an endpoint rename
  // table was loaded, because then the stored endpoints are being migrated.
  if (pData->oldDnodeEps == NULL && dmIsEpChanged(pData, pData->dnodeId, tsLocalEp)) {
    dError("localEp %s different with %s and need reconfigured", tsLocalEp, file);
    return -1;
  }

  return code;
}

183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
// Serializes pData into pJson.
//
// Adds the scalar keys "dnodeId", "dnodeVer", "clusterId" and "dropped", then
// a "dnodes" array holding one object per entry of pData->dnodeEps with the
// keys "id", "fqdn", "port" and "isMnode".
//
// Returns 0 on success, -1 on the first failed add/create call. JSON nodes
// created here that could not be attached to the tree are deleted before
// returning, so a failure does not leak them; nodes already attached stay
// owned by pJson and are released when the caller deletes pJson.
static int32_t dmEncodeEps(SJson *pJson, SDnodeData *pData) {
  if (tjsonAddDoubleToObject(pJson, "dnodeId", pData->dnodeId) < 0) return -1;
  if (tjsonAddIntegerToObject(pJson, "dnodeVer", pData->dnodeVer) < 0) return -1;
  if (tjsonAddIntegerToObject(pJson, "clusterId", pData->clusterId) < 0) return -1;
  if (tjsonAddDoubleToObject(pJson, "dropped", pData->dropped) < 0) return -1;

  SJson *dnodes = tjsonCreateArray();
  if (dnodes == NULL) return -1;
  if (tjsonAddItemToObject(pJson, "dnodes", dnodes) < 0) {
    // Not attached to pJson, so nobody else will free it.
    tjsonDelete(dnodes);
    return -1;
  }

  int32_t numOfEps = (int32_t)taosArrayGetSize(pData->dnodeEps);
  for (int32_t i = 0; i < numOfEps; ++i) {
    SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, i);
    SJson    *dnode = tjsonCreateObject();
    if (dnode == NULL) return -1;

    // The object only becomes owned by the array once tjsonAddItemToArray
    // succeeds; until then every failure must release it here.
    if (tjsonAddDoubleToObject(dnode, "id", pDnodeEp->id) < 0 ||
        tjsonAddStringToObject(dnode, "fqdn", pDnodeEp->ep.fqdn) < 0 ||
        tjsonAddDoubleToObject(dnode, "port", pDnodeEp->ep.port) < 0 ||
        tjsonAddDoubleToObject(dnode, "isMnode", pDnodeEp->isMnode) < 0 ||
        tjsonAddItemToArray(dnodes, dnode) < 0) {
      tjsonDelete(dnode);
      return -1;
    }
  }

  return 0;
}

S
Shengliang Guan 已提交
209
// Persists pData to "<tsDataDir>/dnode/dnode.json".
//
// The JSON text is first written to "dnode.json.bak", fsynced and closed, and
// only then renamed over the real file. On success pData->updateTime is
// refreshed.
//
// Returns 0 on success, -1 on failure. On failure terrno is set: to
// TSDB_CODE_OUT_OF_MEMORY when building the JSON text fails, otherwise to the
// system error derived from errno.
int32_t dmWriteEps(SDnodeData *pData) {
  int32_t   code = -1;
  char     *buffer = NULL;
  SJson    *pJson = NULL;
  TdFilePtr pFile = NULL;
  int64_t   len = 0;
  char      file[PATH_MAX] = {0};
  char      realfile[PATH_MAX] = {0};
  snprintf(file, sizeof(file), "%s%sdnode%sdnode.json.bak", tsDataDir, TD_DIRSEP, TD_DIRSEP);
  snprintf(realfile, sizeof(realfile), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP);

  // Any failure while building the JSON text is reported as out-of-memory.
  terrno = TSDB_CODE_OUT_OF_MEMORY;
  pJson = tjsonCreateObject();
  if (pJson == NULL) goto _OVER;
  if (dmEncodeEps(pJson, pData) != 0) goto _OVER;
  buffer = tjsonToString(pJson);
  if (buffer == NULL) goto _OVER;
  // From here on failures are I/O errors; terrno is derived from errno below.
  terrno = 0;

  pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC);
  if (pFile == NULL) goto _OVER;

  // Require the whole buffer to be written: a short write followed by the
  // rename below would replace the real file with truncated JSON.
  len = (int64_t)strlen(buffer);
  if (taosWriteFile(pFile, buffer, len) != len) goto _OVER;
  if (taosFsyncFile(pFile) < 0) goto _OVER;

  taosCloseFile(&pFile);
  if (taosRenameFile(file, realfile) != 0) goto _OVER;

  code = 0;
  pData->updateTime = taosGetTimestampMs();
  dInfo("succeed to write dnode file:%s, num:%d ver:%" PRId64, realfile, (int32_t)taosArrayGetSize(pData->dnodeEps),
        pData->dnodeVer);

_OVER:
  if (pJson != NULL) tjsonDelete(pJson);
  if (buffer != NULL) taosMemoryFree(buffer);
  if (pFile != NULL) taosCloseFile(&pFile);

  if (code != 0) {
    if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to write dnode file:%s since %s, dnodeVer:%" PRId64, realfile, terrstr(), pData->dnodeVer);
  }
  return code;
}

S
Shengliang Guan 已提交
254
// Replaces the in-memory dnode list with eps and writes the result to disk.
// Both steps run between the Wrlock/Unlock calls on pData->lock. The return
// value of dmWriteEps is not propagated (this function returns void).
void dmUpdateEps(SDnodeData *pData, SArray *eps) {
  taosThreadRwlockWrlock(&pData->lock);
  {
    dDebug("new dnode list get from mnode, dnodeVer:%" PRId64, pData->dnodeVer);
    dmResetEps(pData, eps);
    (void)dmWriteEps(pData);
  }
  taosThreadRwlockUnlock(&pData->lock);
}

S
Shengliang Guan 已提交
262 263 264
// Installs dnodeEps as the current dnode list and rebuilds the data derived
// from it.
//
//   - If dnodeEps is not already pData->dnodeEps, a duplicate of it replaces
//     the stored array and the previous array is destroyed.
//   - pData->mnodeEps is rebuilt from the entries flagged isMnode, keeping at
//     most TSDB_MAX_REPLICA of them, with inUse reset to 0.
//   - Every entry is put into pData->dnodeHash keyed by its id. Entries already
//     in the hash whose ids are absent from dnodeEps are not removed here.
//   - The resulting list is logged through dmPrintEps.
static void dmResetEps(SDnodeData *pData, SArray *dnodeEps) {
  if (pData->dnodeEps != dnodeEps) {
    SArray *pPrevious = pData->dnodeEps;
    pData->dnodeEps = taosArrayDup(dnodeEps, NULL);
    taosArrayDestroy(pPrevious);
  }

  pData->mnodeEps.inUse = 0;
  pData->mnodeEps.numOfEps = 0;

  int32_t nextSlot = 0;
  int32_t total = (int32_t)taosArrayGetSize(dnodeEps);

  for (int32_t idx = 0; idx < total; idx++) {
    SDnodeEp *pEntry = taosArrayGet(dnodeEps, idx);

    // Collect mnode endpoints until the ep set is full.
    if (pEntry->isMnode && nextSlot < TSDB_MAX_REPLICA) {
      pData->mnodeEps.numOfEps++;
      pData->mnodeEps.eps[nextSlot] = pEntry->ep;
      nextSlot++;
    }

    // Index every dnode by id for the hash lookups done elsewhere in this file.
    taosHashPut(pData->dnodeHash, &pEntry->id, sizeof(int32_t), pEntry, sizeof(SDnodeEp));
  }

  dmPrintEps(pData);
}

S
Shengliang Guan 已提交
293 294
// Writes the current dnode list (pData->dnodeEps) to the debug log: one
// summary line with the entry count, then one line per entry.
static void dmPrintEps(SDnodeData *pData) {
  const int32_t total = (int32_t)taosArrayGetSize(pData->dnodeEps);
  dDebug("print dnode list, num:%d", total);

  int32_t idx = 0;
  while (idx < total) {
    SDnodeEp *pItem = taosArrayGet(pData->dnodeEps, idx);
    dDebug("dnode:%d, fqdn:%s port:%u isMnode:%d", pItem->id, pItem->ep.fqdn, pItem->ep.port, pItem->isMnode);
    idx++;
  }
}

S
Shengliang Guan 已提交
302
// Reports whether ep differs from the "fqdn:port" stored for dnodeId in
// pData->dnodeHash.
//
// Returns false when dnodeId is 0 or when the id is not present in the hash;
// returns true (and logs an error) only when an entry exists and its
// formatted endpoint is not equal to ep. The lookup runs between the
// Rdlock/Unlock calls on pData->lock.
static bool dmIsEpChanged(SDnodeData *pData, int32_t dnodeId, const char *ep) {
  if (dnodeId == 0) return false;

  bool differs = false;
  taosThreadRwlockRdlock(&pData->lock);

  SDnodeEp *pStored = taosHashGet(pData->dnodeHash, &dnodeId, sizeof(int32_t));
  if (pStored != NULL) {
    char storedEp[TSDB_EP_LEN + 1] = {0};
    snprintf(storedEp, TSDB_EP_LEN, "%s:%u", pStored->ep.fqdn, pStored->ep.port);

    if (strcmp(ep, storedEp) != 0) {
      differs = true;
      dError("dnode:%d, localEp %s different from %s", dnodeId, ep, storedEp);
    }
  }

  taosThreadRwlockUnlock(&pData->lock);
  return differs;
}
S
Shengliang Guan 已提交
320 321

// Copies the current mnode endpoint set (pData->mnodeEps) into *pEpSet. The
// copy is made between the Rdlock/Unlock calls on pData->lock.
void dmGetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet) {
  taosThreadRwlockRdlock(&pData->lock);
  const SEpSet *pCurrent = &pData->mnodeEps;
  *pEpSet = *pCurrent;
  taosThreadRwlockUnlock(&pData->lock);
}

327 328
// Fills *pEpSet with the current mnode endpoint set for a redirect reply.
//
// After copying the set, every endpoint is traced. For each endpoint that
// equals this node's own address (tsLocalFqdn / tsServerPort), inUse is moved
// to the entry following it, wrapping around at numOfEps. pMsg is only used
// for the trace output (its info.handle).
void dmGetMnodeEpSetForRedirect(SDnodeData *pData, SRpcMsg *pMsg, SEpSet *pEpSet) {
  dmGetMnodeEpSet(pData, pEpSet);
  dTrace("msg is redirected, handle:%p num:%d use:%d", pMsg->info.handle, pEpSet->numOfEps, pEpSet->inUse);

  for (int32_t i = 0; i < pEpSet->numOfEps; ++i) {
    dTrace("mnode index:%d %s:%u", i, pEpSet->eps[i].fqdn, pEpSet->eps[i].port);

    if (pEpSet->eps[i].port != tsServerPort) continue;
    if (strcmp(pEpSet->eps[i].fqdn, tsLocalFqdn) != 0) continue;

    // This entry is the local node: point inUse at the next endpoint.
    pEpSet->inUse = (i + 1) % pEpSet->numOfEps;
  }
}

S
Shengliang Guan 已提交
338
// Stores *pEpSet as the current mnode endpoint set and logs the new set.
//
// Does nothing when *pEpSet is byte-wise identical to pData->mnodeEps. That
// comparison is made before the lock is taken; only the assignment itself
// runs between the Wrlock/Unlock calls on pData->lock.
void dmSetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet) {
  bool unchanged = (memcmp(pEpSet, &pData->mnodeEps, sizeof(SEpSet)) == 0);
  if (unchanged) return;

  taosThreadRwlockWrlock(&pData->lock);
  pData->mnodeEps = *pEpSet;
  taosThreadRwlockUnlock(&pData->lock);

  dInfo("mnode is changed, num:%d use:%d", pEpSet->numOfEps, pEpSet->inUse);

  int32_t idx = 0;
  while (idx < pEpSet->numOfEps) {
    dInfo("mnode index:%d %s:%u", idx, pEpSet->eps[idx].fqdn, pEpSet->eps[idx].port);
    ++idx;
  }
}
349

350 351
bool dmUpdateDnodeInfo(void *data, int32_t *did, int64_t *clusterId, char *fqdn, uint16_t *port) {
  bool        updated = false;
352
  SDnodeData *pData = data;
353 354 355
  int32_t     dnodeId = -1;
  if (did != NULL) dnodeId = *did;

356
  taosThreadRwlockRdlock(&pData->lock);
357 358 359 360

  if (pData->oldDnodeEps != NULL) {
    int32_t size = (int32_t)taosArrayGetSize(pData->oldDnodeEps);
    for (int32_t i = 0; i < size; ++i) {
361 362 363 364 365
      SDnodeEpPair *pair = taosArrayGet(pData->oldDnodeEps, i);
      if (strcmp(pair->oldFqdn, fqdn) == 0 && pair->oldPort == *port) {
        dInfo("dnode:%d, update ep:%s:%u to %s:%u", dnodeId, fqdn, *port, pair->newFqdn, pair->newPort);
        tstrncpy(fqdn, pair->newFqdn, TSDB_FQDN_LEN);
        *port = pair->newPort;
366
        updated = true;
367 368 369 370 371 372 373
      }
    }
  }

  if (did != NULL && dnodeId <= 0) {
    int32_t size = (int32_t)taosArrayGetSize(pData->dnodeEps);
    for (int32_t i = 0; i < size; ++i) {
374 375
      SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, i);
      if (strcmp(pDnodeEp->ep.fqdn, fqdn) == 0 && pDnodeEp->ep.port == *port) {
376 377
        dInfo("dnode:%s:%u, update dnodeId to dnode:%d", fqdn, *port, pDnodeEp->id);
        *did = pDnodeEp->id;
378
        if (clusterId != NULL) *clusterId = pData->clusterId;
379 380
      }
    }
381 382 383 384
  }

  if (dnodeId > 0) {
    SDnodeEp *pDnodeEp = taosHashGet(pData->dnodeHash, &dnodeId, sizeof(int32_t));
385
    if (pDnodeEp) {
386 387
      if (strcmp(pDnodeEp->ep.fqdn, fqdn) != 0 || pDnodeEp->ep.port != *port) {
        dInfo("dnode:%d, update ep:%s:%u to %s:%u", dnodeId, fqdn, *port, pDnodeEp->ep.fqdn, pDnodeEp->ep.port);
388 389
        tstrncpy(fqdn, pDnodeEp->ep.fqdn, TSDB_FQDN_LEN);
        *port = pDnodeEp->ep.port;
390
        updated = true;
391
      }
392
      if (clusterId != NULL) *clusterId = pData->clusterId;
393 394
    }
  }
395

396
  taosThreadRwlockUnlock(&pData->lock);
397
  return updated;
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
}

// Decodes the "dnodes" array of a parsed endpoint-rename document into
// pData->oldDnodeEps, one SDnodeEpPair per item (keys "id", "fqdn", "port",
// "new_fqdn", "new_port").
//
// Returns 0 when the "dnodes" key is missing, -1 on the first item that fails
// to decode or cannot be appended, otherwise the (non-negative) status of the
// last decode step. pData->oldDnodeEps must already be allocated by the
// caller.
// NOTE(review): the fqdn strings are copied into fixed TSDB_FQDN_LEN buffers
// by tjsonGetStringValue; whether that helper bounds the copy is not visible
// here - confirm.
static int32_t dmDecodeEpPairs(SJson *pJson, SDnodeData *pData) {
  int32_t code = 0;

  SJson *pList = tjsonGetObjectItem(pJson, "dnodes");
  if (pList == NULL) return 0;

  int32_t total = tjsonGetArraySize(pList);
  int32_t idx = 0;
  while (idx < total) {
    SJson *pItem = tjsonGetArrayItem(pList, idx);
    if (pItem == NULL) return -1;

    SDnodeEpPair entry = {0};

    tjsonGetInt32ValueFromDouble(pItem, "id", entry.id, code);
    if (code < 0) return -1;

    code = tjsonGetStringValue(pItem, "fqdn", entry.oldFqdn);
    if (code < 0) return -1;

    tjsonGetUInt16ValueFromDouble(pItem, "port", entry.oldPort, code);
    if (code < 0) return -1;

    code = tjsonGetStringValue(pItem, "new_fqdn", entry.newFqdn);
    if (code < 0) return -1;

    tjsonGetUInt16ValueFromDouble(pItem, "new_port", entry.newPort, code);
    if (code < 0) return -1;

    if (taosArrayPush(pData->oldDnodeEps, &entry) == NULL) return -1;

    ++idx;
  }

  return code;
}

429 430
// Renames "<tsDataDir>/dnode/ep.json" to "ep.json.bak". The rename result is
// deliberately ignored. pData is not used.
void dmRemoveDnodePairs(SDnodeData *pData) {
  char bak[PATH_MAX] = {0};
  char file[PATH_MAX] = {0};

  snprintf(bak, sizeof(bak), "%s%sdnode%sep.json.bak", tsDataDir, TD_DIRSEP, TD_DIRSEP);
  snprintf(file, sizeof(file), "%s%sdnode%sep.json", tsDataDir, TD_DIRSEP, TD_DIRSEP);

  dInfo("dnode file:%s is rename to bak file", file);
  (void)taosRenameFile(file, bak);
}

438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
// Loads the optional endpoint-rename table "<tsDataDir>/dnode/ep.json" into
// pData->oldDnodeEps and applies it to pData->dnodeEps.
//
// Reading: a missing file is not an error (returns 0 with no table loaded).
// Otherwise the file is read fully, parsed as JSON and decoded with
// dmDecodeEpPairs; on a decode failure the table is destroyed and reset to
// NULL. terrno is set on every failure path.
//
// Applying (runs regardless of the read result; an absent table has size 0):
//   pass 1 - logs an error for each pair whose new endpoint is already used
//            by a dnode with a different id.
//   pass 2 - every dnode whose endpoint equals a pair's old endpoint is
//            rewritten to that pair's new endpoint.
//
// pData->dnodeVer is reset to 0 before returning.
// Returns 0 on success, -1 when the file exists but could not be loaded.
static int32_t dmReadDnodePairs(SDnodeData *pData) {
  int32_t   code = -1;
  TdFilePtr pFile = NULL;
  char     *content = NULL;
  SJson    *pJson = NULL;
  int64_t   size = 0;
  char      file[PATH_MAX] = {0};
  snprintf(file, sizeof(file), "%s%sdnode%sep.json", tsDataDir, TD_DIRSEP, TD_DIRSEP);

  if (taosStatFile(file, NULL, NULL) < 0) {
    dDebug("dnode file:%s not exist", file);
    code = 0;
    goto _OVER;
  }

  pFile = taosOpenFile(file, TD_FILE_READ);
  if (pFile == NULL) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to open dnode file:%s since %s", file, terrstr());
    goto _OVER;
  }

  if (taosFStatFile(pFile, &size, NULL) < 0) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to fstat dnode file:%s since %s", file, terrstr());
    goto _OVER;
  }

  // One extra byte for the terminating NUL required by the JSON parser input.
  content = taosMemoryMalloc(size + 1);
  if (content == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _OVER;
  }

  if (taosReadFile(pFile, content, size) != size) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    dError("failed to read dnode file:%s since %s", file, terrstr());
    goto _OVER;
  }

  content[size] = '\0';

  pJson = tjsonParse(content);
  if (pJson == NULL) {
    terrno = TSDB_CODE_INVALID_JSON_FORMAT;
    goto _OVER;
  }

  pData->oldDnodeEps = taosArrayInit(1, sizeof(SDnodeEpPair));
  if (pData->oldDnodeEps == NULL) {
    // Set terrno so the generic failure log below reports the real cause.
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    dError("failed to calloc dnodeEp array since %s", strerror(errno));
    goto _OVER;
  }

  if (dmDecodeEpPairs(pJson, pData) < 0) {
    taosArrayDestroy(pData->oldDnodeEps);
    pData->oldDnodeEps = NULL;
    terrno = TSDB_CODE_INVALID_JSON_FORMAT;
    goto _OVER;
  }

  code = 0;
  dInfo("succceed to read dnode file %s", file);

_OVER:
  if (content != NULL) taosMemoryFree(content);
  if (pJson != NULL) tjsonDelete(pJson);
  if (pFile != NULL) taosCloseFile(&pFile);

  if (code != 0) {
    dError("failed to read dnode file:%s since %s", file, terrstr());
  }

  // Neither array changes size below, so the counts are read once.
  int32_t numOfPairs = (int32_t)taosArrayGetSize(pData->oldDnodeEps);
  int32_t numOfEps = (int32_t)taosArrayGetSize(pData->dnodeEps);

  // Pass 1: report new endpoints that collide with a different, existing dnode.
  // NOTE(review): the collision is only logged; the rename in pass 2 still
  // proceeds. Confirm whether this should abort instead.
  for (int32_t i = 0; i < numOfPairs; ++i) {
    SDnodeEpPair *pair = taosArrayGet(pData->oldDnodeEps, i);
    for (int32_t j = 0; j < numOfEps; ++j) {
      SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, j);
      if (pDnodeEp->id != pair->id &&
          (strcmp(pDnodeEp->ep.fqdn, pair->newFqdn) == 0 && pDnodeEp->ep.port == pair->newPort)) {
        dError("dnode:%d, can't update ep:%s:%u to %s:%u since already exists as dnode:%d", pair->id, pair->oldFqdn,
               pair->oldPort, pair->newFqdn, pair->newPort, pDnodeEp->id);
        tstrncpy(pDnodeEp->ep.fqdn, pair->newFqdn, TSDB_FQDN_LEN);
        pDnodeEp->ep.port = pair->newPort;
      }
    }
  }

  // Pass 2: rewrite every dnode that still carries a pair's old endpoint.
  for (int32_t i = 0; i < numOfPairs; ++i) {
    SDnodeEpPair *pair = taosArrayGet(pData->oldDnodeEps, i);
    for (int32_t j = 0; j < numOfEps; ++j) {
      SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, j);
      if (strcmp(pDnodeEp->ep.fqdn, pair->oldFqdn) == 0 && pDnodeEp->ep.port == pair->oldPort) {
        dInfo("dnode:%d, will update ep:%s:%u to %s:%u", pDnodeEp->id, pDnodeEp->ep.fqdn, pDnodeEp->ep.port,
              pair->newFqdn, pair->newPort);
        tstrncpy(pDnodeEp->ep.fqdn, pair->newFqdn, TSDB_FQDN_LEN);
        pDnodeEp->ep.port = pair->newPort;
      }
    }
  }

  pData->dnodeVer = 0;
  return code;
}