dndMgmt.c 21.3 KB
Newer Older
S
Shengliang Guan 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
S
Shengliang Guan 已提交
17
#include "dndMgmt.h"
S
Shengliang Guan 已提交
18 19 20 21
#include "dndBnode.h"
#include "dndMnode.h"
#include "dndQnode.h"
#include "dndSnode.h"
S
Shengliang Guan 已提交
22 23
#include "dndTransport.h"
#include "dndVnodes.h"
S
Shengliang Guan 已提交
24
#include "dndWorker.h"
S
Shengliang Guan 已提交
25

S
Shengliang Guan 已提交
26
static void dndProcessMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg);
S
Shengliang Guan 已提交
27 28 29 30 31

static int32_t dndReadDnodes(SDnode *pDnode);
static int32_t dndWriteDnodes(SDnode *pDnode);
static void   *dnodeThreadRoutine(void *param);

S
Shengliang Guan 已提交
32 33 34 35
static int32_t dndProcessConfigDnodeReq(SDnode *pDnode, SRpcMsg *pReq);
static void    dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pRsp);
static void    dndProcessAuthRsp(SDnode *pDnode, SRpcMsg *pRsp);
static void    dndProcessGrantRsp(SDnode *pDnode, SRpcMsg *pRsp);
S
Shengliang Guan 已提交
36

S
Shengliang Guan 已提交
37
int32_t dndGetDnodeId(SDnode *pDnode) {
S
Shengliang Guan 已提交
38 39 40 41
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosRLockLatch(&pMgmt->latch);
  int32_t dnodeId = pMgmt->dnodeId;
  taosRUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
42
  return dnodeId;
S
Shengliang Guan 已提交
43 44
}

S
Shengliang Guan 已提交
45
int64_t dndGetClusterId(SDnode *pDnode) {
S
Shengliang Guan 已提交
46 47
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosRLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
48
  int64_t clusterId = pMgmt->clusterId;
S
Shengliang Guan 已提交
49
  taosRUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
50
  return clusterId;
S
Shengliang Guan 已提交
51 52
}

S
Shengliang Guan 已提交
53
void dndGetDnodeEp(SDnode *pDnode, int32_t dnodeId, char *pEp, char *pFqdn, uint16_t *pPort) {
S
Shengliang Guan 已提交
54 55
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosRLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
56

S
Shengliang Guan 已提交
57
  SDnodeEp *pDnodeEp = taosHashGet(pMgmt->dnodeHash, &dnodeId, sizeof(int32_t));
S
Shengliang Guan 已提交
58 59
  if (pDnodeEp != NULL) {
    if (pPort != NULL) {
H
Haojun Liao 已提交
60
      *pPort = pDnodeEp->ep.port;
S
Shengliang Guan 已提交
61 62
    }
    if (pFqdn != NULL) {
H
Haojun Liao 已提交
63
      tstrncpy(pFqdn, pDnodeEp->ep.fqdn, TSDB_FQDN_LEN);
S
Shengliang Guan 已提交
64 65
    }
    if (pEp != NULL) {
H
Haojun Liao 已提交
66
      snprintf(pEp, TSDB_EP_LEN, "%s:%u", pDnodeEp->ep.fqdn, pDnodeEp->ep.port);
S
Shengliang Guan 已提交
67
    }
S
Shengliang Guan 已提交
68
  }
S
Shengliang Guan 已提交
69

S
Shengliang Guan 已提交
70
  taosRUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
71
}
S
Shengliang Guan 已提交
72

S
Shengliang Guan 已提交
73
void dndGetMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet) {
S
Shengliang Guan 已提交
74 75 76 77
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosRLockLatch(&pMgmt->latch);
  *pEpSet = pMgmt->mnodeEpSet;
  taosRUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
78 79
}

S
Shengliang Guan 已提交
80 81
void dndSendRedirectRsp(SDnode *pDnode, SRpcMsg *pReq) {
  tmsg_t msgType = pReq->msgType;
S
Shengliang Guan 已提交
82

S
Shengliang Guan 已提交
83
  SEpSet epSet = {0};
S
Shengliang Guan 已提交
84
  dndGetMnodeEpSet(pDnode, &epSet);
S
Shengliang Guan 已提交
85

S
Shengliang Guan 已提交
86
  dDebug("RPC %p, req:%s is redirected, num:%d use:%d", pReq->handle, TMSG_INFO(msgType), epSet.numOfEps, epSet.inUse);
S
Shengliang Guan 已提交
87
  for (int32_t i = 0; i < epSet.numOfEps; ++i) {
H
Haojun Liao 已提交
88 89
    dDebug("mnode index:%d %s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port);
    if (strcmp(epSet.eps[i].fqdn, pDnode->cfg.localFqdn) == 0 && epSet.eps[i].port == pDnode->cfg.serverPort) {
S
Shengliang Guan 已提交
90
      epSet.inUse = (i + 1) % epSet.numOfEps;
S
Shengliang Guan 已提交
91 92
    }

H
Haojun Liao 已提交
93
    epSet.eps[i].port = htons(epSet.eps[i].port);
S
Shengliang Guan 已提交
94 95
  }

S
Shengliang Guan 已提交
96
  rpcSendRedirectRsp(pReq->handle, &epSet);
S
Shengliang Guan 已提交
97 98
}

S
Shengliang Guan 已提交
99
static void dndUpdateMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet) {
S
Shengliang Guan 已提交
100
  dInfo("mnode is changed, num:%d use:%d", pEpSet->numOfEps, pEpSet->inUse);
S
Shengliang Guan 已提交
101

S
Shengliang Guan 已提交
102 103
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosWLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
104

S
Shengliang Guan 已提交
105
  pMgmt->mnodeEpSet = *pEpSet;
S
Shengliang Guan 已提交
106
  for (int32_t i = 0; i < pEpSet->numOfEps; ++i) {
H
Haojun Liao 已提交
107
    dInfo("mnode index:%d %s:%u", i, pEpSet->eps[i].fqdn, pEpSet->eps[i].port);
S
Shengliang Guan 已提交
108 109
  }

S
Shengliang Guan 已提交
110
  taosWUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
111 112
}

S
Shengliang Guan 已提交
113 114
static void dndPrintDnodes(SDnode *pDnode) {
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
115

S
Shengliang Guan 已提交
116 117 118
  dDebug("print dnode ep list, num:%d", pMgmt->dnodeEps->num);
  for (int32_t i = 0; i < pMgmt->dnodeEps->num; i++) {
    SDnodeEp *pEp = &pMgmt->dnodeEps->eps[i];
H
Haojun Liao 已提交
119
    dDebug("dnode:%d, fqdn:%s port:%u isMnode:%d", pEp->id, pEp->ep.fqdn, pEp->ep.port, pEp->isMnode);
S
Shengliang Guan 已提交
120 121 122
  }
}

S
Shengliang Guan 已提交
123 124
static void dndResetDnodes(SDnode *pDnode, SDnodeEps *pDnodeEps) {
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
125

S
Shengliang Guan 已提交
126
  int32_t size = sizeof(SDnodeEps) + pDnodeEps->num * sizeof(SDnodeEp);
S
Shengliang Guan 已提交
127
  if (pDnodeEps->num > pMgmt->dnodeEps->num) {
S
Shengliang Guan 已提交
128 129 130
    SDnodeEps *tmp = calloc(1, size);
    if (tmp == NULL) return;

S
Shengliang Guan 已提交
131 132
    tfree(pMgmt->dnodeEps);
    pMgmt->dnodeEps = tmp;
S
Shengliang Guan 已提交
133 134
  }

S
Shengliang Guan 已提交
135 136
  if (pMgmt->dnodeEps != pDnodeEps) {
    memcpy(pMgmt->dnodeEps, pDnodeEps, size);
S
Shengliang Guan 已提交
137 138
  }

S
Shengliang Guan 已提交
139
  pMgmt->mnodeEpSet.inUse = 0;
140
  pMgmt->mnodeEpSet.numOfEps = 0;
S
Shengliang Guan 已提交
141 142

  int32_t mIndex = 0;
S
Shengliang Guan 已提交
143 144
  for (int32_t i = 0; i < pMgmt->dnodeEps->num; i++) {
    SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
S
Shengliang Guan 已提交
145
    if (!pDnodeEp->isMnode) continue;
S
Shengliang Guan 已提交
146
    if (mIndex >= TSDB_MAX_REPLICA) continue;
147
    pMgmt->mnodeEpSet.numOfEps++;
H
Haojun Liao 已提交
148 149

    pMgmt->mnodeEpSet.eps[mIndex] = pDnodeEp->ep;
S
Shengliang Guan 已提交
150
    mIndex++;
S
Shengliang Guan 已提交
151 152
  }

S
Shengliang Guan 已提交
153 154 155
  for (int32_t i = 0; i < pMgmt->dnodeEps->num; ++i) {
    SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
    taosHashPut(pMgmt->dnodeHash, &pDnodeEp->id, sizeof(int32_t), pDnodeEp, sizeof(SDnodeEp));
S
Shengliang Guan 已提交
156 157
  }

S
Shengliang Guan 已提交
158
  dndPrintDnodes(pDnode);
S
Shengliang Guan 已提交
159 160
}

S
Shengliang Guan 已提交
161
static bool dndIsEpChanged(SDnode *pDnode, int32_t dnodeId, char *pEp) {
S
Shengliang Guan 已提交
162 163
  bool changed = false;

S
Shengliang Guan 已提交
164 165 166 167
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosRLockLatch(&pMgmt->latch);

  SDnodeEp *pDnodeEp = taosHashGet(pMgmt->dnodeHash, &dnodeId, sizeof(int32_t));
S
Shengliang Guan 已提交
168 169
  if (pDnodeEp != NULL) {
    char epstr[TSDB_EP_LEN + 1];
H
Haojun Liao 已提交
170
    snprintf(epstr, TSDB_EP_LEN, "%s:%u", pDnodeEp->ep.fqdn, pDnodeEp->ep.port);
S
Shengliang Guan 已提交
171
    changed = strcmp(pEp, epstr) != 0;
S
Shengliang Guan 已提交
172 173
  }

S
Shengliang Guan 已提交
174
  taosRUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
175 176 177
  return changed;
}

S
Shengliang Guan 已提交
178 179
static int32_t dndReadDnodes(SDnode *pDnode) {
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
180

S
Shengliang Guan 已提交
181
  int32_t code = TSDB_CODE_DND_DNODE_READ_FILE_ERROR;
S
Shengliang Guan 已提交
182
  int32_t len = 0;
S
Shengliang Guan 已提交
183
  int32_t maxLen = 256 * 1024;
S
Shengliang Guan 已提交
184 185 186 187
  char   *content = calloc(1, maxLen + 1);
  cJSON  *root = NULL;
  FILE   *fp = NULL;

S
Shengliang Guan 已提交
188 189 190 191
  fp = fopen(pMgmt->file, "r");
  if (fp == NULL) {
    dDebug("file %s not exist", pMgmt->file);
    code = 0;
S
Shengliang Guan 已提交
192
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
193 194 195 196
  }

  len = (int32_t)fread(content, 1, maxLen, fp);
  if (len <= 0) {
S
Shengliang Guan 已提交
197
    dError("failed to read %s since content is null", pMgmt->file);
S
Shengliang Guan 已提交
198
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
199 200 201 202 203
  }

  content[len] = 0;
  root = cJSON_Parse(content);
  if (root == NULL) {
S
Shengliang Guan 已提交
204
    dError("failed to read %s since invalid json format", pMgmt->file);
S
Shengliang Guan 已提交
205
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
206 207 208
  }

  cJSON *dnodeId = cJSON_GetObjectItem(root, "dnodeId");
S
Shengliang Guan 已提交
209
  if (!dnodeId || dnodeId->type != cJSON_Number) {
S
Shengliang Guan 已提交
210
    dError("failed to read %s since dnodeId not found", pMgmt->file);
S
Shengliang Guan 已提交
211
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
212
  }
S
Shengliang Guan 已提交
213
  pMgmt->dnodeId = dnodeId->valueint;
S
Shengliang Guan 已提交
214

S
Shengliang Guan 已提交
215
  cJSON *clusterId = cJSON_GetObjectItem(root, "clusterId");
S
Shengliang Guan 已提交
216
  if (!clusterId || clusterId->type != cJSON_String) {
S
Shengliang Guan 已提交
217
    dError("failed to read %s since clusterId not found", pMgmt->file);
S
Shengliang Guan 已提交
218
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
219
  }
S
Shengliang Guan 已提交
220
  pMgmt->clusterId = atoll(clusterId->valuestring);
S
Shengliang Guan 已提交
221

S
Shengliang Guan 已提交
222
  cJSON *dropped = cJSON_GetObjectItem(root, "dropped");
S
Shengliang Guan 已提交
223
  if (!dropped || dropped->type != cJSON_Number) {
S
Shengliang Guan 已提交
224
    dError("failed to read %s since dropped not found", pMgmt->file);
S
Shengliang Guan 已提交
225
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
226
  }
S
Shengliang Guan 已提交
227
  pMgmt->dropped = dropped->valueint;
S
Shengliang Guan 已提交
228

S
Shengliang Guan 已提交
229 230 231
  cJSON *dnodes = cJSON_GetObjectItem(root, "dnodes");
  if (!dnodes || dnodes->type != cJSON_Array) {
    dError("failed to read %s since dnodes not found", pMgmt->file);
S
Shengliang Guan 已提交
232
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
233 234
  }

S
Shengliang Guan 已提交
235 236 237
  int32_t numOfDnodes = cJSON_GetArraySize(dnodes);
  if (numOfDnodes <= 0) {
    dError("failed to read %s since numOfDnodes:%d invalid", pMgmt->file, numOfDnodes);
S
Shengliang Guan 已提交
238
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
239 240
  }

S
Shengliang Guan 已提交
241
  pMgmt->dnodeEps = calloc(1, numOfDnodes * sizeof(SDnodeEp) + sizeof(SDnodeEps));
S
Shengliang Guan 已提交
242
  if (pMgmt->dnodeEps == NULL) {
S
Shengliang Guan 已提交
243
    dError("failed to calloc dnodeEpList since %s", strerror(errno));
S
Shengliang Guan 已提交
244
    goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
245
  }
S
Shengliang Guan 已提交
246
  pMgmt->dnodeEps->num = numOfDnodes;
S
Shengliang Guan 已提交
247

S
Shengliang Guan 已提交
248
  for (int32_t i = 0; i < numOfDnodes; ++i) {
S
Shengliang Guan 已提交
249 250
    cJSON *node = cJSON_GetArrayItem(dnodes, i);
    if (node == NULL) break;
S
Shengliang Guan 已提交
251

S
Shengliang Guan 已提交
252
    SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
S
Shengliang Guan 已提交
253

H
Haojun Liao 已提交
254 255
    cJSON *did = cJSON_GetObjectItem(node, "id");
    if (!did || did->type != cJSON_Number) {
S
Shengliang Guan 已提交
256
      dError("failed to read %s since dnodeId not found", pMgmt->file);
S
Shengliang Guan 已提交
257
      goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
258
    }
H
Haojun Liao 已提交
259

S
Shengliang Guan 已提交
260
    pDnodeEp->id = dnodeId->valueint;
S
Shengliang Guan 已提交
261

S
Shengliang Guan 已提交
262
    cJSON *dnodeFqdn = cJSON_GetObjectItem(node, "fqdn");
S
Shengliang Guan 已提交
263
    if (!dnodeFqdn || dnodeFqdn->type != cJSON_String || dnodeFqdn->valuestring == NULL) {
S
Shengliang Guan 已提交
264
      dError("failed to read %s since dnodeFqdn not found", pMgmt->file);
S
Shengliang Guan 已提交
265
      goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
266
    }
H
Haojun Liao 已提交
267
    tstrncpy(pDnodeEp->ep.fqdn, dnodeFqdn->valuestring, TSDB_FQDN_LEN);
S
Shengliang Guan 已提交
268

S
Shengliang Guan 已提交
269 270
    cJSON *dnodePort = cJSON_GetObjectItem(node, "port");
    if (!dnodePort || dnodePort->type != cJSON_Number) {
S
Shengliang Guan 已提交
271
      dError("failed to read %s since dnodePort not found", pMgmt->file);
S
Shengliang Guan 已提交
272
      goto PRASE_DNODE_OVER;
S
Shengliang Guan 已提交
273
    }
H
Haojun Liao 已提交
274 275

    pDnodeEp->ep.port = dnodePort->valueint;
S
Shengliang Guan 已提交
276 277 278

    cJSON *isMnode = cJSON_GetObjectItem(node, "isMnode");
    if (!isMnode || isMnode->type != cJSON_Number) {
S
Shengliang Guan 已提交
279
      dError("failed to read %s since isMnode not found", pMgmt->file);
S
Shengliang Guan 已提交
280 281 282
      goto PRASE_DNODE_OVER;
    }
    pDnodeEp->isMnode = isMnode->valueint;
S
Shengliang Guan 已提交
283 284
  }

S
Shengliang Guan 已提交
285 286 287
  code = 0;
  dInfo("succcessed to read file %s", pMgmt->file);
  dndPrintDnodes(pDnode);
S
Shengliang Guan 已提交
288

S
Shengliang Guan 已提交
289
PRASE_DNODE_OVER:
S
Shengliang Guan 已提交
290 291 292 293
  if (content != NULL) free(content);
  if (root != NULL) cJSON_Delete(root);
  if (fp != NULL) fclose(fp);

S
Shengliang Guan 已提交
294 295
  if (dndIsEpChanged(pDnode, pMgmt->dnodeId, pDnode->cfg.localEp)) {
    dError("localEp %s different with %s and need reconfigured", pDnode->cfg.localEp, pMgmt->file);
S
Shengliang Guan 已提交
296 297 298
    return -1;
  }

S
Shengliang Guan 已提交
299 300 301
  if (pMgmt->dnodeEps == NULL) {
    pMgmt->dnodeEps = calloc(1, sizeof(SDnodeEps) + sizeof(SDnodeEp));
    pMgmt->dnodeEps->num = 1;
S
Shengliang Guan 已提交
302
    pMgmt->dnodeEps->eps[0].isMnode = 1;
H
Haojun Liao 已提交
303 304

    taosGetFqdnPortFromEp(pDnode->cfg.firstEp, &(pMgmt->dnodeEps->eps[0].ep));
S
Shengliang Guan 已提交
305 306
  }

S
Shengliang Guan 已提交
307
  dndResetDnodes(pDnode, pMgmt->dnodeEps);
S
Shengliang Guan 已提交
308 309 310 311 312

  terrno = 0;
  return 0;
}

S
Shengliang Guan 已提交
313 314
static int32_t dndWriteDnodes(SDnode *pDnode) {
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
315

S
Shengliang Guan 已提交
316 317 318 319
  FILE *fp = fopen(pMgmt->file, "w");
  if (fp == NULL) {
    dError("failed to write %s since %s", pMgmt->file, strerror(errno));
    terrno = TAOS_SYSTEM_ERROR(errno);
S
Shengliang Guan 已提交
320 321 322 323
    return -1;
  }

  int32_t len = 0;
S
Shengliang Guan 已提交
324
  int32_t maxLen = 256 * 1024;
S
Shengliang Guan 已提交
325 326 327
  char   *content = calloc(1, maxLen + 1);

  len += snprintf(content + len, maxLen - len, "{\n");
S
Shengliang Guan 已提交
328
  len += snprintf(content + len, maxLen - len, "  \"dnodeId\": %d,\n", pMgmt->dnodeId);
329
  len += snprintf(content + len, maxLen - len, "  \"clusterId\": \"%" PRId64 "\",\n", pMgmt->clusterId);
S
Shengliang Guan 已提交
330 331
  len += snprintf(content + len, maxLen - len, "  \"dropped\": %d,\n", pMgmt->dropped);
  len += snprintf(content + len, maxLen - len, "  \"dnodes\": [{\n");
S
Shengliang Guan 已提交
332 333
  for (int32_t i = 0; i < pMgmt->dnodeEps->num; ++i) {
    SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
S
Shengliang Guan 已提交
334
    len += snprintf(content + len, maxLen - len, "    \"id\": %d,\n", pDnodeEp->id);
H
Haojun Liao 已提交
335 336
    len += snprintf(content + len, maxLen - len, "    \"fqdn\": \"%s\",\n", pDnodeEp->ep.fqdn);
    len += snprintf(content + len, maxLen - len, "    \"port\": %u,\n", pDnodeEp->ep.port);
S
Shengliang Guan 已提交
337
    len += snprintf(content + len, maxLen - len, "    \"isMnode\": %d\n", pDnodeEp->isMnode);
S
Shengliang Guan 已提交
338
    if (i < pMgmt->dnodeEps->num - 1) {
S
Shengliang Guan 已提交
339 340 341 342 343 344 345 346 347 348 349 350 351
      len += snprintf(content + len, maxLen - len, "  },{\n");
    } else {
      len += snprintf(content + len, maxLen - len, "  }]\n");
    }
  }
  len += snprintf(content + len, maxLen - len, "}\n");

  fwrite(content, 1, len, fp);
  taosFsyncFile(fileno(fp));
  fclose(fp);
  free(content);
  terrno = 0;

S
Shengliang Guan 已提交
352
  pMgmt->updateTime = taosGetTimestampMs();
S
Shengliang Guan 已提交
353
  dDebug("successed to write %s", pMgmt->file);
S
Shengliang Guan 已提交
354 355 356
  return 0;
}

S
Shengliang Guan 已提交
357
void dndSendStatusReq(SDnode *pDnode) {
S
Shengliang Guan 已提交
358
  int32_t contLen = sizeof(SStatusReq) + TSDB_MAX_VNODES * sizeof(SVnodeLoad);
S
Shengliang Guan 已提交
359

S
Shengliang Guan 已提交
360
  SStatusReq *pStatus = rpcMallocCont(contLen);
S
Shengliang Guan 已提交
361 362 363 364 365
  if (pStatus == NULL) {
    dError("failed to malloc status message");
    return;
  }

S
Shengliang Guan 已提交
366 367
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosRLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
368
  pStatus->sver = htonl(pDnode->env.sver);
369
  pStatus->dver = htobe64(pMgmt->dver);
S
Shengliang Guan 已提交
370
  pStatus->dnodeId = htonl(pMgmt->dnodeId);
371
  pStatus->clusterId = htobe64(pMgmt->clusterId);
372
  pStatus->rebootTime = htobe64(pMgmt->rebootTime);
S
Shengliang Guan 已提交
373
  pStatus->updateTime = htobe64(pMgmt->updateTime);
S
Shengliang Guan 已提交
374 375 376
  pStatus->numOfCores = htonl(pDnode->env.numOfCores);
  pStatus->numOfSupportVnodes = htonl(pDnode->cfg.numOfSupportVnodes);
  tstrncpy(pStatus->dnodeEp, pDnode->cfg.localEp, TSDB_EP_LEN);
S
Shengliang Guan 已提交
377

S
Shengliang Guan 已提交
378
  pStatus->clusterCfg.statusInterval = htonl(pDnode->cfg.statusInterval);
S
Shengliang Guan 已提交
379
  pStatus->clusterCfg.checkTime = 0;
S
Shengliang Guan 已提交
380 381
  char timestr[32] = "1970-01-01 00:00:00.00";
  (void)taosParseTime(timestr, &pStatus->clusterCfg.checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0);
S
Shengliang Guan 已提交
382
  pStatus->clusterCfg.checkTime = htonl(pStatus->clusterCfg.checkTime);
S
Shengliang Guan 已提交
383 384 385
  tstrncpy(pStatus->clusterCfg.timezone, pDnode->env.timezone, TSDB_TIMEZONE_LEN);
  tstrncpy(pStatus->clusterCfg.locale, pDnode->env.locale, TSDB_LOCALE_LEN);
  tstrncpy(pStatus->clusterCfg.charset, pDnode->env.charset, TSDB_LOCALE_LEN);
S
Shengliang Guan 已提交
386
  taosRUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
387

S
Shengliang Guan 已提交
388
  dndGetVnodeLoads(pDnode, &pStatus->vnodeLoads);
S
Shengliang Guan 已提交
389
  contLen = sizeof(SStatusReq) + pStatus->vnodeLoads.num * sizeof(SVnodeLoad);
S
Shengliang Guan 已提交
390

S
Shengliang Guan 已提交
391
  SRpcMsg rpcMsg = {.pCont = pStatus, .contLen = contLen, .msgType = TDMT_MND_STATUS, .ahandle = (void *)9527};
392
  pMgmt->statusSent = 1;
S
Shengliang Guan 已提交
393

S
Shengliang Guan 已提交
394
  dTrace("pDnode:%p, send status req to mnode", pDnode);
S
Shengliang Guan 已提交
395
  dndSendReqToMnode(pDnode, &rpcMsg);
S
Shengliang Guan 已提交
396 397
}

S
Shengliang Guan 已提交
398 399
static void dndUpdateDnodeCfg(SDnode *pDnode, SDnodeCfg *pCfg) {
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
400
  if (pMgmt->dnodeId == 0) {
H
Haojun Liao 已提交
401
    dInfo("set dnodeId:%d clusterId:0x%" PRId64, pCfg->dnodeId, pCfg->clusterId);
S
Shengliang Guan 已提交
402
    taosWLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
403 404
    pMgmt->dnodeId = pCfg->dnodeId;
    pMgmt->clusterId = pCfg->clusterId;
405
    dndWriteDnodes(pDnode);
S
Shengliang Guan 已提交
406
    taosWUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
407
  }
S
Shengliang Guan 已提交
408 409
}

S
Shengliang Guan 已提交
410
static void dndUpdateDnodeEps(SDnode *pDnode, SDnodeEps *pDnodeEps) {
S
Shengliang Guan 已提交
411
  if (pDnodeEps == NULL || pDnodeEps->num <= 0) return;
S
Shengliang Guan 已提交
412

S
Shengliang Guan 已提交
413 414
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
  taosWLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
415

S
Shengliang Guan 已提交
416
  if (pDnodeEps->num != pMgmt->dnodeEps->num) {
S
Shengliang Guan 已提交
417 418
    dndResetDnodes(pDnode, pDnodeEps);
    dndWriteDnodes(pDnode);
S
Shengliang Guan 已提交
419
  } else {
S
Shengliang Guan 已提交
420
    int32_t size = pDnodeEps->num * sizeof(SDnodeEp) + sizeof(SDnodeEps);
S
Shengliang Guan 已提交
421
    if (memcmp(pMgmt->dnodeEps, pDnodeEps, size) != 0) {
S
Shengliang Guan 已提交
422 423
      dndResetDnodes(pDnode, pDnodeEps);
      dndWriteDnodes(pDnode);
S
Shengliang Guan 已提交
424 425 426
    }
  }

S
Shengliang Guan 已提交
427
  taosWUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
428 429
}

S
Shengliang Guan 已提交
430
static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pRsp) {
431 432
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;

S
Shengliang Guan 已提交
433
  if (pRsp->code != TSDB_CODE_SUCCESS) {
434
    pMgmt->statusSent = 0;
S
Shengliang Guan 已提交
435
    if (pRsp->code == TSDB_CODE_MND_DNODE_NOT_EXIST && !pMgmt->dropped && pMgmt->dnodeId > 0) {
S
Shengliang Guan 已提交
436 437 438 439
      dInfo("dnode:%d, set to dropped since not exist in mnode", pMgmt->dnodeId);
      pMgmt->dropped = 1;
      dndWriteDnodes(pDnode);
    }
440 441
    return;
  }
S
Shengliang Guan 已提交
442

S
Shengliang Guan 已提交
443 444 445
  if (pRsp->pCont != NULL && pRsp->contLen != 0) {
    SStatusRsp *pStatus = pRsp->pCont;
    pMgmt->dver = htobe64(pStatus->dver);
446

S
Shengliang Guan 已提交
447
    SDnodeCfg *pCfg = &pStatus->dnodeCfg;
S
Shengliang Guan 已提交
448 449 450 451
    pCfg->dnodeId = htonl(pCfg->dnodeId);
    pCfg->clusterId = htobe64(pCfg->clusterId);
    dndUpdateDnodeCfg(pDnode, pCfg);

S
Shengliang Guan 已提交
452
    SDnodeEps *pDnodeEps = &pStatus->dnodeEps;
S
Shengliang Guan 已提交
453 454 455
    pDnodeEps->num = htonl(pDnodeEps->num);
    for (int32_t i = 0; i < pDnodeEps->num; ++i) {
      pDnodeEps->eps[i].id = htonl(pDnodeEps->eps[i].id);
H
Haojun Liao 已提交
456
      pDnodeEps->eps[i].ep.port = htons(pDnodeEps->eps[i].ep.port);
S
Shengliang Guan 已提交
457
    }
S
Shengliang Guan 已提交
458

S
Shengliang Guan 已提交
459 460
    dndUpdateDnodeEps(pDnode, pDnodeEps);
  }
461
  pMgmt->statusSent = 0;
S
Shengliang Guan 已提交
462
}
S
Shengliang Guan 已提交
463

S
Shengliang Guan 已提交
464
static void dndProcessAuthRsp(SDnode *pDnode, SRpcMsg *pReq) { dError("auth rsp is received, but not supported yet"); }
S
Shengliang Guan 已提交
465

S
Shengliang Guan 已提交
466 467 468
static void dndProcessGrantRsp(SDnode *pDnode, SRpcMsg *pReq) {
  dError("grant rsp is received, but not supported yet");
}
S
Shengliang Guan 已提交
469

S
Shengliang Guan 已提交
470 471 472
static int32_t dndProcessConfigDnodeReq(SDnode *pDnode, SRpcMsg *pReq) {
  dError("config req is received, but not supported yet");
  SDCfgDnodeReq *pCfg = pReq->pCont;
S
Shengliang Guan 已提交
473
  return TSDB_CODE_OPS_NOT_SUPPORT;
S
Shengliang Guan 已提交
474 475
}

S
Shengliang Guan 已提交
476 477
void dndProcessStartupReq(SDnode *pDnode, SRpcMsg *pReq) {
  dDebug("startup req is received");
S
Shengliang Guan 已提交
478

S
Shengliang Guan 已提交
479
  SStartupReq *pStartup = rpcMallocCont(sizeof(SStartupReq));
S
Shengliang Guan 已提交
480
  dndGetStartup(pDnode, pStartup);
S
Shengliang Guan 已提交
481

S
Shengliang Guan 已提交
482
  dDebug("startup req is sent, step:%s desc:%s finished:%d", pStartup->name, pStartup->desc, pStartup->finished);
S
Shengliang Guan 已提交
483

S
Shengliang Guan 已提交
484
  SRpcMsg rpcRsp = {.handle = pReq->handle, .pCont = pStartup, .contLen = sizeof(SStartupReq)};
S
Shengliang Guan 已提交
485
  rpcSendResponse(&rpcRsp);
S
Shengliang Guan 已提交
486 487 488
}

static void *dnodeThreadRoutine(void *param) {
489 490
  SDnode     *pDnode = param;
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
491
  int32_t     ms = pDnode->cfg.statusInterval * 1000;
S
Shengliang Guan 已提交
492

H
Haojun Liao 已提交
493 494
  setThreadName("dnode-hb");

S
Shengliang Guan 已提交
495 496
  while (true) {
    pthread_testcancel();
S
Shengliang Guan 已提交
497
    taosMsleep(ms);
S
Shengliang Guan 已提交
498

S
Shengliang Guan 已提交
499
    if (dndGetStat(pDnode) == DND_STAT_RUNNING && !pMgmt->statusSent && !pMgmt->dropped) {
S
Shengliang Guan 已提交
500
      dndSendStatusReq(pDnode);
S
Shengliang Guan 已提交
501
    }
S
Shengliang Guan 已提交
502 503 504
  }
}

S
Shengliang Guan 已提交
505
int32_t dndInitMgmt(SDnode *pDnode) {
S
Shengliang Guan 已提交
506
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
507

S
Shengliang Guan 已提交
508
  pMgmt->dnodeId = 0;
509
  pMgmt->rebootTime = taosGetTimestampMs();
S
Shengliang Guan 已提交
510 511
  pMgmt->dropped = 0;
  pMgmt->clusterId = 0;
S
Shengliang Guan 已提交
512
  taosInitRWLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
513 514

  char path[PATH_MAX];
S
Shengliang Guan 已提交
515 516 517
  snprintf(path, PATH_MAX, "%s/dnode.json", pDnode->dir.dnode);
  pMgmt->file = strdup(path);
  if (pMgmt->file == NULL) {
S
Shengliang Guan 已提交
518 519 520 521
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

522
  pMgmt->dnodeHash = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
S
Shengliang Guan 已提交
523
  if (pMgmt->dnodeHash == NULL) {
S
Shengliang Guan 已提交
524
    dError("failed to init dnode hash");
S
Shengliang Guan 已提交
525 526
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
S
Shengliang Guan 已提交
527 528
  }

S
Shengliang Guan 已提交
529 530
  if (dndReadDnodes(pDnode) != 0) {
    dError("failed to read file:%s since %s", pMgmt->file, terrstr());
S
Shengliang Guan 已提交
531
    return -1;
S
Shengliang Guan 已提交
532 533
  }

S
Shengliang Guan 已提交
534
  if (pMgmt->dropped) {
S
Shengliang Guan 已提交
535
    dError("dnode not start since its already dropped");
S
Shengliang Guan 已提交
536 537 538
    return -1;
  }

S
Shengliang Guan 已提交
539 540
  if (dndInitWorker(pDnode, &pMgmt->mgmtWorker, DND_WORKER_SINGLE, "dnode-mgmt", 1, 1, dndProcessMgmtQueue) != 0) {
    dError("failed to start dnode mgmt worker since %s", terrstr());
S
Shengliang Guan 已提交
541 542
    return -1;
  }
S
Shengliang Guan 已提交
543

544 545 546 547 548
  if (dndInitWorker(pDnode, &pMgmt->statusWorker, DND_WORKER_SINGLE, "dnode-status", 1, 1, dndProcessMgmtQueue) != 0) {
    dError("failed to start dnode mgmt worker since %s", terrstr());
    return -1;
  }

S
Shengliang Guan 已提交
549 550
  pMgmt->threadId = taosCreateThread(dnodeThreadRoutine, pDnode);
  if (pMgmt->threadId == NULL) {
S
Shengliang Guan 已提交
551 552 553
    dError("failed to init dnode thread");
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
S
Shengliang Guan 已提交
554 555
  }

S
Shengliang Guan 已提交
556
  dInfo("dnode-mgmt is initialized");
S
Shengliang Guan 已提交
557 558 559
  return 0;
}

S
Shengliang Guan 已提交
560
void dndStopMgmt(SDnode *pDnode) {
S
Shengliang Guan 已提交
561
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
562
  dndCleanupWorker(&pMgmt->mgmtWorker);
563
  dndCleanupWorker(&pMgmt->statusWorker);
S
Shengliang Guan 已提交
564

S
Shengliang Guan 已提交
565 566 567
  if (pMgmt->threadId != NULL) {
    taosDestoryThread(pMgmt->threadId);
    pMgmt->threadId = NULL;
S
Shengliang Guan 已提交
568
  }
S
Shengliang Guan 已提交
569
}
S
Shengliang Guan 已提交
570

S
Shengliang Guan 已提交
571 572
void dndCleanupMgmt(SDnode *pDnode) {
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;
S
Shengliang Guan 已提交
573
  taosWLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
574

S
Shengliang Guan 已提交
575 576 577
  if (pMgmt->dnodeEps != NULL) {
    free(pMgmt->dnodeEps);
    pMgmt->dnodeEps = NULL;
S
Shengliang Guan 已提交
578
  }
S
Shengliang Guan 已提交
579

S
Shengliang Guan 已提交
580 581 582
  if (pMgmt->dnodeHash != NULL) {
    taosHashCleanup(pMgmt->dnodeHash);
    pMgmt->dnodeHash = NULL;
S
Shengliang Guan 已提交
583
  }
S
Shengliang Guan 已提交
584

S
Shengliang Guan 已提交
585 586 587
  if (pMgmt->file != NULL) {
    free(pMgmt->file);
    pMgmt->file = NULL;
S
Shengliang Guan 已提交
588 589
  }

S
Shengliang Guan 已提交
590
  taosWUnLockLatch(&pMgmt->latch);
S
Shengliang Guan 已提交
591
  dInfo("dnode-mgmt is cleaned up");
S
Shengliang Guan 已提交
592 593
}

S
Shengliang Guan 已提交
594
void dndProcessMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
S
Shengliang Guan 已提交
595 596
  SDnodeMgmt *pMgmt = &pDnode->dmgmt;

S
Shengliang Guan 已提交
597
  if (pEpSet && pEpSet->numOfEps > 0 && pMsg->msgType == TDMT_MND_STATUS_RSP) {
S
Shengliang Guan 已提交
598 599 600
    dndUpdateMnodeEpSet(pDnode, pEpSet);
  }

601 602 603 604 605 606
  SDnodeWorker *pWorker = &pMgmt->mgmtWorker;
  if (pMsg->msgType == TDMT_MND_STATUS_RSP) {
    pWorker = &pMgmt->statusWorker;
  }

  if (dndWriteMsgToWorker(pWorker, pMsg, sizeof(SRpcMsg)) != 0) {
S
Shengliang Guan 已提交
607 608
    if (pMsg->msgType & 1u) {
      SRpcMsg rsp = {.handle = pMsg->handle, .code = TSDB_CODE_OUT_OF_MEMORY};
S
Shengliang Guan 已提交
609 610
      rpcSendResponse(&rsp);
    }
S
Shengliang Guan 已提交
611
    rpcFreeCont(pMsg->pCont);
S
Shengliang Guan 已提交
612 613 614 615 616 617 618
    taosFreeQitem(pMsg);
  }
}

static void dndProcessMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg) {
  int32_t code = 0;

S
Shengliang Guan 已提交
619
  switch (pMsg->msgType) {
S
Shengliang Guan 已提交
620 621 622 623 624 625 626 627 628
    case TDMT_DND_CREATE_MNODE:
      code = dndProcessCreateMnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_ALTER_MNODE:
      code = dndProcessAlterMnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_DROP_MNODE:
      code = dndProcessDropMnodeReq(pDnode, pMsg);
      break;
S
Shengliang Guan 已提交
629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
    case TDMT_DND_CREATE_QNODE:
      code = dndProcessCreateQnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_DROP_QNODE:
      code = dndProcessDropQnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_CREATE_SNODE:
      code = dndProcessCreateSnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_DROP_SNODE:
      code = dndProcessDropSnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_CREATE_BNODE:
      code = dndProcessCreateBnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_DROP_BNODE:
      code = dndProcessDropBnodeReq(pDnode, pMsg);
      break;
H
Hongze Cheng 已提交
647
    case TDMT_DND_CONFIG_DNODE:
S
Shengliang Guan 已提交
648
      code = dndProcessConfigDnodeReq(pDnode, pMsg);
S
Shengliang Guan 已提交
649
      break;
H
Hongze Cheng 已提交
650
    case TDMT_MND_STATUS_RSP:
S
Shengliang Guan 已提交
651
      dndProcessStatusRsp(pDnode, pMsg);
S
Shengliang Guan 已提交
652
      break;
H
Hongze Cheng 已提交
653
    case TDMT_MND_AUTH_RSP:
S
Shengliang Guan 已提交
654
      dndProcessAuthRsp(pDnode, pMsg);
S
Shengliang Guan 已提交
655
      break;
H
Hongze Cheng 已提交
656
    case TDMT_MND_GRANT_RSP:
S
Shengliang Guan 已提交
657
      dndProcessGrantRsp(pDnode, pMsg);
S
Shengliang Guan 已提交
658
      break;
S
Shengliang Guan 已提交
659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
    case TDMT_DND_CREATE_VNODE:
      code = dndProcessCreateVnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_ALTER_VNODE:
      code = dndProcessAlterVnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_DROP_VNODE:
      code = dndProcessDropVnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_AUTH_VNODE:
      code = dndProcessAuthVnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_SYNC_VNODE:
      code = dndProcessSyncVnodeReq(pDnode, pMsg);
      break;
    case TDMT_DND_COMPACT_VNODE:
      code = dndProcessCompactVnodeReq(pDnode, pMsg);
      break;
S
Shengliang Guan 已提交
677
    default:
S
Shengliang Guan 已提交
678 679
      terrno = TSDB_CODE_MSG_NOT_PROCESSED;
      code = -1;
S
Shengliang Guan 已提交
680
      dError("RPC %p, dnode msg:%s not processed", pMsg->handle, TMSG_INFO(pMsg->msgType));
S
Shengliang Guan 已提交
681 682 683 684 685 686 687
      break;
  }

  if (pMsg->msgType & 1u) {
    if (code != 0) code = terrno;
    SRpcMsg rsp = {.code = code, .handle = pMsg->handle, .ahandle = pMsg->ahandle};
    rpcSendResponse(&rsp);
S
Shengliang Guan 已提交
688
  }
S
Shengliang Guan 已提交
689

S
Shengliang Guan 已提交
690
  rpcFreeCont(pMsg->pCont);
S
Shengliang Guan 已提交
691
  pMsg->pCont = NULL;
S
Shengliang Guan 已提交
692
  taosFreeQitem(pMsg);
S
Shengliang Guan 已提交
693
}