tq.c 40.9 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
S
Shengliang Guan 已提交
14 15
 */

H
Hongze Cheng 已提交
16
#include "tq.h"
S
Shengliang Guan 已提交
17

dengyihao's avatar
dengyihao 已提交
18 19 20
// 0: not init
// 1: already inited
// 2: wait to be inited or cleaned up
21
#define WAL_READ_TASKS_ID       (-1)
22

23 24
static int32_t tqInitialize(STQ* pTq);

L
Liu Jicong 已提交
25
/**
 * Initialize the module-wide TQ state held in tqMgmt (timer + stream module).
 *
 * tqMgmt.inited is a three-state flag: 0 = not initialized, 1 = initialized,
 * 2 = another caller is in the middle of initializing or cleaning up.
 *
 * @return 0 on success (or when the module was already initialized), -1 when
 *         the timer or the stream module could not be initialized.
 */
int32_t tqInit() {
  int8_t old;
  // Try to claim the 0 -> 2 transition; spin while someone else holds state 2.
  while (1) {
    old = atomic_val_compare_exchange_8(&tqMgmt.inited, 0, 2);
    if (old != 2) break;
  }

  if (old == 0) {
    tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ");
    if (tqMgmt.timer == NULL) {
      atomic_store_8(&tqMgmt.inited, 0);
      return -1;
    }

    if (streamInit() < 0) {
      // Roll back: release the timer created above and leave the transitional
      // state, otherwise every later tqInit()/tqCleanUp() call would spin
      // forever on inited == 2.
      taosTmrCleanUp(tqMgmt.timer);
      tqMgmt.timer = NULL;
      atomic_store_8(&tqMgmt.inited, 0);
      return -1;
    }

    atomic_store_8(&tqMgmt.inited, 1);
  }

  return 0;
}
L
Liu Jicong 已提交
46

47
/**
 * Tear down the module-wide TQ state set up by tqInit().
 *
 * Only the caller that wins the 1 -> 2 transition of tqMgmt.inited performs
 * the cleanup; the flag is set back to 0 afterwards.
 */
void tqCleanUp() {
  int8_t prev;
  // Retry while another caller is in the transitional state (2).
  do {
    prev = atomic_val_compare_exchange_8(&tqMgmt.inited, 1, 2);
  } while (prev == 2);

  if (prev != 1) {
    return;  // was not initialized, nothing to release
  }

  taosTmrCleanUp(tqMgmt.timer);
  streamCleanUp();
  atomic_store_8(&tqMgmt.inited, 0);
}
L
Liu Jicong 已提交
60

61
static void destroyTqHandle(void* data) {
62 63 64
  STqHandle* pData = (STqHandle*)data;
  qDestroyTask(pData->execHandle.task);
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
L
Liu Jicong 已提交
65
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
66
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
67
    tqCloseReader(pData->execHandle.pTqReader);
68 69
    walCloseReader(pData->pWalReader);
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
L
Liu Jicong 已提交
70
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
71
    walCloseReader(pData->pWalReader);
72
    tqCloseReader(pData->execHandle.pTqReader);
73
  }
74 75 76 77
  if(pData->msg != NULL) {
    rpcFreeCont(pData->msg->pCont);
    taosMemoryFree(pData->msg);
    pData->msg = NULL;
H
Haojun Liao 已提交
78
  }
L
Liu Jicong 已提交
79 80
}

81 82 83 84 85
/**
 * Compare two offsets by WAL version.
 *
 * @return true only when both offsets are of type TMQ_OFFSET__LOG and the
 *         left version is not greater than the right one; false otherwise.
 */
static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) {
  if (pLeft->val.type != TMQ_OFFSET__LOG) {
    return false;
  }
  if (pRight->val.type != TMQ_OFFSET__LOG) {
    return false;
  }
  return pLeft->val.version <= pRight->val.version;
}

L
Liu Jicong 已提交
86
/**
 * Allocate and initialize a TQ instance for the given vnode.
 *
 * @param path   directory path of the tq; a private copy is stored in the STQ.
 * @param pVnode owning vnode; its WAL is read for the last logged version.
 * @return the new STQ, or NULL on failure. terrno is set to
 *         TSDB_CODE_OUT_OF_MEMORY when an allocation made here fails.
 */
STQ* tqOpen(const char* path, SVnode* pVnode) {
  STQ* pTq = taosMemoryCalloc(1, sizeof(STQ));
  if (pTq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pTq->path = taosStrdup(path);
  pTq->pVnode = pVnode;
  pTq->walLogLastVer = pVnode->pWal->vers.lastVer;
  taosInitRWLatch(&pTq->lock);

  pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
  pTq->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK);
  pTq->pCheckInfo = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);

  // Any of the allocations above may fail; bail out before the tables are used.
  // tqClose() is the same cleanup the tqInitialize() failure path below relies on.
  if (pTq->path == NULL || pTq->pHandle == NULL || pTq->pPushMgr == NULL || pTq->pCheckInfo == NULL) {
    tqClose(pTq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  taosHashSetFreeFp(pTq->pHandle, destroyTqHandle);
  taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);

  int32_t code = tqInitialize(pTq);
  if (code != TSDB_CODE_SUCCESS) {
    tqClose(pTq);
    return NULL;
  }

  return pTq;
}

/**
 * Open the persistent parts of a TQ instance: tq meta, offset store and
 * stream meta, then load the stream tasks.
 *
 * @return 0 on success, -1 as soon as any step fails (nothing is rolled back
 *         here).
 */
int32_t tqInitialize(STQ* pTq) {
  if (tqMetaOpen(pTq) < 0) return -1;

  pTq->pOffsetStore = tqOffsetOpen(pTq);
  if (NULL == pTq->pOffsetStore) return -1;

  pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId);
  if (NULL == pTq->pStreamMeta) return -1;

  // the version is kept in task's meta data
  // todo check if this version is required or not
  int32_t loaded = streamLoadTasks(pTq->pStreamMeta, walGetCommittedVer(pTq->pVnode->pWal));
  return (loaded < 0) ? -1 : 0;
}
L
Liu Jicong 已提交
138

L
Liu Jicong 已提交
139
/**
 * Release everything owned by a TQ instance and free the instance itself.
 * A NULL pTq is accepted and ignored.
 */
void tqClose(STQ* pTq) {
  if (pTq != NULL) {
    tqOffsetClose(pTq->pOffsetStore);

    // in-memory tables
    taosHashCleanup(pTq->pHandle);
    taosHashCleanup(pTq->pPushMgr);
    taosHashCleanup(pTq->pCheckInfo);

    taosMemoryFree(pTq->path);

    // persistent meta
    tqMetaClose(pTq);
    streamMetaClose(pTq->pStreamMeta);

    taosMemoryFree(pTq);
  }
}
L
Liu Jicong 已提交
153

H
Haojun Liao 已提交
154 155 156 157 158 159 160 161 162 163 164 165 166
/**
 * Mark every stream task of this TQ as stopped and kill its executor.
 * The stream-meta write latch is held for the whole traversal.
 * A NULL pTq is accepted and ignored.
 */
void tqNotifyClose(STQ* pTq) {
  if (pTq == NULL) {
    return;
  }

  taosWLockLatch(&pTq->pStreamMeta->lock);

  for (void* pIter = taosHashIterate(pTq->pStreamMeta->pTasks, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter)) {
    SStreamTask* pTask = *(SStreamTask**)pIter;

    tqDebug("vgId:%d s-task:%s set dropping flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
    pTask->status.taskStatus = TASK_STATUS__STOP;

    // Measure how long killing the executor takes, for the debug log only.
    int64_t startMs = taosGetTimestampMs();
    qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
    int64_t elapsedMs = taosGetTimestampMs() - startMs;

    tqDebug("vgId:%d s-task:%s is closed in %" PRId64 "ms", pTq->pStreamMeta->vgId, pTask->id.idStr, elapsedMs);
  }

  taosWUnLockLatch(&pTq->pStreamMeta->lock);
}

H
Haojun Liao 已提交
179 180
static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
                             int64_t consumerId, int32_t type) {
L
Liu Jicong 已提交
181 182
  int32_t len = 0;
  int32_t code = 0;
H
Haojun Liao 已提交
183 184 185 186 187 188

  if (type == TMQ_MSG_TYPE__POLL_RSP) {
    tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code);
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
    tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
  }
L
Liu Jicong 已提交
189 190 191 192 193 194 195 196 197 198 199

  if (code < 0) {
    return -1;
  }

  int32_t tlen = sizeof(SMqRspHead) + len;
  void*   buf = rpcMallocCont(tlen);
  if (buf == NULL) {
    return -1;
  }

H
Haojun Liao 已提交
200 201 202
  ((SMqRspHead*)buf)->mqMsgType = type;
  ((SMqRspHead*)buf)->epoch = epoch;
  ((SMqRspHead*)buf)->consumerId = consumerId;
L
Liu Jicong 已提交
203 204 205 206 207

  void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));

  SEncoder encoder = {0};
  tEncoderInit(&encoder, abuf, len);
H
Haojun Liao 已提交
208 209 210 211

  if (type == TMQ_MSG_TYPE__POLL_RSP) {
    tEncodeSMqDataRsp(&encoder, pRsp);
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
X
Xiaoyu Wang 已提交
212
    tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp);
H
Haojun Liao 已提交
213 214
  }

L
Liu Jicong 已提交
215 216 217
  tEncoderClear(&encoder);

  SRpcMsg rsp = {
H
Haojun Liao 已提交
218
      .info = *pRpcHandleInfo,
L
Liu Jicong 已提交
219 220 221 222 223 224 225 226 227
      .pCont = buf,
      .contLen = tlen,
      .code = 0,
  };

  tmsgSendRsp(&rsp);
  return 0;
}

228 229 230 231 232 233
/**
 * Send an empty poll response to the consumer recorded in pHandle, using the
 * rpc info of the message stored on the handle.
 *
 * @return always 0; the result of the send helper is not propagated.
 */
int32_t tqPushDataRsp(STQ* pTq, STqHandle* pHandle) {
  // All fields other than the head stay zeroed.
  SMqDataRsp dataRsp = {0};
  dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP;
  dataRsp.head.epoch = pHandle->epoch;
  dataRsp.head.consumerId = pHandle->consumerId;

  (void)doSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_RSP);

  // Render both offsets for the debug log.
  char reqOffsetStr[80] = {0};
  char rspOffsetStr[80] = {0};
  tFormatOffset(reqOffsetStr, tListLen(reqOffsetStr), &dataRsp.reqOffset);
  tFormatOffset(rspOffsetStr, tListLen(rspOffsetStr), &dataRsp.rspOffset);

  tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s",
          TD_VID(pTq->pVnode), dataRsp.head.consumerId, dataRsp.head.epoch, dataRsp.blockNum, reqOffsetStr,
          rspOffsetStr);
  return 0;
}

H
Haojun Liao 已提交
244 245
/**
 * Send pRsp as the response to a poll request and log the request/response
 * offsets.
 *
 * @param type TMQ message type forwarded to the send helper.
 * @return always 0; the result of the send helper is not propagated.
 */
int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type) {
  (void)doSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type);

  // Render both offsets for the debug log.
  char reqOffsetStr[80] = {0};
  char rspOffsetStr[80] = {0};
  tFormatOffset(reqOffsetStr, sizeof(reqOffsetStr), &pRsp->reqOffset);
  tFormatOffset(rspOffsetStr, sizeof(rspOffsetStr), &pRsp->rspOffset);

  tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%" PRIx64,
          TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, reqOffsetStr, rspOffsetStr,
          pReq->reqId);

  return 0;
}

258
/**
 * Handle an offset-commit message: decode the offset, store it if it is newer
 * than the saved one, and for log offsets move the handle's WAL reference.
 *
 * @param sversion version associated with this message; a committed log
 *                 version equal to sversion - 1 is bumped by one.
 * @param msg      encoded STqOffset.
 * @param msgLen   length of msg in bytes.
 * @return 0 on success or when no update is needed, -1 on decode failure,
 *         unknown offset type, store failure or WAL ref failure.
 */
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  STqOffset offset = {0};
  int32_t   vgId = TD_VID(pTq->pVnode);

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeCode = tDecodeSTqOffset(&decoder, &offset);
  // Clear the decoder on both outcomes; the failure path used to skip this.
  tDecoderClear(&decoder);
  if (decodeCode < 0) {
    return -1;
  }

  if (offset.val.type == TMQ_OFFSET__SNAPSHOT_DATA || offset.val.type == TMQ_OFFSET__SNAPSHOT_META) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
            offset.subKey, vgId, offset.val.uid, offset.val.ts);
  } else if (offset.val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, offset.subKey, vgId,
            offset.val.version);
    if (offset.val.version + 1 == sversion) {
      offset.val.version += 1;
    }
  } else {
    tqError("invalid commit offset type:%d", offset.val.type);
    return -1;
  }

  STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, offset.subKey);
  if (pSavedOffset != NULL && tqOffsetLessOrEqual(&offset, pSavedOffset)) {
    return 0;  // no need to update the offset value
  }

  // save the new offset value
  if (tqOffsetWrite(pTq->pOffsetStore, &offset) < 0) {
    return -1;
  }

  if (offset.val.type == TMQ_OFFSET__LOG) {
    // Move the WAL reference of the matching handle, when one exists.
    STqHandle* pHandle = taosHashGet(pTq->pHandle, offset.subKey, strlen(offset.subKey));
    if (pHandle && (walRefVer(pHandle->pRef, offset.val.version) < 0)) {
      return -1;
    }
  }

  return 0;
}

L
Liu Jicong 已提交
304
/**
 * Check whether a column of a table may be modified, by scanning the
 * check-info table for an entry that lists it.
 *
 * @param tbUid uid compared against each entry's ntbUid.
 * @param colId column id looked up in the entry's colIdList.
 * @return -1 when some entry for tbUid lists colId, 0 otherwise.
 */
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
  for (void* pIter = taosHashIterate(pTq->pCheckInfo, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pCheckInfo, pIter)) {
    STqCheckInfo* pCheck = (STqCheckInfo*)pIter;
    if (pCheck->ntbUid != tbUid) {
      continue;
    }

    int32_t numOfCols = taosArrayGetSize(pCheck->colIdList);
    for (int32_t idx = 0; idx < numOfCols; idx++) {
      int16_t forbidColId = *(int16_t*)taosArrayGet(pCheck->colIdList, idx);
      if (forbidColId != colId) {
        continue;
      }
      // Leaving the iteration early: it has to be cancelled explicitly.
      taosHashCancelIterate(pTq->pCheckInfo, pIter);
      return -1;
    }
  }

  return 0;
}

330
/**
 * Handle a consumer poll request: validate it against the stored handle,
 * advance the handle's epoch if the request carries a newer one, then extract
 * data for the consumer.
 *
 * @return the result of tqExtractDataForMq, or -1 with terrno set when the
 *         message cannot be deserialized, the subscription key is unknown, or
 *         the consumer id does not match the handle.
 */
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  int32_t      vgId = TD_VID(pTq->pVnode);
  int64_t      consumerId = req.consumerId;
  int32_t      reqEpoch = req.epoch;
  STqOffsetVal reqOffset = req.reqOffset;

  // 1. find handle
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (pHandle == NULL) {
    tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  // 2. check re-balance status (under the read latch)
  taosRLockLatch(&pTq->lock);
  bool consumerMismatch = (pHandle->consumerId != consumerId);
  if (consumerMismatch) {
    tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId);
    terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
  }
  taosRUnLockLatch(&pTq->lock);
  if (consumerMismatch) {
    return -1;
  }

  // 3. update the epoch value (under the write latch)
  taosWLockLatch(&pTq->lock);
  int32_t savedEpoch = pHandle->epoch;
  if (reqEpoch > savedEpoch) {
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch,
            reqEpoch);
    pHandle->epoch = reqEpoch;
  }
  taosWUnLockLatch(&pTq->lock);

  char offsetStr[80];
  tFormatOffset(offsetStr, 80, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64,
          consumerId, req.epoch, pHandle->subKey, vgId, offsetStr, req.reqId);

  return tqExtractDataForMq(pTq, pHandle, &req, pMsg);
}

380
/**
 * Handle a delete-subscription message: drop the in-memory handle (closing
 * its WAL reference first), the cached offset and the persisted handle for
 * the given subscription key.
 *
 * @param msg points to an SMqVDeleteReq.
 * @return always 0; failures of the individual steps are only logged.
 */
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;

  tqDebug("vgId:%d, tq process delete sub req %s", pTq->pVnode->config.vgId, pReq->subKey);

  int32_t code = 0;

  // 1. in-memory handle
  STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
  if (pHandle != NULL) {
    if (pHandle->pRef != NULL) {
      walCloseRef(pTq->pVnode->pWal, pHandle->pRef->refId);
    }

    code = taosHashRemove(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
    if (code != 0) {
      tqError("cannot process tq delete req %s, since no such handle", pReq->subKey);
    }
  }

  // 2. cached offset
  code = tqOffsetDelete(pTq->pOffsetStore, pReq->subKey);
  if (code != 0) {
    tqError("cannot process tq delete req %s, since no such offset in cache", pReq->subKey);
  }

  // 3. persisted handle
  if (tqMetaDeleteHandle(pTq, pReq->subKey) < 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pReq->subKey);
  }

  return 0;
}

415
/**
 * Handle an add-check-info message: decode the STqCheckInfo, put it into the
 * in-memory check-info table keyed by topic, and persist the raw message.
 *
 * @param msg    encoded STqCheckInfo; also the payload that is persisted.
 * @param msgLen length of msg in bytes.
 * @return 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY when
 *         decoding, the hash insert or the meta save fails.
 */
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  STqCheckInfo info = {0};
  SDecoder     decoder;

  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeCode = tDecodeSTqCheckInfo(&decoder, &info);
  // Clear the decoder on both outcomes; the failure path used to skip this.
  tDecoderClear(&decoder);
  if (decodeCode < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  if (taosHashPut(pTq->pCheckInfo, info.topic, strlen(info.topic), &info, sizeof(STqCheckInfo)) < 0) {
    // The table did not take the entry, so release what decoding allocated,
    // using the same destructor that is registered as the table's free function.
    tDeleteSTqCheckInfo(&info);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // From here on the table owns the contents of info.
  if (tqMetaSaveCheckInfo(pTq, info.topic, msg, msgLen) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

435
/**
 * Handle a delete-check-info message: remove the entry keyed by msg (used as
 * a NUL-terminated key) from the in-memory table and from the tq meta.
 *
 * @return 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY when either
 *         removal fails.
 */
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t removed = taosHashRemove(pTq->pCheckInfo, msg, strlen(msg));
  if (removed >= 0) {
    if (tqMetaDeleteCheckInfo(pTq, msg) >= 0) {
      return 0;
    }
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}

447
/**
 * Handle a re-balance (subscribe) message for one subscription key.
 *
 * - When a handle already exists for the key: bump or reset its epoch
 *   depending on whether the consumer changes, kill the running task,
 *   unregister it from the push manager and persist the handle.
 * - When no handle exists: build a new handle for req.newConsumerId
 *   according to the subscription type, insert it into the handle table and
 *   persist it.
 *
 * @param msg encoded SMqRebVgReq.
 * @return 0 or the result of tqMetaSaveHandle; -1 when the committed WAL
 *         version cannot be referenced for a new handle.
 */
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int         ret = 0;
  SMqRebVgReq req = {0};
  tDecodeSMqRebVgReq(msg, &req);

  SVnode* pVnode = pTq->pVnode;
  int32_t vgId = TD_VID(pVnode);

  tqDebug("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pVnode->config.vgId, req.subKey,
          req.oldConsumerId, req.newConsumerId);

  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (pHandle != NULL) {
    // ---- existing handle: consumer stays or is switched ----
    if (pHandle->consumerId != req.newConsumerId) {
      tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
             req.newConsumerId);
      atomic_store_64(&pHandle->consumerId, req.newConsumerId);
      atomic_store_32(&pHandle->epoch, 0);
    } else {
      tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId);
      atomic_add_fetch_32(&pHandle->epoch, 1);
    }

    // kill executing task
    qTaskInfo_t pRunningTask = pHandle->execHandle.task;
    if (pRunningTask != NULL) {
      qKillTask(pRunningTask, TSDB_CODE_SUCCESS);
    }

    taosWLockLatch(&pTq->lock);
    // remove if it has been register in the push manager, and return one empty block to consumer
    tqUnregisterPushHandle(pTq, pHandle);
    if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
      qStreamCloseTsdbReader(pRunningTask);
    }
    taosWUnLockLatch(&pTq->lock);

    ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
  } else {
    // ---- no handle yet: build one for the new consumer ----
    if (req.oldConsumerId != -1) {
      tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
              req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
    }

    if (req.newConsumerId == -1) {
      tqError("vgId:%d, tq invalid re-balance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
      goto end;
    }

    // The handle is built on the stack and copied into the table by taosHashPut below.
    STqHandle newHandle = {0};
    pHandle = &newHandle;

    uint64_t oldConsumerId = pHandle->consumerId;
    memcpy(pHandle->subKey, req.subKey, TSDB_SUBSCRIBE_KEY_LEN);
    pHandle->consumerId = req.newConsumerId;
    pHandle->epoch = -1;

    pHandle->execHandle.subType = req.subType;
    pHandle->fetchMeta = req.withMeta;

    // TODO version should be assigned and refed during preprocess
    SWalRef* pRef = walRefCommittedVer(pVnode->pWal);
    if (pRef == NULL) {
      ret = -1;
      goto end;
    }

    int64_t ver = pRef->refVer;
    pHandle->pRef = pRef;

    SReadHandle readHandle = {
        .meta = pVnode->pMeta, .vnode = pVnode, .initTableReader = true, .initTqReader = true, .version = ver};
    pHandle->snapshotVer = ver;

    if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
      // The handle takes over the query message; clear it in req so the
      // common exit below does not free it.
      pHandle->execHandle.execCol.qmsg = req.qmsg;
      req.qmsg = NULL;

      pHandle->execHandle.task = qCreateQueueExecTaskInfo(pHandle->execHandle.execCol.qmsg, &readHandle, vgId,
                                                          &pHandle->execHandle.numOfCols, req.newConsumerId);
      void* pScanner = NULL;
      qExtractStreamScanner(pHandle->execHandle.task, &pScanner);
      pHandle->execHandle.pTqReader = qExtractReaderFromStreamScanner(pScanner);
    } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) {
      pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL);
      pHandle->execHandle.pTqReader = tqReaderOpen(pVnode);

      pHandle->execHandle.execDb.pFilterOutTbUid =
          taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
      buildSnapContext(readHandle.meta, readHandle.version, 0, pHandle->execHandle.subType, pHandle->fetchMeta,
                       (SSnapContext**)(&readHandle.sContext));

      pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &readHandle, vgId, NULL, req.newConsumerId);
    } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
      pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL);
      pHandle->execHandle.execTb.suid = req.suid;

      // Collect the child tables of the super table and hand them to the reader.
      SArray* pCtbUidList = taosArrayInit(0, sizeof(int64_t));
      vnodeGetCtbIdList(pVnode, req.suid, pCtbUidList);
      tqDebug("vgId:%d, tq try to get all ctb, suid:%" PRId64, pVnode->config.vgId, req.suid);
      for (int32_t i = 0; i < taosArrayGetSize(pCtbUidList); i++) {
        int64_t tbUid = *(int64_t*)taosArrayGet(pCtbUidList, i);
        tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid);
      }
      pHandle->execHandle.pTqReader = tqReaderOpen(pVnode);
      tqReaderSetTbUidList(pHandle->execHandle.pTqReader, pCtbUidList);
      taosArrayDestroy(pCtbUidList);

      buildSnapContext(readHandle.meta, readHandle.version, req.suid, pHandle->execHandle.subType,
                       pHandle->fetchMeta, (SSnapContext**)(&readHandle.sContext));
      pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &readHandle, vgId, NULL, req.newConsumerId);
    }

    taosHashPut(pTq->pHandle, req.subKey, strlen(req.subKey), pHandle, sizeof(STqHandle));
    tqDebug("try to persist handle %s consumer:0x%" PRIx64 " , old consumer:0x%" PRIx64, req.subKey,
            pHandle->consumerId, oldConsumerId);
    ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
  }

end:
  taosMemoryFree(req.qmsg);
  return ret;
}
574

575
/**
 * Fill in the runtime parts of a stream task: queues, status fields, state
 * backend and executor (for source/agg levels), sink callbacks, WAL reader
 * (source level) and trigger.
 *
 * @param ver checkpoint version stored in chkInfo; chkInfo.version is
 *            incremented by one just before returning successfully.
 * @return 0 on success, -1 when a queue, the state backend, the executor or
 *         the sink schema cannot be created (nothing is rolled back here).
 */
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
  int32_t vgId = TD_VID(pTq->pVnode);

  pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId);
  pTask->refCnt = 1;
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;

  pTask->inputQueue = streamQueueOpen();
  pTask->outputQueue = streamQueueOpen();
  if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) {
    return -1;
  }

  pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
  pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
  pTask->pMsgCb = &pTq->pVnode->msgCb;
  pTask->pMeta = pTq->pStreamMeta;
  pTask->chkInfo.version = ver;
  pTask->chkInfo.currentVer = ver;

  if (pTask->fillHistory) {
    pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM;
  } else {
    pTask->status.taskStatus = TASK_STATUS__NORMAL;
  }

  // expand executor: source and agg tasks both need a state backend and an executor
  const bool isSourceTask = (pTask->taskLevel == TASK_LEVEL__SOURCE);
  if (isSourceTask || pTask->taskLevel == TASK_LEVEL__AGG) {
    pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
    if (pTask->pState == NULL) {
      return -1;
    }

    // Members not set below stay zero.
    SReadHandle handle = {.pStateBackend = pTask->pState};
    if (isSourceTask) {
      handle.meta = pTq->pVnode->pMeta;
      handle.vnode = pTq->pVnode;
      handle.initTqReader = 1;
    } else {
      handle.numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo);
    }

    pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
    if (pTask->exec.pExecutor == NULL) {
      return -1;
    }
  }

  // sink
  if (pTask->outputType == TASK_OUTPUT__SMA) {
    pTask->smaSink.vnode = pTq->pVnode;
    pTask->smaSink.smaSink = smaHandleRes;
  } else if (pTask->outputType == TASK_OUTPUT__TABLE) {
    pTask->tbSink.vnode = pTq->pVnode;
    pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline2;

    // Use the schema version recorded in meta when the lookup succeeds, 1 otherwise.
    int32_t   schemaVer = 1;
    SMetaInfo info = {0};
    if (metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL) == TSDB_CODE_SUCCESS) {
      schemaVer = info.skmVer;
    }

    SSchemaWrapper* pWrapper = pTask->tbSink.pSchemaWrapper;
    pTask->tbSink.pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, schemaVer);
    if (pTask->tbSink.pTSchema == NULL) {
      return -1;
    }
  }

  if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
    pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL);
  }

  streamSetupTrigger(pTask);

  tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", vgId, pTask->id.idStr,
         pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel);

  // next valid version will add one
  pTask->chkInfo.version += 1;
  return 0;
}
L
Liu Jicong 已提交
662

663 664 665 666 667 668
/**
 * Handle a stream task check request: look up the downstream task, compute
 * its check status and send an SStreamTaskCheckRsp back.
 *
 * The request body follows an SMsgHead inside pMsg->pCont.
 *
 * @return 0 once the response is handed to tmsgSendRsp; -1 when the request
 *         cannot be decoded, the response cannot be encoded, or the response
 *         buffer cannot be allocated.
 */
int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  // Zero-initialized and checked: a failed decode must not lead to reading
  // indeterminate request fields below.
  SStreamTaskCheckReq req = {0};
  SDecoder            decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  int32_t decodeCode = tDecodeSStreamTaskCheckReq(&decoder, &req);
  tDecoderClear(&decoder);
  if (decodeCode < 0) {
    tqError("unable to decode task check req %d", __LINE__);
    return -1;
  }

  int32_t             taskId = req.downstreamTaskId;
  SStreamTaskCheckRsp rsp = {
      .reqId = req.reqId,
      .streamId = req.streamId,
      .childId = req.childId,
      .downstreamNodeId = req.downstreamNodeId,
      .downstreamTaskId = req.downstreamTaskId,
      .upstreamNodeId = req.upstreamNodeId,
      .upstreamTaskId = req.upstreamTaskId,
  };

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);

  if (pTask) {
    rsp.status = streamTaskCheckStatus(pTask);
    // Read the status for the log before the task reference is released.
    int32_t taskStatus = pTask->status.taskStatus;
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);

    tqDebug("tq recv task check req(reqId:0x%" PRIx64
            ") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d",
            rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, taskStatus, rsp.upstreamTaskId,
            rsp.upstreamNodeId, rsp.status);
  } else {
    rsp.status = 0;
    tqDebug("tq recv task check(taskId:%d not built yet) req(reqId:0x%" PRIx64
            ") %d at node %d, check req from task %d at node %d, rsp status %d",
            taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId,
            rsp.status);
  }

  SEncoder encoder;
  int32_t  code = 0;
  int32_t  len = 0;
  tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code);
  if (code < 0) {
    tqError("unable to encode rsp %d", __LINE__);
    return -1;
  }

  void* buf = rpcMallocCont(sizeof(SMsgHead) + len);
  if (buf == NULL) {
    // Same handling as doSendDataRsp: never write through a failed allocation.
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId);

  void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncoderInit(&encoder, (uint8_t*)abuf, len);
  tEncodeSStreamTaskCheckRsp(&encoder, &rsp);
  tEncoderClear(&encoder);

  SRpcMsg rspMsg = { .code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info };
  tmsgSendRsp(&rspMsg);
  return 0;
}

723
/**
 * Handle a stream task check response: decode it and forward it to the
 * upstream task it addresses.
 *
 * @param msg    encoded SStreamTaskCheckRsp.
 * @param msgLen length of msg in bytes.
 * @return the result of streamProcessTaskCheckRsp; -1 when decoding fails or
 *         the upstream task cannot be acquired.
 */
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamTaskCheckRsp rsp;
  SDecoder            decoder;

  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t code = tDecodeSStreamTaskCheckRsp(&decoder, &rsp);
  tDecoderClear(&decoder);  // cleared on both the success and the failure path
  if (code < 0) {
    return -1;
  }

  tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d",
          rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);

  SStreamTask* pUpstreamTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.upstreamTaskId);
  if (pUpstreamTask == NULL) {
    return -1;
  }

  code = streamProcessTaskCheckRsp(pUpstreamTask, &rsp, sversion);
  streamMetaReleaseTask(pTq->pStreamMeta, pUpstreamTask);
  return code;
}

749
/**
 * Handle a task deploy message: decode the stream task, add it to the stream
 * meta under the meta write latch, and start the downstream check when the
 * task has to fill history.
 *
 * Does nothing and returns 0 when tsDisableStream is set.
 *
 * @param sversion version passed to streamMetaAddDeployedTask and to the
 *                 downstream check.
 * @param msg      encoded SStreamTask.
 * @param msgLen   length of msg in bytes.
 * @return 0 on success; -1 on allocation, decode or add failure.
 */
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (tsDisableStream) {
    return 0;
  }

  // 1.deserialize msg and build task
  SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask));
  if (pTask == NULL) {
    return -1;
  }

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t code = tDecodeStreamTask(&decoder, pTask);
  tDecoderClear(&decoder);
  if (code < 0) {
    taosMemoryFree(pTask);
    return -1;
  }

  // 2.save task, use the newest commit version as the initial start version of stream task.
  taosWLockLatch(&pTq->pStreamMeta->lock);
  code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask);
  if (code < 0) {
    // logged while the latch is still held
    tqError("vgId:%d failed to add s-task:%s, total:%d", TD_VID(pTq->pVnode), pTask->id.idStr,
            streamMetaGetNumOfTasks(pTq->pStreamMeta));
  }
  taosWUnLockLatch(&pTq->pStreamMeta->lock);
  if (code < 0) {
    return -1;
  }

  // 3.go through recover steps to fill history
  if (pTask->fillHistory) {
    streamTaskCheckDownstream(pTask, sversion);
  }

  tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", TD_VID(pTq->pVnode),
          pTask->id.idStr, pTask->status.taskStatus, streamMetaGetNumOfTasks(pTq->pStreamMeta));
  return 0;
}

L
Liu Jicong 已提交
798 799 800 801 802
// Run recovery step 1 for a source task and queue the request that launches step 2.
//
// pMsg->pCont is interpreted as an SStreamRecoverStep1Req. The task it names is
// acquired, the step-1 scan is executed, an SStreamRecoverStep2Req is built and then
// posted to the vnode write queue as TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE.
//
// The task reference is held until the last access to pTask; once
// streamMetaReleaseTask() has been called the pointer is not touched again.
//
// Returns 0 when the step-2 request was queued or the task is being dropped, and -1
// when the task cannot be acquired, its checkpoint version is not positive, the
// step-2 request cannot be built, or the message buffer cannot be allocated.
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
  SStreamRecoverStep1Req* pReq = (SStreamRecoverStep1Req*)pMsg->pCont;
  SStreamTask*            pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
  if (pTask == NULL) {
    return -1;
  }

  // check param
  int64_t fillVer1 = pTask->chkInfo.version;
  if (fillVer1 <= 0) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return -1;
  }

  // do recovery step 1
  tqDebug("s-task:%s start recover step 1 scan", pTask->id.idStr);
  int64_t st = taosGetTimestampMs();

  streamSourceRecoverScanStep1(pTask);
  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return 0;
  }

  double el = (taosGetTimestampMs() - st) / 1000.0;
  tqDebug("s-task:%s recover step 1 ended, elapsed time:%.2fs", pTask->id.idStr, el);

  // build msg to launch next step
  SStreamRecoverStep2Req req;
  int32_t                code = streamBuildSourceRecover2Req(pTask, &req);
  if (code < 0) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return -1;
  }

  // re-check the status while the reference is still held
  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return 0;
  }

  // serialize msg: the payload is the step-2 request, so it is sized from that object
  int32_t len = sizeof(req);

  void* serializedReq = rpcMallocCont(len);
  if (serializedReq == NULL) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return -1;
  }

  memcpy(serializedReq, &req, len);

  // dispatch msg
  tqDebug("s-task:%s start recover block stage", pTask->id.idStr);

  // last use of pTask was the log line above; drop the reference now
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);

  SRpcMsg rpcMsg = {
      .code = 0, .contLen = len, .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, .pCont = serializedReq};
  tmsgPutToQueue(&pTq->pVnode->msgCb, WRITE_QUEUE, &rpcMsg);
  return 0;
}

862
// Run recovery step 2 for the task named in an SStreamRecoverStep2Req.
//
// After the step-2 scan the task parameters are restored, the task status is set back
// to normal, a recover-finish request is dispatched downstream, the fillHistory flag
// is cleared and the task is saved to the stream meta.
//
// Returns 0 on success or when the task is found to be dropping after the scan, and
// -1 when the task cannot be acquired or any of the steps reports a failure.
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamRecoverStep2Req* pStep2 = (SStreamRecoverStep2Req*)msg;

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pStep2->taskId);
  if (pTask == NULL) {
    return -1;
  }

  // every exit below goes through END so the reference is released exactly once
  int32_t ret = -1;

  // do recovery step 2
  if (streamSourceRecoverScanStep2(pTask, sversion) < 0) {
    goto END;
  }

  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
    ret = 0;
    goto END;
  }

  // restore param
  if (streamRestoreParam(pTask) < 0) {
    goto END;
  }

  // set status normal
  if (streamSetStatusNormal(pTask) < 0) {
    goto END;
  }

  // dispatch recover finish req to all related downstream task
  if (streamDispatchRecoverFinishReq(pTask) < 0) {
    goto END;
  }

  atomic_store_8(&pTask->fillHistory, 0);
  streamMetaSaveTask(pTq->pStreamMeta, pTask);
  ret = 0;

END:
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return ret;
}

L
Liu Jicong 已提交
912 913 914
// Handle a recover-finish request sent by an upstream task.
//
// The payload that follows the SMsgHead is decoded into an SStreamRecoverFinishReq, the
// destination task is acquired and the request is passed to
// streamProcessRecoverFinishReq().
//
// Returns 0 on success and -1 when the payload cannot be decoded, the task cannot be
// acquired, or processing fails.
int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  // deserialize
  SStreamRecoverFinishReq req = {0};

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t code = tDecodeSStreamRecoverFinishReq(&decoder, &req);
  tDecoderClear(&decoder);
  if (code < 0) {
    // do not look up a task from a request that was not decoded
    tqError("vgId:%d failed to decode stream recover finish req", TD_VID(pTq->pVnode));
    return -1;
  }

  // find task
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
  if (pTask == NULL) {
    return -1;
  }

  // do process request
  code = streamProcessRecoverFinishReq(pTask, req.childId);
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);

  return (code < 0) ? -1 : 0;
}
L
Liu Jicong 已提交
938

L
Liu Jicong 已提交
939 940 941 942 943
// Handler for the response to a recover-finish request. The response carries nothing
// that is acted upon here, so both arguments are ignored and 0 is returned.
int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}

L
Liu Jicong 已提交
944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
// Turn a delete result into a STREAM_DELETE_DATA block and enqueue it on every
// source-level stream task.
//
// pReq/len hold an encoded SDeleteRes. One row is produced per uid in the result: the
// start/end timestamp columns carry skey/ekey, the uid column carries the table uid,
// and the group-id and calculate-start/end columns are set to NULL.
//
// The block is shared between tasks through a heap-allocated reference counter: this
// function owns one reference and every successfully queued item owns one more. The
// block and the counter are destroyed here only when the counter reaches zero after
// this function drops its own reference.
//
// Returns 0 when the request was handled (including when there is nothing to delete)
// and -1, with terrno set to TSDB_CODE_OUT_OF_MEMORY, when the data block or the
// reference counter cannot be allocated.
int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) {
  bool        failed = false;
  SDecoder*   pCoder = &(SDecoder){0};
  SDeleteRes* pRes = &(SDeleteRes){0};

  pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t));
  if (pRes->uidList == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    failed = true;
  }

  tDecoderInit(pCoder, pReq, len);
  tDecodeDeleteRes(pCoder, pRes);
  tDecoderClear(pCoder);

  int32_t sz = taosArrayGetSize(pRes->uidList);
  if (sz == 0 || pRes->affectedRows == 0) {
    taosArrayDestroy(pRes->uidList);
    return 0;
  }

  SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA);
  if (pDelBlock == NULL) {
    taosArrayDestroy(pRes->uidList);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  blockDataEnsureCapacity(pDelBlock, sz);
  pDelBlock->info.rows = sz;
  pDelBlock->info.version = ver;

  // the column handles do not change per row, so look them up once
  SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX);
  SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX);
  SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX);
  SColumnInfoData* pGroupCol = taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX);
  SColumnInfoData* pCalStartCol = taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX);
  SColumnInfoData* pCalEndCol = taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX);

  for (int32_t i = 0; i < sz; i++) {
    // start key column
    colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false);
    // end key column
    colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false);
    // uid column
    int64_t* pUid = taosArrayGet(pRes->uidList, i);
    colDataSetVal(pUidCol, i, (const char*)pUid, false);

    colDataSetNULL(pGroupCol, i);
    colDataSetNULL(pCalStartCol, i);
    colDataSetNULL(pCalEndCol, i);
  }

  taosArrayDestroy(pRes->uidList);

  // shared reference counter; starts at 1 for the reference held by this function
  int32_t* pRef = taosMemoryMalloc(sizeof(int32_t));
  if (pRef == NULL) {
    blockDataDestroy(pDelBlock);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  *pRef = 1;

  taosWLockLatch(&pTq->pStreamMeta->lock);

  void* pIter = NULL;
  while (1) {
    pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter);
    if (pIter == NULL) {
      break;
    }

    SStreamTask* pTask = *(SStreamTask**)pIter;
    if (pTask->taskLevel != TASK_LEVEL__SOURCE) {
      continue;
    }

    qDebug("s-task:%s delete req enqueue, ver: %" PRId64, pTask->id.idStr, ver);

    if (failed) {
      streamTaskInputFail(pTask);
      continue;
    }

    SStreamRefDataBlock* pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
    if (pRefBlock == NULL) {
      // no queue item could be allocated for this task; report it the same way as the
      // other input-failure path instead of dereferencing NULL
      streamTaskInputFail(pTask);
      continue;
    }

    pRefBlock->type = STREAM_INPUT__REF_DATA_BLOCK;
    pRefBlock->pBlock = pDelBlock;
    pRefBlock->dataRef = pRef;
    atomic_add_fetch_32(pRefBlock->dataRef, 1);

    if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pRefBlock) < 0) {
      // the item never reached the queue: give back its reference and free it
      atomic_sub_fetch_32(pRef, 1);
      taosFreeQitem(pRefBlock);
      continue;
    }

    if (streamSchedExec(pTask) < 0) {
      qError("s-task:%s stream task launch failed", pTask->id.idStr);
      continue;
    }
  }

  taosWUnLockLatch(&pTq->pStreamMeta->lock);

  // drop the reference held by this function; whoever reaches zero frees the block
  int32_t ref = atomic_sub_fetch_32(pRef, 1);
  if (ref == 0) {
    blockDataDestroy(pDelBlock);
    taosMemoryFree(pRef);
  }

  return 0;
}

1065 1066 1067 1068
// Re-issue the pending consume request of every handle registered in pTq->pPushMgr.
//
// While holding the pTq write latch, each handle's saved message is posted to the query
// queue as a TDMT_VND_TMQ_CONSUME message, the saved message wrapper is freed and the
// handle's msg pointer is reset. Afterwards the push manager table is cleared.
// Iteration stops early when a handle without a saved message is met.
//
// Always returns 0.
int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) {
  int32_t vgId = TD_VID(pTq->pVnode);

  taosWLockLatch(&pTq->lock);

  if (taosHashGetSize(pTq->pPushMgr) > 0) {
    for (void* pEntry = taosHashIterate(pTq->pPushMgr, NULL); pEntry != NULL;
         pEntry = taosHashIterate(pTq->pPushMgr, pEntry)) {
      STqHandle* pHandle = *(STqHandle**)pEntry;
      tqDebug("vgId:%d start set submit for pHandle:%p, consumer:0x%" PRIx64, vgId, pHandle, pHandle->consumerId);

      // a true result from ASSERT() is handled as the violation case
      if (ASSERT(pHandle->msg != NULL)) {
        tqError("pHandle->msg should not be null");
        break;
      }

      SRpcMsg consumeMsg = {.msgType = TDMT_VND_TMQ_CONSUME,
                            .pCont = pHandle->msg->pCont,
                            .contLen = pHandle->msg->contLen,
                            .info = pHandle->msg->info};
      tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &consumeMsg);
      taosMemoryFree(pHandle->msg);
      pHandle->msg = NULL;
    }

    taosHashClear(pTq->pPushMgr);
  }

  // unlock
  taosWUnLockLatch(&pTq->lock);
  return 0;
}

L
Liu Jicong 已提交
1098 1099
// Handle a stream task run request.
//
// A request whose taskId equals WAL_READ_TASKS_ID triggers a WAL scan for all tasks.
// Otherwise the named task is acquired and, when its status is TASK_STATUS__NORMAL,
// executed through streamProcessRunReq(); in any other status the request is ignored.
// After a task-specific request, tqStartStreamTasks() is called.
//
// Returns 0 on success and -1 when the task cannot be found.
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamTaskRunReq* pRunReq = pMsg->pCont;

  int32_t taskId = pRunReq->taskId;
  int32_t vgId = TD_VID(pTq->pVnode);

  // all tasks are extracted submit data from the wal
  if (taskId == WAL_READ_TASKS_ID) {
    tqStreamTasksScanWal(pTq);
    return 0;
  }

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (pTask == NULL) {
    tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId);
    return -1;
  }

  if (pTask->status.taskStatus != TASK_STATUS__NORMAL) {
    tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr);
  } else {
    tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId,
            pTask->id.idStr, pTask->chkInfo.version);
    streamProcessRunReq(pTask);
  }

  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  tqStartStreamTasks(pTq);
  return 0;
}

L
Liu Jicong 已提交
1128
// Handle a stream dispatch request addressed to a task on this vnode.
//
// The payload that follows the SMsgHead is decoded into an SStreamDispatchReq. When the
// destination task exists, the request is passed to streamProcessDispatchReq() together
// with a response template carrying the caller's rpc info; otherwise the decoded
// request is destroyed.
//
// The decoder is cleared right after decoding, in the same way vnodeEnqueueStreamMsg()
// handles the same request type.
//
// Returns 0 when the request was handed to the task and -1 when decoding fails or the
// task does not exist.
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  SStreamDispatchReq req = {0};
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  int32_t code = tDecodeStreamDispatchReq(&decoder, &req);
  tDecoderClear(&decoder);
  if (code < 0) {
    // never look up a task from a request that was not decoded
    tqError("vgId:%d failed to decode stream dispatch req", TD_VID(pTq->pVnode));
    return -1;
  }

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
  if (pTask == NULL) {
    tDeleteStreamDispatchReq(&req);
    return -1;
  }

  SRpcMsg rsp = {.info = pMsg->info, .code = 0};
  streamProcessDispatchReq(pTask, &req, &rsp, exec);
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}

L
Liu Jicong 已提交
1149 1150
// Handle the response to a dispatch request previously sent by an upstream task.
//
// The SStreamDispatchRsp sits right after the SMsgHead; its upstreamTaskId is converted
// with ntohl() and used to acquire the task that receives the response.
//
// Returns 0 when the response was delivered to the task, -1 when the task is unknown.
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t             upstreamTaskId = ntohl(pRsp->upstreamTaskId);

  SStreamTask* pUpstream = streamMetaAcquireTask(pTq->pStreamMeta, upstreamTaskId);
  tqDebug("recv dispatch rsp, code:%x", pMsg->code);
  if (pUpstream == NULL) {
    return -1;
  }

  streamProcessDispatchRsp(pUpstream, pRsp, pMsg->code);
  streamMetaReleaseTask(pTq->pStreamMeta, pUpstream);
  return 0;
}
L
Liu Jicong 已提交
1162

1163
// Remove the task named in an SVDropStreamTaskReq from the stream meta.
// sversion and msgLen are not used. Always returns 0.
int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  const SVDropStreamTaskReq* pDropReq = (const SVDropStreamTaskReq*)msg;

  streamMetaRemoveTask(pTq->pStreamMeta, pDropReq->taskId);
  return 0;
}
L
Liu Jicong 已提交
1168 1169 1170 1171 1172 1173 1174

// Handle a stream retrieve request addressed to a task on this vnode.
//
// The payload that follows the SMsgHead is decoded into an SStreamRetrieveReq. When the
// destination task (req.dstTaskId) exists, the request is passed to
// streamProcessRetrieveReq() with a response template carrying the caller's rpc info.
// A successfully decoded request is always destroyed before returning.
//
// Returns 0 when the request was processed and -1 when decoding fails or the task does
// not exist.
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  SStreamRetrieveReq req = {0};
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  int32_t code = tDecodeStreamRetrieveReq(&decoder, &req);
  tDecoderClear(&decoder);
  if (code < 0) {
    // the request content is not trustworthy: neither look up a task nor destroy it
    tqError("vgId:%d failed to decode stream retrieve req", TD_VID(pTq->pVnode));
    return -1;
  }

  int32_t      result = -1;
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.dstTaskId);
  if (pTask != NULL) {
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    streamProcessRetrieveReq(pTask, &req, &rsp);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    result = 0;
  }

  tDeleteStreamRetrieveReq(&req);
  return result;
}

// Handler for the response to a retrieve request. Nothing is done with the response:
// both arguments are ignored and 0 is returned.
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}
L
Liu Jicong 已提交
1196

1197 1198 1199 1200 1201 1202
// Decode a stream dispatch request and hand it to its destination task without
// executing it (the exec argument of streamProcessDispatchReq() is false).
//
// On success the message content and the queue item are freed here and 0 is returned.
// On failure (decode error or unknown task) an error response is sent back to the
// sender, provided pMsg->info.handle is set, and -1 is returned.
//
// req is zero-initialised because the FAIL path reads its id fields to build the error
// response even when decoding did not complete.
int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) {
  STQ*      pTq = pVnode->pTq;
  SMsgHead* msgStr = pMsg->pCont;
  char*     msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t   msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t   code = 0;

  SStreamDispatchReq req = {0};
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  if (tDecodeStreamDispatchReq(&decoder, &req) < 0) {
    code = TSDB_CODE_MSG_DECODE_ERROR;
    tDecoderClear(&decoder);
    goto FAIL;
  }
  tDecoderClear(&decoder);

  int32_t taskId = req.taskId;

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (pTask) {
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    streamProcessDispatchReq(pTask, &req, &rsp, false);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
    return 0;
  } else {
    tDeleteStreamDispatchReq(&req);
  }

  code = TSDB_CODE_STREAM_TASK_NOT_EXIST;

FAIL:
  // NOTE(review): this early return frees neither pMsg->pCont nor pMsg, unlike the other
  // exits - confirm the caller owns them in this case.
  if (pMsg->info.handle == NULL) return -1;

  SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
  if (pRspHead == NULL) {
    // no buffer for a full response: send a bare out-of-memory response, and log the
    // code that is actually sent
    code = TSDB_CODE_OUT_OF_MEMORY;
    SRpcMsg rsp = {.code = code, .info = pMsg->info};
    tqDebug("send dispatch error rsp, code: %x", code);
    tmsgSendRsp(&rsp);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
    return -1;
  }

  pRspHead->vgId = htonl(req.upstreamNodeId);
  SStreamDispatchRsp* pRsp = POINTER_SHIFT(pRspHead, sizeof(SMsgHead));
  pRsp->streamId = htobe64(req.streamId);
  pRsp->upstreamTaskId = htonl(req.upstreamTaskId);
  pRsp->upstreamNodeId = htonl(req.upstreamNodeId);
  pRsp->downstreamNodeId = htonl(pVnode->config.vgId);
  pRsp->downstreamTaskId = htonl(req.taskId);
  pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL;

  SRpcMsg rsp = {
      .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead};
  tqDebug("send dispatch error rsp, code: %x", code);
  tmsgSendRsp(&rsp);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
  return -1;
}
L
Liu Jicong 已提交
1260

1261
// Returns 1 when sversion is not greater than pTq->walLogLastVer, otherwise 0.
int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) {
  if (sversion <= pTq->walLogLastVer) {
    return 1;
  }
  return 0;
}
1262

1263
// Request a WAL scan for the stream tasks of this vnode.
//
// Under the stream meta write latch, walScanCounter is incremented. Only the call that
// raises it to 1 posts a TDMT_STREAM_TASK_RUN message with taskId WAL_READ_TASKS_ID to
// the stream queue; later calls just leave the counter raised.
//
// When the run request cannot be allocated the increment is undone. Otherwise the
// counter would stay above zero without any scan message queued, and every later call
// would report the scan as already launched.
//
// Returns 0 when there are no tasks, when a scan is already pending, or when the run
// request was posted; returns -1 (terrno = TSDB_CODE_OUT_OF_MEMORY) when the run
// request cannot be allocated.
int32_t tqStartStreamTasks(STQ* pTq) {
  int32_t      vgId = TD_VID(pTq->pVnode);
  SStreamMeta* pMeta = pTq->pStreamMeta;

  taosWLockLatch(&pMeta->lock);

  int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
  if (numOfTasks == 0) {
    tqInfo("vgId:%d no stream tasks exists", vgId);
    taosWUnLockLatch(&pMeta->lock);
    return 0;
  }

  pMeta->walScanCounter += 1;

  if (pMeta->walScanCounter > 1) {
    tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
    taosWUnLockLatch(&pMeta->lock);
    return 0;
  }

  SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
  if (pRunReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tqError("vgId:%d failed restore stream tasks, code:%s", vgId, terrstr(terrno));
    // nothing was queued, so take back the scan that was just counted
    pMeta->walScanCounter -= 1;
    taosWUnLockLatch(&pMeta->lock);
    return -1;
  }

  tqDebug("vgId:%d start wal scan stream tasks, tasks:%d", vgId, numOfTasks);
  pRunReq->head.vgId = vgId;
  pRunReq->streamId = 0;
  pRunReq->taskId = WAL_READ_TASKS_ID;

  SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
  tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
  taosWUnLockLatch(&pMeta->lock);

  return 0;
}