tq.c 51.4 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
S
Shengliang Guan 已提交
14 15
 */

H
Hongze Cheng 已提交
16
#include "tq.h"
S
Shengliang Guan 已提交
17

dengyihao's avatar
dengyihao 已提交
18 19 20
// 0: not init
// 1: already inited
// 2: wait to be inited or cleaup
21
#define WAL_READ_TASKS_ID (-1)
22

23
static int32_t tqInitialize(STQ* pTq);
dengyihao's avatar
dengyihao 已提交
24

wmmhello's avatar
wmmhello 已提交
25
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; }
dengyihao's avatar
dengyihao 已提交
26 27
static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_EXEC; }
static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_IDLE; }
wmmhello's avatar
wmmhello 已提交
28

L
Liu Jicong 已提交
29
int32_t tqInit() {
L
Liu Jicong 已提交
30 31 32 33 34 35
  int8_t old;
  while (1) {
    old = atomic_val_compare_exchange_8(&tqMgmt.inited, 0, 2);
    if (old != 2) break;
  }

36 37 38 39 40 41
  if (old == 0) {
    tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ");
    if (tqMgmt.timer == NULL) {
      atomic_store_8(&tqMgmt.inited, 0);
      return -1;
    }
42 43 44
    if (streamInit() < 0) {
      return -1;
    }
L
Liu Jicong 已提交
45
    atomic_store_8(&tqMgmt.inited, 1);
46
  }
47

L
Liu Jicong 已提交
48 49
  return 0;
}
L
Liu Jicong 已提交
50

51
void tqCleanUp() {
L
Liu Jicong 已提交
52 53 54 55 56 57 58 59
  int8_t old;
  while (1) {
    old = atomic_val_compare_exchange_8(&tqMgmt.inited, 1, 2);
    if (old != 2) break;
  }

  if (old == 1) {
    taosTmrCleanUp(tqMgmt.timer);
L
Liu Jicong 已提交
60
    streamCleanUp();
L
Liu Jicong 已提交
61 62
    atomic_store_8(&tqMgmt.inited, 0);
  }
63
}
L
Liu Jicong 已提交
64

65
static void destroyTqHandle(void* data) {
66 67
  STqHandle* pData = (STqHandle*)data;
  qDestroyTask(pData->execHandle.task);
wmmhello's avatar
wmmhello 已提交
68

69
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
L
Liu Jicong 已提交
70
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
71
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
72
    tqReaderClose(pData->execHandle.pTqReader);
73 74
    walCloseReader(pData->pWalReader);
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
L
Liu Jicong 已提交
75
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
76
    walCloseReader(pData->pWalReader);
77
    tqReaderClose(pData->execHandle.pTqReader);
78 79
    taosMemoryFreeClear(pData->execHandle.execTb.qmsg);
    nodesDestroyNode(pData->execHandle.execTb.node);
80
  }
dengyihao's avatar
dengyihao 已提交
81
  if (pData->msg != NULL) {
82 83 84
    rpcFreeCont(pData->msg->pCont);
    taosMemoryFree(pData->msg);
    pData->msg = NULL;
D
dapan1121 已提交
85
  }
L
Liu Jicong 已提交
86 87
}

88 89 90 91 92
static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) {
  return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG &&
         pLeft->val.version <= pRight->val.version;
}

L
Liu Jicong 已提交
93
STQ* tqOpen(const char* path, SVnode* pVnode) {
94
  STQ* pTq = taosMemoryCalloc(1, sizeof(STQ));
L
Liu Jicong 已提交
95
  if (pTq == NULL) {
S
Shengliang Guan 已提交
96
    terrno = TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
97 98
    return NULL;
  }
99

100
  pTq->path = taosStrdup(path);
L
Liu Jicong 已提交
101
  pTq->pVnode = pVnode;
L
Liu Jicong 已提交
102
  pTq->walLogLastVer = pVnode->pWal->vers.lastVer;
103

104
  pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
105
  taosHashSetFreeFp(pTq->pHandle, destroyTqHandle);
106

107
  taosInitRWLatch(&pTq->lock);
108
  pTq->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK);
L
Liu Jicong 已提交
109

110
  pTq->pCheckInfo = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
L
Liu Jicong 已提交
111
  taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);
L
Liu Jicong 已提交
112

113 114 115 116 117 118 119
  int32_t code = tqInitialize(pTq);
  if (code != TSDB_CODE_SUCCESS) {
    tqClose(pTq);
    return NULL;
  } else {
    return pTq;
  }
120 121 122
}

int32_t tqInitialize(STQ* pTq) {
L
Liu Jicong 已提交
123
  if (tqMetaOpen(pTq) < 0) {
124
    return -1;
125 126
  }

L
Liu Jicong 已提交
127 128
  pTq->pOffsetStore = tqOffsetOpen(pTq);
  if (pTq->pOffsetStore == NULL) {
129
    return -1;
130 131
  }

132
  pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId);
L
Liu Jicong 已提交
133
  if (pTq->pStreamMeta == NULL) {
134
    return -1;
L
Liu Jicong 已提交
135 136
  }

137 138
  // the version is kept in task's meta data
  // todo check if this version is required or not
139 140
  if (streamLoadTasks(pTq->pStreamMeta, walGetCommittedVer(pTq->pVnode->pWal)) < 0) {
    return -1;
L
Liu Jicong 已提交
141 142
  }

143
  return 0;
L
Liu Jicong 已提交
144
}
L
Liu Jicong 已提交
145

L
Liu Jicong 已提交
146
void tqClose(STQ* pTq) {
147 148
  if (pTq == NULL) {
    return;
H
Hongze Cheng 已提交
149
  }
150 151 152 153 154 155 156 157 158

  tqOffsetClose(pTq->pOffsetStore);
  taosHashCleanup(pTq->pHandle);
  taosHashCleanup(pTq->pPushMgr);
  taosHashCleanup(pTq->pCheckInfo);
  taosMemoryFree(pTq->path);
  tqMetaClose(pTq);
  streamMetaClose(pTq->pStreamMeta);
  taosMemoryFree(pTq);
L
Liu Jicong 已提交
159
}
L
Liu Jicong 已提交
160

H
Haojun Liao 已提交
161 162 163 164 165 166 167 168 169 170 171 172 173
void tqNotifyClose(STQ* pTq) {
  if (pTq != NULL) {
    taosWLockLatch(&pTq->pStreamMeta->lock);

    void* pIter = NULL;
    while (1) {
      pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter);
      if (pIter == NULL) {
        break;
      }

      SStreamTask* pTask = *(SStreamTask**)pIter;
      tqDebug("vgId:%d s-task:%s set dropping flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
174 175 176
      pTask->status.taskStatus = TASK_STATUS__STOP;

      int64_t st = taosGetTimestampMs();
H
Haojun Liao 已提交
177
      qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
178
      int64_t el = taosGetTimestampMs() - st;
H
Haojun Liao 已提交
179
      tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el);
H
Haojun Liao 已提交
180 181 182 183 184 185
    }

    taosWUnLockLatch(&pTq->pStreamMeta->lock);
  }
}

D
dapan1121 已提交
186 187
static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
                             int64_t consumerId, int32_t type) {
L
Liu Jicong 已提交
188 189
  int32_t len = 0;
  int32_t code = 0;
D
dapan1121 已提交
190 191

  if (type == TMQ_MSG_TYPE__POLL_RSP) {
H
Haojun Liao 已提交
192
    tEncodeSize(tEncodeMqDataRsp, pRsp, len, code);
D
dapan1121 已提交
193 194 195
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
    tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
  }
L
Liu Jicong 已提交
196 197 198 199 200 201 202 203 204 205 206

  if (code < 0) {
    return -1;
  }

  int32_t tlen = sizeof(SMqRspHead) + len;
  void*   buf = rpcMallocCont(tlen);
  if (buf == NULL) {
    return -1;
  }

D
dapan1121 已提交
207 208 209
  ((SMqRspHead*)buf)->mqMsgType = type;
  ((SMqRspHead*)buf)->epoch = epoch;
  ((SMqRspHead*)buf)->consumerId = consumerId;
L
Liu Jicong 已提交
210 211 212 213 214 215

  void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));

  SEncoder encoder = {0};
  tEncoderInit(&encoder, abuf, len);

D
dapan1121 已提交
216
  if (type == TMQ_MSG_TYPE__POLL_RSP) {
H
Haojun Liao 已提交
217
    tEncodeMqDataRsp(&encoder, pRsp);
D
dapan1121 已提交
218
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
X
Xiaoyu Wang 已提交
219
    tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp);
dengyihao's avatar
dengyihao 已提交
220
  }
L
Liu Jicong 已提交
221

wmmhello's avatar
wmmhello 已提交
222
  tEncoderClear(&encoder);
L
Liu Jicong 已提交
223 224

  SRpcMsg rsp = {
D
dapan1121 已提交
225
      .info = *pRpcHandleInfo,
L
Liu Jicong 已提交
226 227 228 229
      .pCont = buf,
      .contLen = tlen,
      .code = 0,
  };
L
Liu Jicong 已提交
230

L
Liu Jicong 已提交
231
  tmsgSendRsp(&rsp);
L
Liu Jicong 已提交
232 233
  return 0;
}
L
Liu Jicong 已提交
234

H
Haojun Liao 已提交
235
int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId) {
236 237 238 239
  SMqDataRsp dataRsp = {0};
  dataRsp.head.consumerId = pHandle->consumerId;
  dataRsp.head.epoch = pHandle->epoch;
  dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP;
240 241

  int64_t sver = 0, ever = 0;
H
Haojun Liao 已提交
242
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);
dengyihao's avatar
dengyihao 已提交
243 244
  tqDoSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_RSP, sver,
                  ever);
D
dapan1121 已提交
245 246 247

  char buf1[80] = {0};
  char buf2[80] = {0};
248 249
  tFormatOffset(buf1, tListLen(buf1), &dataRsp.reqOffset);
  tFormatOffset(buf2, tListLen(buf2), &dataRsp.rspOffset);
dengyihao's avatar
dengyihao 已提交
250 251
  tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s", vgId,
          dataRsp.head.consumerId, dataRsp.head.epoch, dataRsp.blockNum, buf1, buf2);
L
Liu Jicong 已提交
252 253 254
  return 0;
}

255 256 257 258 259 260
int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp,
                      int32_t type, int32_t vgId) {
  int64_t sver = 0, ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);

  tqDoSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type, sver, ever);
261

D
dapan1121 已提交
262 263 264 265
  char buf1[80] = {0};
  char buf2[80] = {0};
  tFormatOffset(buf1, 80, &pRsp->reqOffset);
  tFormatOffset(buf2, 80, &pRsp->rspOffset);
266

dengyihao's avatar
dengyihao 已提交
267 268
  tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%" PRIx64, vgId,
          pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2, pReq->reqId);
269 270 271 272

  return 0;
}

273
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
274 275
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);
276

X
Xiaoyu Wang 已提交
277 278
  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
279
  if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) {
280 281
    return -1;
  }
282

283 284
  tDecoderClear(&decoder);

285 286 287
  STqOffset* pOffset = &vgOffset.offset;

  if (pOffset->val.type == TMQ_OFFSET__SNAPSHOT_DATA || pOffset->val.type == TMQ_OFFSET__SNAPSHOT_META) {
L
Liu Jicong 已提交
288
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
289 290 291 292 293 294
            pOffset->subKey, vgId, pOffset->val.uid, pOffset->val.ts);
  } else if (pOffset->val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, pOffset->subKey, vgId,
            pOffset->val.version);
    if (pOffset->val.version + 1 == sversion) {
      pOffset->val.version += 1;
295
    }
296
  } else {
297
    tqError("invalid commit offset type:%d", pOffset->val.type);
298
    return -1;
299
  }
300

301 302
  STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey);
  if (pSavedOffset != NULL && tqOffsetLessOrEqual(pOffset, pSavedOffset)) {
303
    tqDebug("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64,
304
            vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version);
305
    return 0;  // no need to update the offset value
306 307
  }

308
  // save the new offset value
309
  if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) {
310
    return -1;
311
  }
312

313 314 315
  return 0;
}

316
int32_t tqProcessSeekReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
317 318
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);
319 320 321

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
322
  if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) {
H
Haojun Liao 已提交
323
    tqError("vgId:%d failed to decode seek msg", vgId);
324 325 326 327 328
    return -1;
  }

  tDecoderClear(&decoder);

H
Haojun Liao 已提交
329 330 331
  tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" PRIx64 ", req offset:%" PRId64,
          vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version);

332 333 334
  STqOffset* pOffset = &vgOffset.offset;
  if (pOffset->val.type != TMQ_OFFSET__LOG) {
    tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type);
335 336 337
    return -1;
  }

338 339
  STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey));
  if (pHandle == NULL) {
dengyihao's avatar
dengyihao 已提交
340
    tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, pOffset->subKey);
341 342
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
343 344
  }

345 346 347 348 349 350 351 352
  // 2. check consumer-vg assignment status
  taosRLockLatch(&pTq->lock);
  if (pHandle->consumerId != vgOffset.consumerId) {
    tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId);
    terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    taosRUnLockLatch(&pTq->lock);
    return -1;
353
  }
354 355
  taosRUnLockLatch(&pTq->lock);

dengyihao's avatar
dengyihao 已提交
356
  // 3. check the offset info
357 358 359 360 361 362
  STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey);
  if (pSavedOffset != NULL) {
    if (pSavedOffset->val.type != TMQ_OFFSET__LOG) {
      tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey);
      return 0;  // no need to update the offset value
    }
363

364 365 366 367 368 369
    if (pSavedOffset->val.version == pOffset->val.version) {
      tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey,
              pOffset->val.version, pSavedOffset->val.version);
      return 0;
    }
  }
370 371 372

  int64_t sver = 0, ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);
373 374 375 376
  if (pOffset->val.version < sver) {
    pOffset->val.version = sver;
  } else if (pOffset->val.version > ever) {
    pOffset->val.version = ever;
377 378 379
  }

  // save the new offset value
380 381 382 383
  if (pSavedOffset != NULL) {
    tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version,
            pSavedOffset->val.version);
  } else {
dengyihao's avatar
dengyihao 已提交
384
    tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version);
385
  }
386

387 388
  if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) {
    tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version);
389 390 391
    return -1;
  }

H
Haojun Liao 已提交
392 393 394
  tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId,
          vgOffset.consumerId, vgOffset.offset.val.version);

395 396 397
  return 0;
}

L
Liu Jicong 已提交
398
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
L
Liu Jicong 已提交
399
  void* pIter = NULL;
400

L
Liu Jicong 已提交
401
  while (1) {
402
    pIter = taosHashIterate(pTq->pCheckInfo, pIter);
403 404 405 406
    if (pIter == NULL) {
      break;
    }

407
    STqCheckInfo* pCheck = (STqCheckInfo*)pIter;
408

L
Liu Jicong 已提交
409 410
    if (pCheck->ntbUid == tbUid) {
      int32_t sz = taosArrayGetSize(pCheck->colIdList);
L
Liu Jicong 已提交
411
      for (int32_t i = 0; i < sz; i++) {
L
Liu Jicong 已提交
412 413
        int16_t forbidColId = *(int16_t*)taosArrayGet(pCheck->colIdList, i);
        if (forbidColId == colId) {
414
          taosHashCancelIterate(pTq->pCheckInfo, pIter);
L
Liu Jicong 已提交
415 416 417 418 419
          return -1;
        }
      }
    }
  }
420

L
Liu Jicong 已提交
421 422 423
  return 0;
}

D
dapan1121 已提交
424
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
X
Xiaoyu Wang 已提交
425
  SMqPollReq req = {0};
dengyihao's avatar
dengyihao 已提交
426
  int        code = 0;
D
dapan1121 已提交
427 428 429 430 431 432 433 434 435 436
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  int64_t      consumerId = req.consumerId;
  int32_t      reqEpoch = req.epoch;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
wmmhello's avatar
wmmhello 已提交
437
  STqHandle*   pHandle = NULL;
D
dapan1121 已提交
438

wmmhello's avatar
wmmhello 已提交
439 440 441 442 443 444 445 446 447 448
  while (1) {
    taosWLockLatch(&pTq->lock);
    // 1. find handle
    pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
    if (pHandle == NULL) {
      tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_MSG;
      taosWUnLockLatch(&pTq->lock);
      return -1;
    }
D
dapan1121 已提交
449

450 451
    // 2. check re-balance status
    if (pHandle->consumerId != consumerId) {
dengyihao's avatar
dengyihao 已提交
452 453
      tqError("ERROR tmq poll: consumer:0x%" PRIx64
              " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
454
              consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId);
455
      terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
456 457 458
      taosWUnLockLatch(&pTq->lock);
      return -1;
    }
wmmhello's avatar
wmmhello 已提交
459

wmmhello's avatar
wmmhello 已提交
460
    bool exec = tqIsHandleExec(pHandle);
dengyihao's avatar
dengyihao 已提交
461
    if (!exec) {
wmmhello's avatar
wmmhello 已提交
462
      tqSetHandleExec(pHandle);
dengyihao's avatar
dengyihao 已提交
463 464 465
      //      qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS);
      tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId,
              req.subKey, pHandle);
wmmhello's avatar
wmmhello 已提交
466 467 468
      taosWUnLockLatch(&pTq->lock);
      break;
    }
469
    taosWUnLockLatch(&pTq->lock);
470

dengyihao's avatar
dengyihao 已提交
471 472 473
    tqDebug("tmq poll: consumer:0x%" PRIx64
            "vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p",
            consumerId, vgId, req.subKey, pHandle);
wmmhello's avatar
wmmhello 已提交
474
    taosMsleep(10);
D
dapan1121 已提交
475 476 477
  }

  // 3. update the epoch value
478
  if (pHandle->epoch < reqEpoch) {
dengyihao's avatar
dengyihao 已提交
479
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch,
X
Xiaoyu Wang 已提交
480
            reqEpoch);
D
dapan1121 已提交
481 482 483 484 485 486 487 488
    pHandle->epoch = reqEpoch;
  }

  char buf[80];
  tFormatOffset(buf, 80, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64,
          consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId);

wmmhello's avatar
wmmhello 已提交
489
  code = tqExtractDataForMq(pTq, pHandle, &req, pMsg);
490
  tqSetHandleIdle(pHandle);
491

dengyihao's avatar
dengyihao 已提交
492 493
  tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId,
          req.subKey, pHandle);
494
  return code;
D
dapan1121 已提交
495 496
}

497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  int64_t      consumerId = req.consumerId;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);

  // 1. find handle
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (pHandle == NULL) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s not found", consumerId, vgId, req.subKey);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  // 2. check re-balance status
  taosRLockLatch(&pTq->lock);
  if (pHandle->consumerId != consumerId) {
    tqDebug("ERROR consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, vgId, req.subKey, pHandle->consumerId);
    terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    taosRUnLockLatch(&pTq->lock);
    return -1;
  }
  taosRUnLockLatch(&pTq->lock);

  int64_t sver = 0, ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);

  SMqDataRsp dataRsp = {0};
  tqInitDataRsp(&dataRsp, &req);

  STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, req.subKey);
  if (pOffset != NULL) {
    if (pOffset->val.type != TMQ_OFFSET__LOG) {
      tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s use snapshot, no valid wal info", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_PARA;
      tDeleteMqDataRsp(&dataRsp);
      return -1;
    }

    dataRsp.rspOffset.type = TMQ_OFFSET__LOG;
    dataRsp.rspOffset.version = pOffset->val.version;
  } else {
    if (req.useSnapshot == true) {
      tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s snapshot not support wal info", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_PARA;
      tDeleteMqDataRsp(&dataRsp);
      return -1;
    }

    dataRsp.rspOffset.type = TMQ_OFFSET__LOG;

555
    if (reqOffset.type == TMQ_OFFSET__LOG) {
556
      int64_t currentVer = walReaderGetCurrentVer(pHandle->execHandle.pTqReader->pWalReader);
dengyihao's avatar
dengyihao 已提交
557
      if (currentVer == -1) {  // not start to read data from wal yet, return req offset directly
558 559 560 561
        dataRsp.rspOffset.version = reqOffset.version;
      } else {
        dataRsp.rspOffset.version = currentVer;  // return current consume offset value
      }
562 563
    } else if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) {
      dataRsp.rspOffset.version = sver;  // not consume yet, set the earliest position
564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
    } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
      dataRsp.rspOffset.version = ever;
    } else {
      tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey,
              reqOffset.type);
      terrno = TSDB_CODE_INVALID_PARA;
      tDeleteMqDataRsp(&dataRsp);
      return -1;
    }
  }

  tqDoSendDataRsp(&pMsg->info, &dataRsp, req.epoch, req.consumerId, TMQ_MSG_TYPE__WALINFO_RSP, sver, ever);
  return 0;
}

579
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
L
Liu Jicong 已提交
580
  SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;
dengyihao's avatar
dengyihao 已提交
581
  int32_t        vgId = TD_VID(pTq->pVnode);
L
Liu Jicong 已提交
582

583
  tqDebug("vgId:%d, tq process delete sub req %s", vgId, pReq->subKey);
wmmhello's avatar
wmmhello 已提交
584
  int32_t code = 0;
L
Liu Jicong 已提交
585

wmmhello's avatar
wmmhello 已提交
586
  taosWLockLatch(&pTq->lock);
L
Liu Jicong 已提交
587 588
  STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
  if (pHandle) {
wmmhello's avatar
wmmhello 已提交
589
    while (tqIsHandleExec(pHandle)) {
dengyihao's avatar
dengyihao 已提交
590 591
      tqDebug("vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p", vgId,
              pHandle->subKey, pHandle);
wmmhello's avatar
wmmhello 已提交
592
      taosMsleep(10);
593
    }
594

L
Liu Jicong 已提交
595 596 597
    if (pHandle->pRef) {
      walCloseRef(pTq->pVnode->pWal, pHandle->pRef->refId);
    }
598

L
Liu Jicong 已提交
599 600 601 602
    code = taosHashRemove(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
    if (code != 0) {
      tqError("cannot process tq delete req %s, since no such handle", pReq->subKey);
    }
L
Liu Jicong 已提交
603
  }
604

L
Liu Jicong 已提交
605 606
  code = tqOffsetDelete(pTq->pOffsetStore, pReq->subKey);
  if (code != 0) {
607
    tqError("cannot process tq delete req %s, since no such offset in cache", pReq->subKey);
L
Liu Jicong 已提交
608
  }
L
Liu Jicong 已提交
609

L
Liu Jicong 已提交
610
  if (tqMetaDeleteHandle(pTq, pReq->subKey) < 0) {
L
Liu Jicong 已提交
611
    tqError("cannot process tq delete req %s, since no such offset in tdb", pReq->subKey);
612
  }
wmmhello's avatar
wmmhello 已提交
613 614
  taosWUnLockLatch(&pTq->lock);

L
Liu Jicong 已提交
615
  return 0;
L
Liu Jicong 已提交
616 617
}

618
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
619 620
  STqCheckInfo info = {0};
  SDecoder     decoder;
X
Xiaoyu Wang 已提交
621
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
622
  if (tDecodeSTqCheckInfo(&decoder, &info) < 0) {
L
Liu Jicong 已提交
623 624 625 626
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  tDecoderClear(&decoder);
627 628 629 630 631
  if (taosHashPut(pTq->pCheckInfo, info.topic, strlen(info.topic), &info, sizeof(STqCheckInfo)) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  if (tqMetaSaveCheckInfo(pTq, info.topic, msg, msgLen) < 0) {
L
Liu Jicong 已提交
632 633 634 635 636 637
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  return 0;
}

638
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
639 640 641 642 643 644 645 646 647 648 649
  if (taosHashRemove(pTq->pCheckInfo, msg, strlen(msg)) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  if (tqMetaDeleteCheckInfo(pTq, msg) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  return 0;
}

650
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
dengyihao's avatar
dengyihao 已提交
651
  int         ret = 0;
L
Liu Jicong 已提交
652
  SMqRebVgReq req = {0};
dengyihao's avatar
dengyihao 已提交
653
  SDecoder    dc = {0};
654 655 656 657 658 659 660 661 662

  tDecoderInit(&dc, msg, msgLen);

  // decode req
  if (tDecodeSMqRebVgReq(&dc, &req) < 0) {
    terrno = TSDB_CODE_INVALID_MSG;
    tDecoderClear(&dc);
    return -1;
  }
L
Liu Jicong 已提交
663

D
dapan1121 已提交
664 665 666
  SVnode* pVnode = pTq->pVnode;
  int32_t vgId = TD_VID(pVnode);

667
  tqDebug("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pVnode->config.vgId, req.subKey,
D
dapan1121 已提交
668
          req.oldConsumerId, req.newConsumerId);
L
Liu Jicong 已提交
669

670
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
L
Liu Jicong 已提交
671
  if (pHandle == NULL) {
L
Liu Jicong 已提交
672
    if (req.oldConsumerId != -1) {
673
      tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
674
              req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
L
Liu Jicong 已提交
675
    }
D
dapan1121 已提交
676

L
Liu Jicong 已提交
677
    if (req.newConsumerId == -1) {
678
      tqError("vgId:%d, tq invalid re-balance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
679
      goto end;
L
Liu Jicong 已提交
680
    }
D
dapan1121 已提交
681

L
Liu Jicong 已提交
682 683
    STqHandle tqHandle = {0};
    pHandle = &tqHandle;
L
Liu Jicong 已提交
684

L
Liu Jicong 已提交
685 686 687
    memcpy(pHandle->subKey, req.subKey, TSDB_SUBSCRIBE_KEY_LEN);
    pHandle->consumerId = req.newConsumerId;
    pHandle->epoch = -1;
L
Liu Jicong 已提交
688

L
Liu Jicong 已提交
689
    pHandle->execHandle.subType = req.subType;
L
Liu Jicong 已提交
690
    pHandle->fetchMeta = req.withMeta;
wmmhello's avatar
wmmhello 已提交
691

692
    // TODO version should be assigned and refed during preprocess
D
dapan1121 已提交
693
    SWalRef* pRef = walRefCommittedVer(pVnode->pWal);
694
    if (pRef == NULL) {
695 696
      ret = -1;
      goto end;
697
    }
D
dapan1121 已提交
698

699 700
    int64_t ver = pRef->refVer;
    pHandle->pRef = pRef;
L
Liu Jicong 已提交
701

702
    SReadHandle handle = {.vnode = pVnode, .initTableReader = true, .initTqReader = true, .version = ver};
H
Haojun Liao 已提交
703 704
    initStorageAPI(&handle.api);

wmmhello's avatar
wmmhello 已提交
705
    pHandle->snapshotVer = ver;
706

L
Liu Jicong 已提交
707
    if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
dengyihao's avatar
dengyihao 已提交
708
      pHandle->execHandle.execCol.qmsg = taosStrdup(req.qmsg);
709

X
Xiaoyu Wang 已提交
710 711
      pHandle->execHandle.task = qCreateQueueExecTaskInfo(pHandle->execHandle.execCol.qmsg, &handle, vgId,
                                                          &pHandle->execHandle.numOfCols, req.newConsumerId);
L
Liu Jicong 已提交
712
      void* scanner = NULL;
713
      qExtractStreamScanner(pHandle->execHandle.task, &scanner);
714
      pHandle->execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner);
L
Liu Jicong 已提交
715
    } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) {
D
dapan1121 已提交
716
      pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL);
717
      pHandle->execHandle.pTqReader = tqReaderOpen(pVnode);
D
dapan1121 已提交
718

L
Liu Jicong 已提交
719
      pHandle->execHandle.execDb.pFilterOutTbUid =
wmmhello's avatar
wmmhello 已提交
720
          taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK);
721
      buildSnapContext(handle.vnode, handle.version, 0, pHandle->execHandle.subType, pHandle->fetchMeta,
722
                       (SSnapContext**)(&handle.sContext));
723

724
      pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, vgId, NULL, req.newConsumerId);
L
Liu Jicong 已提交
725
    } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
D
dapan1121 已提交
726
      pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL);
wmmhello's avatar
wmmhello 已提交
727
      pHandle->execHandle.execTb.suid = req.suid;
728
      pHandle->execHandle.execTb.qmsg = taosStrdup(req.qmsg);
wmmhello's avatar
wmmhello 已提交
729

dengyihao's avatar
dengyihao 已提交
730
      if (strcmp(pHandle->execHandle.execTb.qmsg, "") != 0) {
731 732 733 734 735
        if (nodesStringToNode(pHandle->execHandle.execTb.qmsg, &pHandle->execHandle.execTb.node) != 0) {
          tqError("nodesStringToNode error in sub stable, since %s, vgId:%d, subkey:%s consumer:0x%" PRIx64, terrstr(),
                  pVnode->config.vgId, req.subKey, pHandle->consumerId);
          return -1;
        }
L
Liu Jicong 已提交
736
      }
wmmhello's avatar
wmmhello 已提交
737

738
      buildSnapContext(handle.vnode, handle.version, req.suid, pHandle->execHandle.subType, pHandle->fetchMeta,
L
Liu Jicong 已提交
739
                       (SSnapContext**)(&handle.sContext));
740
      pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, vgId, NULL, req.newConsumerId);
wmmhello's avatar
wmmhello 已提交
741

742
      SArray* tbUidList = NULL;
wmmhello's avatar
wmmhello 已提交
743
      ret = qGetTableList(req.suid, pVnode, pHandle->execHandle.execTb.node, &tbUidList, pHandle->execHandle.task);
dengyihao's avatar
dengyihao 已提交
744 745 746
      if (ret != TDB_CODE_SUCCESS) {
        tqError("qGetTableList error:%d vgId:%d, subkey:%s consumer:0x%" PRIx64, ret, pVnode->config.vgId, req.subKey,
                pHandle->consumerId);
747 748
        taosArrayDestroy(tbUidList);
        goto end;
L
Liu Jicong 已提交
749
      }
dengyihao's avatar
dengyihao 已提交
750 751
      tqDebug("tq try to get ctb for stb subscribe, vgId:%d, subkey:%s consumer:0x%" PRIx64 " suid:%" PRId64,
              pVnode->config.vgId, req.subKey, pHandle->consumerId, req.suid);
752
      pHandle->execHandle.pTqReader = tqReaderOpen(pVnode);
H
Haojun Liao 已提交
753
      tqReaderSetTbUidList(pHandle->execHandle.pTqReader, tbUidList, NULL);
L
Liu Jicong 已提交
754
      taosArrayDestroy(tbUidList);
L
Liu Jicong 已提交
755
    }
H
Haojun Liao 已提交
756

757
    taosHashPut(pTq->pHandle, req.subKey, strlen(req.subKey), pHandle, sizeof(STqHandle));
dengyihao's avatar
dengyihao 已提交
758
    tqDebug("try to persist handle %s consumer:0x%" PRIx64, req.subKey, pHandle->consumerId);
759 760
    ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
    goto end;
L
Liu Jicong 已提交
761
  } else {
762
    taosWLockLatch(&pTq->lock);
wmmhello's avatar
wmmhello 已提交
763

D
dapan1121 已提交
764
    if (pHandle->consumerId == req.newConsumerId) {  // do nothing
dengyihao's avatar
dengyihao 已提交
765 766
      tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs, should not reach here", req.vgId,
             req.newConsumerId);
767 768 769
    } else {
      tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
             req.newConsumerId);
770 771
      atomic_store_64(&pHandle->consumerId, req.newConsumerId);
    }
dengyihao's avatar
dengyihao 已提交
772
    //    atomic_add_fetch_32(&pHandle->epoch, 1);
773

774
    // kill executing task
dengyihao's avatar
dengyihao 已提交
775 776 777 778 779 780 781 782 783 784
    //    if(tqIsHandleExec(pHandle)) {
    //      qTaskInfo_t pTaskInfo = pHandle->execHandle.task;
    //      if (pTaskInfo != NULL) {
    //        qKillTask(pTaskInfo, TSDB_CODE_SUCCESS);
    //      }

    //      if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
    //        qStreamCloseTsdbReader(pTaskInfo);
    //      }
    //    }
wmmhello's avatar
wmmhello 已提交
785 786
    // remove if it has been register in the push manager, and return one empty block to consumer
    tqUnregisterPushHandle(pTq, pHandle);
787
    taosWUnLockLatch(&pTq->lock);
788
    ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
L
Liu Jicong 已提交
789
  }
L
Liu Jicong 已提交
790

791
end:
792
  tDecoderClear(&dc);
793
  return ret;
L
Liu Jicong 已提交
794
}
795

dengyihao's avatar
dengyihao 已提交
796
void freePtr(void* ptr) { taosMemoryFree(*(void**)ptr); }
L
liuyao 已提交
797

798
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
D
dapan1121 已提交
799
  int32_t vgId = TD_VID(pTq->pVnode);
800

801
  pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId);
L
Liu Jicong 已提交
802
  pTask->refCnt = 1;
803
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
dengyihao's avatar
dengyihao 已提交
804 805
  pTask->inputQueue = streamQueueOpen(512 << 10);
  pTask->outputQueue = streamQueueOpen(512 << 10);
L
Liu Jicong 已提交
806 807

  if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) {
L
Liu Jicong 已提交
808
    return -1;
L
Liu Jicong 已提交
809 810
  }

L
Liu Jicong 已提交
811 812
  pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
  pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
813
  pTask->pMsgCb = &pTq->pVnode->msgCb;
814
  pTask->pMeta = pTq->pStreamMeta;
815
  pTask->chkInfo.version = ver;
816
  pTask->chkInfo.currentVer = ver;
817

818
  // expand executor
dengyihao's avatar
dengyihao 已提交
819
  pTask->status.taskStatus = (pTask->fillHistory) ? TASK_STATUS__WAIT_DOWNSTREAM : TASK_STATUS__NORMAL;
820

821
  if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
822
    pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
823 824 825 826
    if (pTask->pState == NULL) {
      return -1;
    }

827
    SReadHandle handle = {.vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState};
828
    initStorageAPI(&handle.api);
829

830 831
    pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
    if (pTask->exec.pExecutor == NULL) {
L
Liu Jicong 已提交
832 833
      return -1;
    }
834

835
    qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId);
836
  } else if (pTask->taskLevel == TASK_LEVEL__AGG) {
837
    pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
838 839 840
    if (pTask->pState == NULL) {
      return -1;
    }
841

842
    int32_t     numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo);
843 844
    SReadHandle handle = {.vnode = NULL, .numOfVgroups = numOfVgroups, .pStateBackend = pTask->pState};
    initStorageAPI(&handle.api);
845

846
    pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
847
    if (pTask->exec.pExecutor == NULL) {
L
Liu Jicong 已提交
848 849
      return -1;
    }
850 851

    qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId);
L
Liu Jicong 已提交
852
  }
L
Liu Jicong 已提交
853 854

  // sink
855
  if (pTask->outputType == TASK_OUTPUT__SMA) {
L
Liu Jicong 已提交
856
    pTask->smaSink.vnode = pTq->pVnode;
L
Liu Jicong 已提交
857
    pTask->smaSink.smaSink = smaHandleRes;
858
  } else if (pTask->outputType == TASK_OUTPUT__TABLE) {
L
Liu Jicong 已提交
859
    pTask->tbSink.vnode = pTq->pVnode;
H
Haojun Liao 已提交
860
    pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline;
L
Liu Jicong 已提交
861

X
Xiaoyu Wang 已提交
862
    int32_t   ver1 = 1;
5
54liuyao 已提交
863
    SMetaInfo info = {0};
dengyihao's avatar
dengyihao 已提交
864
    int32_t   code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL);
5
54liuyao 已提交
865
    if (code == TSDB_CODE_SUCCESS) {
D
dapan1121 已提交
866
      ver1 = info.skmVer;
5
54liuyao 已提交
867
    }
L
Liu Jicong 已提交
868

869 870
    SSchemaWrapper* pschemaWrapper = pTask->tbSink.pSchemaWrapper;
    pTask->tbSink.pTSchema = tBuildTSchema(pschemaWrapper->pSchema, pschemaWrapper->nCols, ver1);
871
    if (pTask->tbSink.pTSchema == NULL) {
D
dapan1121 已提交
872 873
      return -1;
    }
L
liuyao 已提交
874 875
    pTask->tbSink.pTblInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
    tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr);
L
Liu Jicong 已提交
876
  }
877

878
  if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
879
    SWalFilterCond cond = {.deleteMsg = 1};  // delete msg also extract from wal files
880
    pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond);
881 882
  }

883
  streamSetupTrigger(pTask);
884

dengyihao's avatar
dengyihao 已提交
885 886
  tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", vgId,
         pTask->id.idStr, pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel);
887 888 889

  // next valid version will add one
  pTask->chkInfo.version += 1;
L
Liu Jicong 已提交
890
  return 0;
L
Liu Jicong 已提交
891
}
L
Liu Jicong 已提交
892

893
int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
894 895 896 897
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

898 899
  SStreamTaskCheckReq req;
  SDecoder            decoder;
900

X
Xiaoyu Wang 已提交
901
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
902 903
  tDecodeSStreamTaskCheckReq(&decoder, &req);
  tDecoderClear(&decoder);
904

905 906 907 908 909 910 911 912 913 914
  int32_t             taskId = req.downstreamTaskId;
  SStreamTaskCheckRsp rsp = {
      .reqId = req.reqId,
      .streamId = req.streamId,
      .childId = req.childId,
      .downstreamNodeId = req.downstreamNodeId,
      .downstreamTaskId = req.downstreamTaskId,
      .upstreamNodeId = req.upstreamNodeId,
      .upstreamTaskId = req.upstreamTaskId,
  };
915

L
Liu Jicong 已提交
916
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
917

918
  if (pTask != NULL) {
919
    rsp.status = streamTaskCheckStatus(pTask);
920 921
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);

922
    tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64
923
            ") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d",
924 925
            pTask->id.idStr, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, pTask->status.taskStatus,
            rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
926 927
  } else {
    rsp.status = 0;
928 929
    tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64
            ") %d at node %d, check req from task:0x%x at node %d, rsp status %d",
930 931
            taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId,
            rsp.status);
932 933 934 935 936
  }

  SEncoder encoder;
  int32_t  code;
  int32_t  len;
937

938 939
  tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code);
  if (code < 0) {
940
    tqError("vgId:%d failed to encode task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId);
L
Liu Jicong 已提交
941
    return -1;
942
  }
L
Liu Jicong 已提交
943

944 945 946 947 948 949 950 951
  void* buf = rpcMallocCont(sizeof(SMsgHead) + len);
  ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId);

  void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncoderInit(&encoder, (uint8_t*)abuf, len);
  tEncodeSStreamTaskCheckRsp(&encoder, &rsp);
  tEncoderClear(&encoder);

952
  SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info};
953

954 955 956 957
  tmsgSendRsp(&rspMsg);
  return 0;
}

958
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
959 960 961 962 963 964
  int32_t             code;
  SStreamTaskCheckRsp rsp;

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  code = tDecodeSStreamTaskCheckRsp(&decoder, &rsp);
965

966 967 968 969 970
  if (code < 0) {
    tDecoderClear(&decoder);
    return -1;
  }

971
  tDecoderClear(&decoder);
972
  tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task:0x%x at node %d, status %d",
973 974
          rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);

L
Liu Jicong 已提交
975
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.upstreamTaskId);
976
  if (pTask == NULL) {
977 978
    tqError("tq failed to locate the stream task:0x%x vgId:%d, it may have been destroyed", rsp.upstreamTaskId,
            pTq->pStreamMeta->vgId);
979 980 981
    return -1;
  }

982
  code = streamProcessTaskCheckRsp(pTask, &rsp, sversion);
L
Liu Jicong 已提交
983 984
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return code;
985 986
}

987
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
988 989 990
  int32_t code = 0;
  int32_t vgId = TD_VID(pTq->pVnode);

5
54liuyao 已提交
991 992 993
  if (tsDisableStream) {
    return 0;
  }
994 995 996 997

  // 1.deserialize msg and build task
  SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask));
  if (pTask == NULL) {
998
    terrno = TSDB_CODE_OUT_OF_MEMORY;
dengyihao's avatar
dengyihao 已提交
999 1000
    tqError("vgId:%d failed to create stream task due to out of memory, alloc size:%d", vgId,
            (int32_t)sizeof(SStreamTask));
1001 1002
    return -1;
  }
1003

1004 1005
  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
1006
  code = tDecodeStreamTask(&decoder, pTask);
1007 1008 1009 1010 1011
  if (code < 0) {
    tDecoderClear(&decoder);
    taosMemoryFree(pTask);
    return -1;
  }
1012

1013 1014
  tDecoderClear(&decoder);

1015 1016
  SStreamMeta* pStreamMeta = pTq->pStreamMeta;

1017
  // 2.save task, use the newest commit version as the initial start version of stream task.
1018 1019 1020 1021
  taosWLockLatch(&pStreamMeta->lock);
  code = streamMetaAddDeployedTask(pStreamMeta, sversion, pTask);

  int32_t numOfTasks = streamMetaGetNumOfTasks(pStreamMeta);
1022
  if (code < 0) {
1023
    tqError("vgId:%d failed to add s-task:%s, total:%d", vgId, pTask->id.idStr, numOfTasks);
1024
    taosWUnLockLatch(&pStreamMeta->lock);
1025 1026 1027
    return -1;
  }

1028
  taosWUnLockLatch(&pStreamMeta->lock);
1029

1030
  // 3. for fill history task, do nothing. wait for the main task to start it
1031
  if (pTask->fillHistory) {
1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054
    tqDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
  } else {
    if (pTask->historyTaskId.taskId != 0) {
      // todo fix the bug: 1. maybe failed to located the fill history task, since it is not built yet. 2. race condition

      // an fill history task needs to be started.
      // Set the execute conditions, including the query time window and the version range
      SStreamTask* pHTask = taosHashGet(pStreamMeta->pTasks, &pTask->historyTaskId.taskId, sizeof(pTask->historyTaskId.taskId));

      pHTask->dataRange.range.minVer = 0;
      pHTask->dataRange.range.maxVer = sversion;

      pHTask->dataRange.window.skey = INT64_MIN;
      pHTask->dataRange.window.ekey = 1000000;

      tqDebug("s-task:%s set the launch condition for fill history task:%s, window:%" PRId64 " - %" PRId64
              " verrange:%" PRId64 " - %" PRId64,
              pTask->id.idStr, pHTask->id.idStr, pHTask->dataRange.window.skey, pHTask->dataRange.window.ekey,
              pHTask->dataRange.range.minVer, pHTask->dataRange.range.maxVer);

      // check if downstream tasks have been ready
      streamTaskCheckDownstream(pHTask, sversion);
    }
1055 1056
  }

1057
  tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", vgId, pTask->id.idStr,
1058
          pTask->status.taskStatus, numOfTasks);
1059

1060 1061 1062
  return 0;
}

L
Liu Jicong 已提交
1063 1064 1065 1066 1067
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code;
  char*   msg = pMsg->pCont;
  int32_t msgLen = pMsg->contLen;

1068
  SStreamRecoverStep1Req* pReq = (SStreamRecoverStep1Req*)msg;
L
Liu Jicong 已提交
1069
  SStreamTask*            pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
1070 1071 1072 1073 1074
  if (pTask == NULL) {
    return -1;
  }

  // check param
1075
  int64_t fillVer1 = pTask->chkInfo.version;
1076
  if (fillVer1 <= 0) {
L
Liu Jicong 已提交
1077
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1078 1079 1080 1081
    return -1;
  }

  // do recovery step 1
H
Haojun Liao 已提交
1082
  tqDebug("s-task:%s start non-blocking recover stage(step 1) scan", pTask->id.idStr);
H
Haojun Liao 已提交
1083
  int64_t st = taosGetTimestampMs();
1084

H
Haojun Liao 已提交
1085
  streamSourceRecoverScanStep1(pTask);
1086
  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
H
Haojun Liao 已提交
1087 1088
    tqDebug("s-task:%s is dropped, abort recover in step1", pTask->id.idStr);

L
Liu Jicong 已提交
1089 1090 1091 1092
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return 0;
  }

H
Haojun Liao 已提交
1093
  double el = (taosGetTimestampMs() - st) / 1000.0;
H
Haojun Liao 已提交
1094
  tqDebug("s-task:%s non-blocking recover stage(step 1) ended, elapsed time:%.2fs", pTask->id.idStr, el);
H
Haojun Liao 已提交
1095

1096 1097 1098 1099
  // build msg to launch next step
  SStreamRecoverStep2Req req;
  code = streamBuildSourceRecover2Req(pTask, &req);
  if (code < 0) {
L
Liu Jicong 已提交
1100
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1101 1102 1103
    return -1;
  }

L
Liu Jicong 已提交
1104
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1105
  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
L
Liu Jicong 已提交
1106 1107 1108
    return 0;
  }

1109
  // serialize msg
L
Liu Jicong 已提交
1110 1111 1112 1113
  int32_t len = sizeof(SStreamRecoverStep1Req);

  void* serializedReq = rpcMallocCont(len);
  if (serializedReq == NULL) {
H
Haojun Liao 已提交
1114
    tqError("s-task:%s failed to prepare the step2 stage, out of memory", pTask->id.idStr);
L
Liu Jicong 已提交
1115 1116 1117 1118
    return -1;
  }

  memcpy(serializedReq, &req, len);
1119 1120

  // dispatch msg
H
Haojun Liao 已提交
1121
  tqDebug("s-task:%s start recover block stage", pTask->id.idStr);
1122

H
Haojun Liao 已提交
1123 1124
  SRpcMsg rpcMsg = {
      .code = 0, .contLen = len, .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, .pCont = serializedReq};
1125 1126 1127 1128
  tmsgPutToQueue(&pTq->pVnode->msgCb, WRITE_QUEUE, &rpcMsg);
  return 0;
}

1129
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
H
Haojun Liao 已提交
1130 1131
  int32_t code = 0;

1132
  SStreamRecoverStep2Req* pReq = (SStreamRecoverStep2Req*)msg;
H
Haojun Liao 已提交
1133 1134

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
1135 1136 1137 1138 1139
  if (pTask == NULL) {
    return -1;
  }

  // do recovery step 2
H
Haojun Liao 已提交
1140
  int64_t st = taosGetTimestampMs();
dengyihao's avatar
dengyihao 已提交
1141
  tqDebug("s-task:%s start step2 recover, ts:%" PRId64, pTask->id.idStr, st);
H
Haojun Liao 已提交
1142

1143
  code = streamSourceRecoverScanStep2(pTask, sversion);
1144
  if (code < 0) {
L
Liu Jicong 已提交
1145
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1146 1147 1148
    return -1;
  }

1149
  qDebug("s-task:%s set start wal scan start ver:%"PRId64, pTask->id.idStr, sversion);
1150

1151
  walReaderSeekVer(pTask->exec.pWalReader, sversion);
L
liuyao 已提交
1152
  pTask->chkInfo.currentVer = sversion;
1153

1154
  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
L
Liu Jicong 已提交
1155 1156 1157 1158
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return 0;
  }

1159 1160 1161
  // restore param
  code = streamRestoreParam(pTask);
  if (code < 0) {
L
Liu Jicong 已提交
1162
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1163 1164 1165 1166
    return -1;
  }

  // set status normal
H
Haojun Liao 已提交
1167
  tqDebug("s-task:%s blocking stage completed, set the status to be normal", pTask->id.idStr);
1168 1169
  code = streamSetStatusNormal(pTask);
  if (code < 0) {
L
Liu Jicong 已提交
1170
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1171 1172 1173
    return -1;
  }

dengyihao's avatar
dengyihao 已提交
1174
  double el = (taosGetTimestampMs() - st) / 1000.0;
H
Haojun Liao 已提交
1175
  tqDebug("s-task:%s step2 recover finished, el:%.2fs", pTask->id.idStr, el);
H
Haojun Liao 已提交
1176

1177 1178 1179
  // dispatch recover finish req to all related downstream task
  code = streamDispatchRecoverFinishReq(pTask);
  if (code < 0) {
L
Liu Jicong 已提交
1180
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1181 1182 1183
    return -1;
  }

L
Liu Jicong 已提交
1184 1185 1186
  atomic_store_8(&pTask->fillHistory, 0);
  streamMetaSaveTask(pTq->pStreamMeta, pTask);

L
Liu Jicong 已提交
1187
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1188 1189 1190
  return 0;
}

L
Liu Jicong 已提交
1191 1192 1193
int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
1194 1195

  // deserialize
1196 1197 1198
  SStreamRecoverFinishReq req;

  SDecoder decoder;
X
Xiaoyu Wang 已提交
1199
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
1200 1201 1202
  tDecodeSStreamRecoverFinishReq(&decoder, &req);
  tDecoderClear(&decoder);

1203
  // find task
L
Liu Jicong 已提交
1204
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
1205 1206 1207
  if (pTask == NULL) {
    return -1;
  }
1208
  // do process request
1209
  if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) {
L
Liu Jicong 已提交
1210
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1211 1212 1213
    return -1;
  }

L
Liu Jicong 已提交
1214
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1215
  return 0;
L
Liu Jicong 已提交
1216
}
L
Liu Jicong 已提交
1217

L
Liu Jicong 已提交
1218 1219 1220 1221 1222
int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg) {
  //
  return 0;
}

1223 1224 1225 1226
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) {
  SDecoder*   pCoder = &(SDecoder){0};
  SDeleteRes* pRes = &(SDeleteRes){0};

H
Haojun Liao 已提交
1227 1228
  *pRefBlock = NULL;

1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275
  pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t));
  if (pRes->uidList == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  tDecoderInit(pCoder, (uint8_t*)pData, len);
  tDecodeDeleteRes(pCoder, pRes);
  tDecoderClear(pCoder);

  int32_t numOfTables = taosArrayGetSize(pRes->uidList);
  if (numOfTables == 0 || pRes->affectedRows == 0) {
    taosArrayDestroy(pRes->uidList);
    return TSDB_CODE_SUCCESS;
  }

  SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA);
  blockDataEnsureCapacity(pDelBlock, numOfTables);
  pDelBlock->info.rows = numOfTables;
  pDelBlock->info.version = ver;

  for (int32_t i = 0; i < numOfTables; i++) {
    // start key column
    SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX);
    colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false);  // end key column
    SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX);
    colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false);
    // uid column
    SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX);
    int64_t*         pUid = taosArrayGet(pRes->uidList, i);
    colDataSetVal(pUidCol, i, (const char*)pUid, false);

    colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX), i);
    colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX), i);
    colDataSetNULL(taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX), i);
  }

  taosArrayDestroy(pRes->uidList);
  *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
  if (pRefBlock == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  (*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK;
  (*pRefBlock)->pBlock = pDelBlock;
  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
1276 1277
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamTaskRunReq* pReq = pMsg->pCont;
1278 1279 1280 1281

  int32_t taskId = pReq->taskId;
  int32_t vgId = TD_VID(pTq->pVnode);

1282 1283
  if (taskId == WAL_READ_TASKS_ID) {  // all tasks are extracted submit data from the wal
    tqStreamTasksScanWal(pTq);
L
Liu Jicong 已提交
1284
    return 0;
1285
  }
1286

1287
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
1288 1289
  if (pTask != NULL) {
    if (pTask->status.taskStatus == TASK_STATUS__NORMAL) {
dengyihao's avatar
dengyihao 已提交
1290 1291
      tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, pTask->id.idStr,
              pTask->chkInfo.version);
1292
      streamProcessRunReq(pTask);
1293
    } else {
L
liuyao 已提交
1294
      if (streamTaskShouldPause(&pTask->status)) {
L
liuyao 已提交
1295
        atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
L
liuyao 已提交
1296
      }
1297
      tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr);
1298
    }
1299

L
Liu Jicong 已提交
1300
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1301
    tqStartStreamTasks(pTq);
L
Liu Jicong 已提交
1302
    return 0;
1303
  } else {
1304
    tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId);
1305
    return -1;
L
Liu Jicong 已提交
1306
  }
L
Liu Jicong 已提交
1307 1308
}

L
Liu Jicong 已提交
1309
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) {
dengyihao's avatar
dengyihao 已提交
1310 1311 1312
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
1313 1314 1315 1316

  SStreamDispatchReq req = {0};

  SDecoder decoder;
L
Liu Jicong 已提交
1317
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
1318
  tDecodeStreamDispatchReq(&decoder, &req);
L
Liu Jicong 已提交
1319

1320
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
L
Liu Jicong 已提交
1321
  if (pTask) {
1322
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
1323
    streamProcessDispatchMsg(pTask, &req, &rsp, exec);
L
Liu Jicong 已提交
1324
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
L
Liu Jicong 已提交
1325
    return 0;
1326
  } else {
L
liuyao 已提交
1327
    tDeleteStreamDispatchReq(&req);
1328
    return -1;
L
Liu Jicong 已提交
1329
  }
L
Liu Jicong 已提交
1330 1331
}

L
Liu Jicong 已提交
1332 1333
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
1334
  int32_t             taskId = ntohl(pRsp->upstreamTaskId);
L
Liu Jicong 已提交
1335
  SStreamTask*        pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
1336 1337

  int32_t vgId = pTq->pStreamMeta->vgId;
L
Liu Jicong 已提交
1338
  if (pTask) {
1339
    streamProcessDispatchRsp(pTask, pRsp, pMsg->code);
L
Liu Jicong 已提交
1340
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
L
Liu Jicong 已提交
1341
    return 0;
1342
  } else {
1343
    tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, taskId);
1344
    return TSDB_CODE_INVALID_MSG;
L
Liu Jicong 已提交
1345
  }
L
Liu Jicong 已提交
1346
}
L
Liu Jicong 已提交
1347

1348
int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
L
Liu Jicong 已提交
1349
  SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg;
1350 1351
  tqDebug("vgId:%d receive msg to drop stream task:0x%x", TD_VID(pTq->pVnode), pReq->taskId);

1352
  streamMetaRemoveTask(pTq->pStreamMeta, pReq->taskId);
L
Liu Jicong 已提交
1353
  return 0;
L
Liu Jicong 已提交
1354
}
L
Liu Jicong 已提交
1355

5
54liuyao 已提交
1356 1357
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg;
dengyihao's avatar
dengyihao 已提交
1358
  SStreamTask*          pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
L
liuyao 已提交
1359 1360
  if (pTask) {
    tqDebug("vgId:%d s-task:%s set pause flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
L
liuyao 已提交
1361
    atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
L
liuyao 已提交
1362 1363 1364
    atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  }
5
54liuyao 已提交
1365 1366 1367 1368 1369
  return 0;
}

int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg;
1370 1371

  int32_t      vgId = pTq->pStreamMeta->vgId;
L
liuyao 已提交
1372 1373
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
  if (pTask) {
L
liuyao 已提交
1374
    atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);
1375 1376

    // no lock needs to secure the access of the version
1377 1378 1379 1380 1381 1382
    if (pReq->igUntreated && pTask->taskLevel == TASK_LEVEL__SOURCE) {
      // discard all the data  when the stream task is suspended.
      walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
      tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
              ", schedStatus:%d",
              vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
1383
    } else {  // from the previous paused version and go on
1384 1385
      tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
              vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
1386 1387
    }

L
liuyao 已提交
1388
    if (pTask->taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) {
L
liuyao 已提交
1389 1390 1391 1392
      tqStartStreamTasks(pTq);
    } else {
      streamSchedExec(pTask);
    }
L
liuyao 已提交
1393
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
1394 1395
  } else {
    tqError("vgId:%d failed to find the s-task:0x%x for resume stream task", vgId, pReq->taskId);
L
liuyao 已提交
1396
  }
1397

5
54liuyao 已提交
1398 1399 1400
  return 0;
}

L
Liu Jicong 已提交
1401 1402 1403 1404 1405 1406
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*              msgStr = pMsg->pCont;
  char*              msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t            msgLen = pMsg->contLen - sizeof(SMsgHead);
  SStreamRetrieveReq req;
  SDecoder           decoder;
1407
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
L
Liu Jicong 已提交
1408
  tDecodeStreamRetrieveReq(&decoder, &req);
L
Liu Jicong 已提交
1409
  tDecoderClear(&decoder);
L
Liu Jicong 已提交
1410
  int32_t      taskId = req.dstTaskId;
L
Liu Jicong 已提交
1411
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
L
Liu Jicong 已提交
1412
  if (pTask) {
1413
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
L
Liu Jicong 已提交
1414
    streamProcessRetrieveReq(pTask, &req, &rsp);
L
Liu Jicong 已提交
1415
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
L
Liu Jicong 已提交
1416
    tDeleteStreamRetrieveReq(&req);
L
Liu Jicong 已提交
1417
    return 0;
L
Liu Jicong 已提交
1418
  } else {
L
liuyao 已提交
1419
    tDeleteStreamRetrieveReq(&req);
L
Liu Jicong 已提交
1420
    return -1;
L
Liu Jicong 已提交
1421 1422 1423 1424 1425 1426 1427
  }
}

int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  //
  return 0;
}
L
Liu Jicong 已提交
1428

1429 1430 1431 1432 1433 1434
int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) {
  STQ*      pTq = pVnode->pTq;
  SMsgHead* msgStr = pMsg->pCont;
  char*     msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t   msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t   code = 0;
L
Liu Jicong 已提交
1435 1436 1437

  SStreamDispatchReq req;
  SDecoder           decoder;
1438
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
L
Liu Jicong 已提交
1439 1440
  if (tDecodeStreamDispatchReq(&decoder, &req) < 0) {
    code = TSDB_CODE_MSG_DECODE_ERROR;
L
Liu Jicong 已提交
1441
    tDecoderClear(&decoder);
L
Liu Jicong 已提交
1442 1443
    goto FAIL;
  }
L
Liu Jicong 已提交
1444
  tDecoderClear(&decoder);
L
Liu Jicong 已提交
1445

L
Liu Jicong 已提交
1446
  int32_t taskId = req.taskId;
L
Liu Jicong 已提交
1447

L
Liu Jicong 已提交
1448
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
L
Liu Jicong 已提交
1449
  if (pTask) {
1450
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
1451
    streamProcessDispatchMsg(pTask, &req, &rsp, false);
L
Liu Jicong 已提交
1452
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
L
Liu Jicong 已提交
1453 1454
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
1455
    return 0;
5
54liuyao 已提交
1456 1457
  } else {
    tDeleteStreamDispatchReq(&req);
L
Liu Jicong 已提交
1458
  }
L
Liu Jicong 已提交
1459

1460 1461
  code = TSDB_CODE_STREAM_TASK_NOT_EXIST;

L
Liu Jicong 已提交
1462
FAIL:
1463 1464 1465 1466
  if (pMsg->info.handle == NULL) return -1;

  SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
  if (pRspHead == NULL) {
1467
    SRpcMsg rsp = {.code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info};
1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
    tqDebug("send dispatch error rsp, code: %x", code);
    tmsgSendRsp(&rsp);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
    return -1;
  }

  pRspHead->vgId = htonl(req.upstreamNodeId);
  SStreamDispatchRsp* pRsp = POINTER_SHIFT(pRspHead, sizeof(SMsgHead));
  pRsp->streamId = htobe64(req.streamId);
  pRsp->upstreamTaskId = htonl(req.upstreamTaskId);
  pRsp->upstreamNodeId = htonl(req.upstreamNodeId);
  pRsp->downstreamNodeId = htonl(pVnode->config.vgId);
  pRsp->downstreamTaskId = htonl(req.taskId);
  pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL;

L
Liu Jicong 已提交
1484
  SRpcMsg rsp = {
1485
      .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead};
1486
  tqDebug("send dispatch error rsp, code: %x", code);
L
Liu Jicong 已提交
1487
  tmsgSendRsp(&rsp);
L
Liu Jicong 已提交
1488 1489
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
1490
  return -1;
L
Liu Jicong 已提交
1491
}
L
Liu Jicong 已提交
1492

1493
int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; }
1494

1495
int32_t tqStartStreamTasks(STQ* pTq) {
1496
  int32_t      vgId = TD_VID(pTq->pVnode);
1497
  SStreamMeta* pMeta = pTq->pStreamMeta;
1498

1499
  taosWLockLatch(&pMeta->lock);
1500

1501
  int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
1502
  if (numOfTasks == 0) {
1503
    tqInfo("vgId:%d no stream tasks exist", vgId);
1504
    taosWUnLockLatch(&pMeta->lock);
1505 1506 1507
    return 0;
  }

1508
  pMeta->walScanCounter += 1;
1509

1510 1511
  if (pMeta->walScanCounter > 1) {
    tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
1512
    taosWUnLockLatch(&pMeta->lock);
1513 1514 1515
    return 0;
  }

1516 1517 1518
  SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
  if (pRunReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
H
Haojun Liao 已提交
1519
    tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
1520
    taosWUnLockLatch(&pMeta->lock);
1521 1522 1523
    return -1;
  }

H
Haojun Liao 已提交
1524
  tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks);
1525 1526
  pRunReq->head.vgId = vgId;
  pRunReq->streamId = 0;
1527
  pRunReq->taskId = WAL_READ_TASKS_ID;
1528 1529 1530

  SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
  tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
1531
  taosWUnLockLatch(&pMeta->lock);
1532 1533 1534

  return 0;
}