tq.c 53.0 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
S
Shengliang Guan 已提交
14 15
 */

H
Hongze Cheng 已提交
16
#include "tq.h"
S
Shengliang Guan 已提交
17

dengyihao's avatar
dengyihao 已提交
18 19 20
// tqMgmt.inited states:
// 0: not initialized
// 1: already initialized
// 2: initialization or cleanup in progress
21
#define WAL_READ_TASKS_ID (-1)
22

23
static int32_t tqInitialize(STQ* pTq);
dengyihao's avatar
dengyihao 已提交
24

wmmhello's avatar
wmmhello 已提交
25
// Returns true when the handle's status is TMQ_HANDLE_STATUS_EXEC.
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) {
  return pHandle->status == TMQ_HANDLE_STATUS_EXEC;
}
dengyihao's avatar
dengyihao 已提交
26 27
// Marks the handle as executing by storing TMQ_HANDLE_STATUS_EXEC in its status.
static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) {
  pHandle->status = TMQ_HANDLE_STATUS_EXEC;
}
// Marks the handle as idle by storing TMQ_HANDLE_STATUS_IDLE in its status.
static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) {
  pHandle->status = TMQ_HANDLE_STATUS_IDLE;
}
wmmhello's avatar
wmmhello 已提交
28

L
Liu Jicong 已提交
29
// Initializes the module-wide TQ state (timer and stream module) at most once.
//
// tqMgmt.inited is used as a small state machine:
//   0 - not initialized, 1 - initialized, 2 - an init/cleanup is in progress.
//
// Returns 0 on success or when the module was already initialized, -1 on failure.
// On failure the state is rolled back to 0 so that a later call can retry.
int32_t tqInit() {
  int8_t old;

  // wait until no other caller holds the transitional state (2)
  while (1) {
    old = atomic_val_compare_exchange_8(&tqMgmt.inited, 0, 2);
    if (old != 2) break;
  }

  if (old == 0) {
    tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ");
    if (tqMgmt.timer == NULL) {
      atomic_store_8(&tqMgmt.inited, 0);
      return -1;
    }

    if (streamInit() < 0) {
      // Undo the partial initialization. Leaving the state at 2 would make every
      // subsequent tqInit()/tqCleanUp() call spin forever in the loop above.
      taosTmrCleanUp(tqMgmt.timer);
      tqMgmt.timer = NULL;
      atomic_store_8(&tqMgmt.inited, 0);
      return -1;
    }

    atomic_store_8(&tqMgmt.inited, 1);
  }

  return 0;
}
L
Liu Jicong 已提交
50

51
// Tears down the module-wide TQ state if it is currently initialized.
// Moves tqMgmt.inited from 1 to the transitional value 2, releases the timer
// and the stream module, then stores 0. Does nothing when the state was 0.
void tqCleanUp() {
  int8_t prev;

  // retry while another caller is in the middle of an init/cleanup
  do {
    prev = atomic_val_compare_exchange_8(&tqMgmt.inited, 1, 2);
  } while (prev == 2);

  if (prev != 1) {
    return;
  }

  taosTmrCleanUp(tqMgmt.timer);
  streamCleanUp();
  atomic_store_8(&tqMgmt.inited, 0);
}
L
Liu Jicong 已提交
64

65
static void destroyTqHandle(void* data) {
66 67
  STqHandle* pData = (STqHandle*)data;
  qDestroyTask(pData->execHandle.task);
wmmhello's avatar
wmmhello 已提交
68

69
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
L
Liu Jicong 已提交
70
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
71
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
72
    tqReaderClose(pData->execHandle.pTqReader);
73 74
    walCloseReader(pData->pWalReader);
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
L
Liu Jicong 已提交
75
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
76
    walCloseReader(pData->pWalReader);
77
    tqReaderClose(pData->execHandle.pTqReader);
78 79
    taosMemoryFreeClear(pData->execHandle.execTb.qmsg);
    nodesDestroyNode(pData->execHandle.execTb.node);
80
  }
dengyihao's avatar
dengyihao 已提交
81
  if (pData->msg != NULL) {
82 83 84
    rpcFreeCont(pData->msg->pCont);
    taosMemoryFree(pData->msg);
    pData->msg = NULL;
D
dapan1121 已提交
85
  }
L
Liu Jicong 已提交
86 87
}

88 89 90 91 92
// Returns true only when both offsets are of type TMQ_OFFSET__LOG and the
// left version is less than or equal to the right version.
static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) {
  if (pLeft->val.type != TMQ_OFFSET__LOG || pRight->val.type != TMQ_OFFSET__LOG) {
    return false;
  }

  return pLeft->val.version <= pRight->val.version;
}

L
Liu Jicong 已提交
93
// Allocates and opens the TQ object of a vnode.
//
// path   - directory path; duplicated and owned by the returned object
// pVnode - owning vnode; its WAL is read to record the last log version
//
// Returns the new STQ on success. Returns NULL on failure; allocation
// failures set terrno to TSDB_CODE_OUT_OF_MEMORY.
STQ* tqOpen(const char* path, SVnode* pVnode) {
  STQ* pTq = taosMemoryCalloc(1, sizeof(STQ));
  if (pTq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pTq->path = taosStrdup(path);
  if (pTq->path == NULL) {
    taosMemoryFree(pTq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pTq->pVnode = pVnode;
  pTq->walLogLastVer = pVnode->pWal->vers.lastVer;

  taosInitRWLatch(&pTq->lock);

  pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
  pTq->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK);
  pTq->pCheckInfo = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);

  if (pTq->pHandle == NULL || pTq->pPushMgr == NULL || pTq->pCheckInfo == NULL) {
    // Nothing beyond the tables and the path has been set up yet, so release
    // them directly instead of going through tqClose().
    if (pTq->pHandle != NULL) {
      taosHashCleanup(pTq->pHandle);
    }
    if (pTq->pPushMgr != NULL) {
      taosHashCleanup(pTq->pPushMgr);
    }
    if (pTq->pCheckInfo != NULL) {
      taosHashCleanup(pTq->pCheckInfo);
    }
    taosMemoryFree(pTq->path);
    taosMemoryFree(pTq);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // entries removed from these tables release their own resources
  taosHashSetFreeFp(pTq->pHandle, destroyTqHandle);
  taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);

  int32_t code = tqInitialize(pTq);
  if (code != TSDB_CODE_SUCCESS) {
    tqClose(pTq);
    return NULL;
  }

  return pTq;
}

// Opens the persistent parts of a freshly allocated STQ, in order: the tq meta
// store, the offset store, the stream meta, and finally the stream tasks.
// Returns 0 on success and -1 as soon as one step fails; nothing is rolled
// back here.
int32_t tqInitialize(STQ* pTq) {
  if (tqMetaOpen(pTq) < 0) {
    return -1;
  }

  pTq->pOffsetStore = tqOffsetOpen(pTq);
  if (NULL == pTq->pOffsetStore) {
    return -1;
  }

  pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId);
  if (NULL == pTq->pStreamMeta) {
    return -1;
  }

  // the version is kept in task's meta data
  // todo check if this version is required or not
  int32_t loaded = streamLoadTasks(pTq->pStreamMeta, walGetCommittedVer(pTq->pVnode->pWal));
  return (loaded < 0) ? -1 : 0;
}
L
Liu Jicong 已提交
145

L
Liu Jicong 已提交
146
// Releases everything owned by the STQ object and then the object itself.
// Passing NULL is a no-op.
void tqClose(STQ* pTq) {
  if (NULL != pTq) {
    tqOffsetClose(pTq->pOffsetStore);

    // in-memory tables
    taosHashCleanup(pTq->pHandle);
    taosHashCleanup(pTq->pPushMgr);
    taosHashCleanup(pTq->pCheckInfo);

    taosMemoryFree(pTq->path);

    // persistent stores
    tqMetaClose(pTq);
    streamMetaClose(pTq->pStreamMeta);

    taosMemoryFree(pTq);
  }
}
L
Liu Jicong 已提交
160

H
Haojun Liao 已提交
161 162 163 164 165 166 167 168 169 170 171 172 173
// Walks all stream tasks registered in the stream meta, sets each task's
// status to TASK_STATUS__STOP and kills its executor, logging how long the
// kill took. The stream meta write latch is held for the whole walk.
// Passing NULL is a no-op.
void tqNotifyClose(STQ* pTq) {
  if (pTq == NULL) {
    return;
  }

  taosWLockLatch(&pTq->pStreamMeta->lock);

  for (void* pIter = taosHashIterate(pTq->pStreamMeta->pTasks, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter)) {
    // each hash entry stores a pointer to the task
    SStreamTask* pTask = *(SStreamTask**)pIter;

    tqDebug("vgId:%d s-task:%s set dropping flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
    pTask->status.taskStatus = TASK_STATUS__STOP;

    int64_t startTs = taosGetTimestampMs();
    qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
    int64_t elapsed = taosGetTimestampMs() - startTs;

    tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, elapsed);
  }

  taosWUnLockLatch(&pTq->pStreamMeta->lock);
}

D
dapan1121 已提交
186 187
static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
                             int64_t consumerId, int32_t type) {
L
Liu Jicong 已提交
188 189
  int32_t len = 0;
  int32_t code = 0;
D
dapan1121 已提交
190 191

  if (type == TMQ_MSG_TYPE__POLL_RSP) {
H
Haojun Liao 已提交
192
    tEncodeSize(tEncodeMqDataRsp, pRsp, len, code);
D
dapan1121 已提交
193 194 195
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
    tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
  }
L
Liu Jicong 已提交
196 197 198 199 200 201 202 203 204 205 206

  if (code < 0) {
    return -1;
  }

  int32_t tlen = sizeof(SMqRspHead) + len;
  void*   buf = rpcMallocCont(tlen);
  if (buf == NULL) {
    return -1;
  }

D
dapan1121 已提交
207 208 209
  ((SMqRspHead*)buf)->mqMsgType = type;
  ((SMqRspHead*)buf)->epoch = epoch;
  ((SMqRspHead*)buf)->consumerId = consumerId;
L
Liu Jicong 已提交
210 211 212 213 214 215

  void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));

  SEncoder encoder = {0};
  tEncoderInit(&encoder, abuf, len);

D
dapan1121 已提交
216
  if (type == TMQ_MSG_TYPE__POLL_RSP) {
H
Haojun Liao 已提交
217
    tEncodeMqDataRsp(&encoder, pRsp);
D
dapan1121 已提交
218
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
X
Xiaoyu Wang 已提交
219
    tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp);
dengyihao's avatar
dengyihao 已提交
220
  }
L
Liu Jicong 已提交
221

wmmhello's avatar
wmmhello 已提交
222
  tEncoderClear(&encoder);
L
Liu Jicong 已提交
223 224

  SRpcMsg rsp = {
D
dapan1121 已提交
225
      .info = *pRpcHandleInfo,
L
Liu Jicong 已提交
226 227 228 229
      .pCont = buf,
      .contLen = tlen,
      .code = 0,
  };
L
Liu Jicong 已提交
230

L
Liu Jicong 已提交
231
  tmsgSendRsp(&rsp);
L
Liu Jicong 已提交
232 233
  return 0;
}
L
Liu Jicong 已提交
234

H
Haojun Liao 已提交
235
// Sends an otherwise zero-initialized poll response to the consumer recorded
// in the handle, using the rpc info of the message stored in pHandle->msg, and
// logs the request/response offsets. Always returns 0.
int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId) {
  SMqDataRsp dataRsp = {
      .head = {.consumerId = pHandle->consumerId, .epoch = pHandle->epoch, .mqMsgType = TMQ_MSG_TYPE__POLL_RSP},
  };

  // valid wal version range, passed along with the response
  int64_t sver = 0;
  int64_t ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);

  tqDoSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_RSP, sver,
                  ever);

  char reqStr[80] = {0};
  char rspStr[80] = {0};
  tFormatOffset(reqStr, tListLen(reqStr), &dataRsp.reqOffset);
  tFormatOffset(rspStr, tListLen(rspStr), &dataRsp.rspOffset);

  tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s", vgId,
          dataRsp.head.consumerId, dataRsp.head.epoch, dataRsp.blockNum, reqStr, rspStr);
  return 0;
}

255 256 257 258 259 260
// Sends the given data response as the reply to a poll request and logs the
// request/response offsets. The epoch and consumer id written to the reply
// come from the request. Always returns 0.
int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp,
                      int32_t type, int32_t vgId) {
  // valid wal version range, passed along with the response
  int64_t sver = 0;
  int64_t ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);

  tqDoSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type, sver, ever);

  char reqStr[80] = {0};
  char rspStr[80] = {0};
  tFormatOffset(reqStr, tListLen(reqStr), &pRsp->reqOffset);
  tFormatOffset(rspStr, tListLen(rspStr), &pRsp->rspOffset);

  tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%" PRIx64, vgId,
          pReq->consumerId, pReq->epoch, pRsp->blockNum, reqStr, rspStr, pReq->reqId);

  return 0;
}

273
// Processes an offset commit message for one subscription key.
//
// sversion - version associated with this request; a committed log offset of
//            exactly sversion - 1 is advanced by one before being stored
// msg/msgLen - encoded SMqVgOffset
//
// Returns 0 when the offset was stored or did not need to be updated, -1 when
// the message cannot be decoded, the offset type is invalid, or the write fails.
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) {
    // release the decoder on the failure path too
    tDecoderClear(&decoder);
    return -1;
  }

  tDecoderClear(&decoder);

  STqOffset* pOffset = &vgOffset.offset;

  if (pOffset->val.type == TMQ_OFFSET__SNAPSHOT_DATA || pOffset->val.type == TMQ_OFFSET__SNAPSHOT_META) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
            pOffset->subKey, vgId, pOffset->val.uid, pOffset->val.ts);
  } else if (pOffset->val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, pOffset->subKey, vgId,
            pOffset->val.version);
    if (pOffset->val.version + 1 == sversion) {
      pOffset->val.version += 1;
    }
  } else {
    tqError("invalid commit offset type:%d", pOffset->val.type);
    return -1;
  }

  // skip the write when the stored log offset is already at or past the committed one
  STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey);
  if (pSavedOffset != NULL && tqOffsetLessOrEqual(pOffset, pSavedOffset)) {
    tqDebug("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64,
            vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version);
    return 0;  // no need to update the offset value
  }

  // save the new offset value
  if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) {
    return -1;
  }

  return 0;
}

316
// Processes a seek request: moves the stored log offset of one subscription
// key to the requested version, clamped to the valid wal version range.
//
// msg/msgLen - encoded SMqVgOffset (only TMQ_OFFSET__LOG offsets are accepted)
//
// Returns 0 when the offset was stored or no change was needed, -1 on decode
// failure, invalid offset type, unknown subscription key, consumer mismatch
// (terrno is set for the latter two) or when the write fails.
int32_t tqProcessSeekReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SMqVgOffset vgOffset = {0};
  int32_t     vgId = TD_VID(pTq->pVnode);

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) {
    tqError("vgId:%d failed to decode seek msg", vgId);
    // release the decoder on the failure path too
    tDecoderClear(&decoder);
    return -1;
  }

  tDecoderClear(&decoder);

  tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" PRIx64 ", req offset:%" PRId64,
          vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version);

  // 1. validate the offset type and find the handle
  STqOffset* pOffset = &vgOffset.offset;
  if (pOffset->val.type != TMQ_OFFSET__LOG) {
    tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type);
    return -1;
  }

  STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey));
  if (pHandle == NULL) {
    tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, pOffset->subKey);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  // 2. check consumer-vg assignment status
  taosRLockLatch(&pTq->lock);
  if (pHandle->consumerId != vgOffset.consumerId) {
    tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId);
    terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    taosRUnLockLatch(&pTq->lock);
    return -1;
  }
  taosRUnLockLatch(&pTq->lock);

  // 3. check the offset info
  STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey);
  if (pSavedOffset != NULL) {
    if (pSavedOffset->val.type != TMQ_OFFSET__LOG) {
      tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey);
      return 0;  // no need to update the offset value
    }

    if (pSavedOffset->val.version == pOffset->val.version) {
      tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey,
              pOffset->val.version, pSavedOffset->val.version);
      return 0;
    }
  }

  // 4. clamp the requested version into the valid wal range [sver, ever]
  int64_t sver = 0, ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);
  if (pOffset->val.version < sver) {
    pOffset->val.version = sver;
  } else if (pOffset->val.version > ever) {
    pOffset->val.version = ever;
  }

  // save the new offset value
  if (pSavedOffset != NULL) {
    tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version,
            pSavedOffset->val.version);
  } else {
    tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version);
  }

  if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) {
    tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version);
    return -1;
  }

  tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId,
          vgOffset.consumerId, vgOffset.offset.val.version);

  return 0;
}

L
Liu Jicong 已提交
398
// Checks whether column `colId` of table `tbUid` is listed in any registered
// check info. Returns -1 when a check info whose ntbUid equals tbUid lists the
// column in its colIdList, 0 otherwise.
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
  for (void* pIter = taosHashIterate(pTq->pCheckInfo, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pCheckInfo, pIter)) {
    STqCheckInfo* pInfo = (STqCheckInfo*)pIter;
    if (pInfo->ntbUid != tbUid) {
      continue;
    }

    int32_t numOfCols = taosArrayGetSize(pInfo->colIdList);
    for (int32_t idx = 0; idx < numOfCols; ++idx) {
      int16_t listedColId = *(int16_t*)taosArrayGet(pInfo->colIdList, idx);
      if (listedColId == colId) {
        // leaving the iteration early: the iterator must be cancelled explicitly
        taosHashCancelIterate(pTq->pCheckInfo, pIter);
        return -1;
      }
    }
  }

  return 0;
}

D
dapan1121 已提交
424
// Processes a consumer poll request.
//
// The handle for req.subKey is looked up under the tq write latch and claimed
// by switching it to the exec state; while another poll holds it, this call
// sleeps 10ms and retries. After the data has been extracted the handle is set
// back to idle.
//
// Returns the result of tqExtractDataForMq, or -1 (with terrno set) when the
// request cannot be deserialized, the subscription key is unknown, or the
// handle belongs to a different consumer.
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  int64_t      consumerId = req.consumerId;
  int32_t      reqEpoch = req.epoch;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);
  STqHandle*   pHandle = NULL;

  for (;;) {
    taosWLockLatch(&pTq->lock);

    // 1. find handle
    pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
    if (NULL == pHandle) {
      tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_MSG;
      taosWUnLockLatch(&pTq->lock);
      return -1;
    }

    // 2. check re-balance status
    if (pHandle->consumerId != consumerId) {
      tqError("ERROR tmq poll: consumer:0x%" PRIx64
              " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
              consumerId, vgId, req.subKey, pHandle->consumerId);
      terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
      taosWUnLockLatch(&pTq->lock);
      return -1;
    }

    // claim the handle when nobody else is executing on it
    if (!tqIsHandleExec(pHandle)) {
      tqSetHandleExec(pHandle);
      //      qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS);
      tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId,
              req.subKey, pHandle);
      taosWUnLockLatch(&pTq->lock);
      break;
    }

    taosWUnLockLatch(&pTq->lock);

    tqDebug("tmq poll: consumer:0x%" PRIx64
            "vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p",
            consumerId, vgId, req.subKey, pHandle);
    taosMsleep(10);
  }

  // 3. update the epoch value
  if (reqEpoch > pHandle->epoch) {
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch,
            reqEpoch);
    pHandle->epoch = reqEpoch;
  }

  char offsetStr[80];
  tFormatOffset(offsetStr, 80, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64,
          consumerId, req.epoch, pHandle->subKey, vgId, offsetStr, req.reqId);

  // 4. extract the data and release the handle
  int32_t code = tqExtractDataForMq(pTq, pHandle, &req, pMsg);
  tqSetHandleIdle(pHandle);

  tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId,
          req.subKey, pHandle);
  return code;
}

497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
// Answers a wal-info request: reports the consume position of one subscription
// key together with the valid wal version range.
//
// The response offset is the stored offset when one exists (it must be a log
// offset). Otherwise it is derived from the request offset: the wal reader's
// current version (or the requested version when reading has not started) for
// TMQ_OFFSET__LOG, the first valid version for TMQ_OFFSET__RESET_EARLIEAST,
// and the last valid version for TMQ_OFFSET__RESET_LATEST.
//
// Returns 0 after the response has been sent, -1 with terrno set otherwise.
int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) {
  SMqPollReq req = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  int64_t      consumerId = req.consumerId;
  STqOffsetVal reqOffset = req.reqOffset;
  int32_t      vgId = TD_VID(pTq->pVnode);

  // 1. find handle
  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (pHandle == NULL) {
    tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s not found", consumerId, vgId, req.subKey);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  // 2. check re-balance status
  taosRLockLatch(&pTq->lock);
  if (pHandle->consumerId != consumerId) {
    tqDebug("ERROR consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, vgId, req.subKey, pHandle->consumerId);
    terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
    taosRUnLockLatch(&pTq->lock);
    return -1;
  }
  taosRUnLockLatch(&pTq->lock);

  int64_t sver = 0, ever = 0;
  walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);

  SMqDataRsp dataRsp = {0};
  tqInitDataRsp(&dataRsp, &req);

  // From here on every exit goes through `end` so that dataRsp is always
  // released, including after a successful send.
  int32_t code = -1;

  STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, req.subKey);
  if (pOffset != NULL) {
    if (pOffset->val.type != TMQ_OFFSET__LOG) {
      tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s use snapshot, no valid wal info", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_PARA;
      goto end;
    }

    dataRsp.rspOffset.type = TMQ_OFFSET__LOG;
    dataRsp.rspOffset.version = pOffset->val.version;
  } else {
    if (req.useSnapshot == true) {
      tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s snapshot not support wal info", consumerId, vgId, req.subKey);
      terrno = TSDB_CODE_INVALID_PARA;
      goto end;
    }

    dataRsp.rspOffset.type = TMQ_OFFSET__LOG;

    if (reqOffset.type == TMQ_OFFSET__LOG) {
      int64_t currentVer = walReaderGetCurrentVer(pHandle->execHandle.pTqReader->pWalReader);
      if (currentVer == -1) {  // not start to read data from wal yet, return req offset directly
        dataRsp.rspOffset.version = reqOffset.version;
      } else {
        dataRsp.rspOffset.version = currentVer;  // return current consume offset value
      }
    } else if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) {
      dataRsp.rspOffset.version = sver;  // not consume yet, set the earliest position
    } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
      dataRsp.rspOffset.version = ever;
    } else {
      tqError("consumer:0x%" PRIx64 " vgId:%d subkey:%s invalid offset type:%d", consumerId, vgId, req.subKey,
              reqOffset.type);
      terrno = TSDB_CODE_INVALID_PARA;
      goto end;
    }
  }

  tqDoSendDataRsp(&pMsg->info, &dataRsp, req.epoch, req.consumerId, TMQ_MSG_TYPE__WALINFO_RSP, sver, ever);
  code = 0;

end:
  tDeleteMqDataRsp(&dataRsp);
  return code;
}

579
// Removes a subscription from this vnode: waits (under the tq write latch)
// until the handle is no longer executing, closes its wal reference, drops the
// handle from the in-memory table, then deletes the stored offset and the
// persisted handle. Individual failures are only logged; always returns 0.
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;
  int32_t        vgId = TD_VID(pTq->pVnode);

  tqDebug("vgId:%d, tq process delete sub req %s", vgId, pReq->subKey);

  taosWLockLatch(&pTq->lock);

  STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
  if (NULL != pHandle) {
    // let an in-flight poll on this handle finish first
    while (tqIsHandleExec(pHandle)) {
      tqDebug("vgId:%d, topic:%s, subscription is executing, wait for 10ms and retry, pHandle:%p", vgId,
              pHandle->subKey, pHandle);
      taosMsleep(10);
    }

    if (pHandle->pRef) {
      walCloseRef(pTq->pVnode->pWal, pHandle->pRef->refId);
    }

    if (taosHashRemove(pTq->pHandle, pReq->subKey, strlen(pReq->subKey)) != 0) {
      tqError("cannot process tq delete req %s, since no such handle", pReq->subKey);
    }
  }

  if (tqOffsetDelete(pTq->pOffsetStore, pReq->subKey) != 0) {
    tqError("cannot process tq delete req %s, since no such offset in cache", pReq->subKey);
  }

  if (tqMetaDeleteHandle(pTq, pReq->subKey) < 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", pReq->subKey);
  }

  taosWUnLockLatch(&pTq->lock);

  return 0;
}

618
// Registers a check info: decodes it from msg, stores it in the in-memory
// pCheckInfo table keyed by topic, and persists the raw message.
//
// Returns 0 on success. On any failure returns -1 with terrno set to
// TSDB_CODE_OUT_OF_MEMORY.
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  STqCheckInfo info = {0};
  SDecoder     decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  if (tDecodeSTqCheckInfo(&decoder, &info) < 0) {
    // release the decoder on the failure path too
    tDecoderClear(&decoder);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  tDecoderClear(&decoder);

  if (taosHashPut(pTq->pCheckInfo, info.topic, strlen(info.topic), &info, sizeof(STqCheckInfo)) < 0) {
    // The table did not take the entry, so its free callback will never run
    // for it; release the decoded content here.
    tDeleteSTqCheckInfo(&info);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // from here on the content of `info` is owned by the table entry
  if (tqMetaSaveCheckInfo(pTq, info.topic, msg, msgLen) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  return 0;
}

638
// Removes the check info whose key is the NUL-terminated string in msg, first
// from the in-memory table and then from the persisted meta. Returns 0 on
// success; when either step fails returns -1 with terrno set to
// TSDB_CODE_OUT_OF_MEMORY (the second step is skipped if the first fails).
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  if (taosHashRemove(pTq->pCheckInfo, msg, strlen(msg)) < 0 || tqMetaDeleteCheckInfo(pTq, msg) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

650
// Processes a subscribe / re-balance request for one subscription key.
//
// When no handle exists for req.subKey a new one is built (wal reference,
// executor task and reader according to the subscription type), inserted into
// pTq->pHandle and persisted. When a handle already exists its consumer id is
// switched to req.newConsumerId under the tq write latch, the handle is
// removed from the push manager, and the handle is persisted again.
//
// Returns 0 on success (a request without a new consumer id for an unknown
// key is logged and also reported as 0), or a non-zero code on failure. Every
// exit after decoding starts goes through `end` so the decoder is always
// cleared.
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int         ret = 0;
  SMqRebVgReq req = {0};
  SDecoder    dc = {0};

  tDecoderInit(&dc, (uint8_t*)msg, msgLen);

  // decode req
  if (tDecodeSMqRebVgReq(&dc, &req) < 0) {
    terrno = TSDB_CODE_INVALID_MSG;
    tDecoderClear(&dc);
    return -1;
  }

  SVnode* pVnode = pTq->pVnode;
  int32_t vgId = TD_VID(pVnode);

  tqDebug("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pVnode->config.vgId, req.subKey,
          req.oldConsumerId, req.newConsumerId);

  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
  if (pHandle == NULL) {
    if (req.oldConsumerId != -1) {
      tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
              req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
    }

    if (req.newConsumerId == -1) {
      tqError("vgId:%d, tq invalid re-balance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
      goto end;
    }

    // build the handle on the stack; taosHashPut below copies it into the table
    STqHandle tqHandle = {0};
    pHandle = &tqHandle;

    memcpy(pHandle->subKey, req.subKey, TSDB_SUBSCRIBE_KEY_LEN);
    pHandle->consumerId = req.newConsumerId;
    pHandle->epoch = -1;

    pHandle->execHandle.subType = req.subType;
    pHandle->fetchMeta = req.withMeta;

    // TODO version should be assigned and refed during preprocess
    SWalRef* pRef = walRefCommittedVer(pVnode->pWal);
    if (pRef == NULL) {
      ret = -1;
      goto end;
    }

    int64_t ver = pRef->refVer;
    pHandle->pRef = pRef;

    SReadHandle handle = {.vnode = pVnode, .initTableReader = true, .initTqReader = true, .version = ver};
    initStorageAPI(&handle.api);

    pHandle->snapshotVer = ver;

    if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
      pHandle->execHandle.execCol.qmsg = taosStrdup(req.qmsg);

      pHandle->execHandle.task = qCreateQueueExecTaskInfo(pHandle->execHandle.execCol.qmsg, &handle, vgId,
                                                          &pHandle->execHandle.numOfCols, req.newConsumerId);
      void* scanner = NULL;
      qExtractStreamScanner(pHandle->execHandle.task, &scanner);
      pHandle->execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner);
    } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) {
      pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL);
      pHandle->execHandle.pTqReader = tqReaderOpen(pVnode);

      pHandle->execHandle.execDb.pFilterOutTbUid =
          taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK);
      buildSnapContext(handle.vnode, handle.version, 0, pHandle->execHandle.subType, pHandle->fetchMeta,
                       (SSnapContext**)(&handle.sContext));

      pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, vgId, NULL, req.newConsumerId);
    } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
      pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL);
      pHandle->execHandle.execTb.suid = req.suid;
      pHandle->execHandle.execTb.qmsg = taosStrdup(req.qmsg);

      if (strcmp(pHandle->execHandle.execTb.qmsg, "") != 0) {
        if (nodesStringToNode(pHandle->execHandle.execTb.qmsg, &pHandle->execHandle.execTb.node) != 0) {
          tqError("nodesStringToNode error in sub stable, since %s, vgId:%d, subkey:%s consumer:0x%" PRIx64, terrstr(),
                  pVnode->config.vgId, req.subKey, pHandle->consumerId);
          // leave through `end` like the other error paths so the decoder is cleared
          // NOTE(review): the wal ref/reader and qmsg acquired above are not released on
          // this path (nor on the qGetTableList failure below) - confirm and clean up.
          ret = -1;
          goto end;
        }
      }

      buildSnapContext(handle.vnode, handle.version, req.suid, pHandle->execHandle.subType, pHandle->fetchMeta,
                       (SSnapContext**)(&handle.sContext));
      pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, vgId, NULL, req.newConsumerId);

      SArray* tbUidList = NULL;
      ret = qGetTableList(req.suid, pVnode, pHandle->execHandle.execTb.node, &tbUidList, pHandle->execHandle.task);
      if (ret != TDB_CODE_SUCCESS) {
        tqError("qGetTableList error:%d vgId:%d, subkey:%s consumer:0x%" PRIx64, ret, pVnode->config.vgId, req.subKey,
                pHandle->consumerId);
        taosArrayDestroy(tbUidList);
        goto end;
      }
      tqDebug("tq try to get ctb for stb subscribe, vgId:%d, subkey:%s consumer:0x%" PRIx64 " suid:%" PRId64,
              pVnode->config.vgId, req.subKey, pHandle->consumerId, req.suid);
      pHandle->execHandle.pTqReader = tqReaderOpen(pVnode);
      tqReaderSetTbUidList(pHandle->execHandle.pTqReader, tbUidList, NULL);
      taosArrayDestroy(tbUidList);
    }

    taosHashPut(pTq->pHandle, req.subKey, strlen(req.subKey), pHandle, sizeof(STqHandle));
    tqDebug("try to persist handle %s consumer:0x%" PRIx64, req.subKey, pHandle->consumerId);
    ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
    goto end;
  } else {
    taosWLockLatch(&pTq->lock);

    if (pHandle->consumerId == req.newConsumerId) {  // do nothing
      tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs, should not reach here", req.vgId,
             req.newConsumerId);
    } else {
      tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
             req.newConsumerId);
      atomic_store_64(&pHandle->consumerId, req.newConsumerId);
    }
    //    atomic_add_fetch_32(&pHandle->epoch, 1);

    // kill executing task
    //    if(tqIsHandleExec(pHandle)) {
    //      qTaskInfo_t pTaskInfo = pHandle->execHandle.task;
    //      if (pTaskInfo != NULL) {
    //        qKillTask(pTaskInfo, TSDB_CODE_SUCCESS);
    //      }

    //      if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
    //        qStreamCloseTsdbReader(pTaskInfo);
    //      }
    //    }
    // remove if it has been register in the push manager, and return one empty block to consumer
    tqUnregisterPushHandle(pTq, pHandle);
    taosWUnLockLatch(&pTq->lock);
    ret = tqMetaSaveHandle(pTq, req.subKey, pHandle);
  }

end:
  tDecoderClear(&dc);
  return ret;
}
795

dengyihao's avatar
dengyihao 已提交
796
// Release the heap pointer stored in the slot that `ptr` addresses (`ptr` is treated as a `void**`).
void freePtr(void* ptr) {
  void** ppSlot = (void**)ptr;
  taosMemoryFree(*ppSlot);
}
L
liuyao 已提交
797

798
/*
 * Fill in the runtime state of a stream task hosted by this vnode: id string, input/output queues,
 * checkpoint and data-range versions, the executor (source and agg levels), the sink callbacks and,
 * for source-level tasks, a WAL reader.
 *
 * pTq   - tq instance that provides the vnode and the stream meta
 * pTask - task to expand; modified in place
 * ver   - version used to seed chkInfo and both ends of the data version range
 *
 * Returns 0 on success, or -1 as soon as a queue, a state backend, an executor or the table schema
 * cannot be created. Nothing created before the failure is released in this function.
 */
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
  int32_t vgId = TD_VID(pTq->pVnode);

  pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId);
  pTask->refCnt = 1;
  pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;

  // both queues are opened before either result is examined
  pTask->inputQueue = streamQueueOpen(512 << 10);
  pTask->outputQueue = streamQueueOpen(512 << 10);
  if (NULL == pTask->inputQueue || NULL == pTask->outputQueue) {
    return -1;
  }

  pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
  pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
  pTask->pMsgCb = &pTq->pVnode->msgCb;
  pTask->pMeta = pTq->pStreamMeta;

  // the checkpoint info and the version range all start from the supplied version
  pTask->chkInfo.version = ver;
  pTask->chkInfo.currentVer = ver;
  pTask->dataRange.range.maxVer = ver;
  pTask->dataRange.range.minVer = ver;

  // every task starts in the wait-downstream state (the fill-history dependent choice is disabled)
  pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM;

  // expand executor: only source and agg level tasks own a state backend and an executor
  const bool isSource = (pTask->info.taskLevel == TASK_LEVEL__SOURCE);
  if (isSource || pTask->info.taskLevel == TASK_LEVEL__AGG) {
    pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
    if (NULL == pTask->pState) {
      return -1;
    }

    SReadHandle readHandle = {.pStateBackend = pTask->pState};
    if (isSource) {
      // source tasks read from this vnode and need a tq reader
      readHandle.vnode = pTq->pVnode;
      readHandle.initTqReader = 1;
    } else {
      // agg tasks have no vnode attached; they only record the number of upstream entries
      readHandle.vnode = NULL;
      readHandle.numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamEpInfoList);
    }
    initStorageAPI(&readHandle.api);

    pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &readHandle, vgId);
    if (NULL == pTask->exec.pExecutor) {
      return -1;
    }

    qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId);
  }

  // sink
  if (pTask->outputType == TASK_OUTPUT__SMA) {
    pTask->smaSink.vnode = pTq->pVnode;
    pTask->smaSink.smaSink = smaHandleRes;
  } else if (pTask->outputType == TASK_OUTPUT__TABLE) {
    pTask->tbSink.vnode = pTq->pVnode;
    pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline;

    // schema version defaults to 1 unless the meta lookup of the sink super table succeeds
    SMetaInfo metaInfo = {0};
    int32_t   schemaVer = 1;
    if (metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &metaInfo, NULL) == TSDB_CODE_SUCCESS) {
      schemaVer = metaInfo.skmVer;
    }

    SSchemaWrapper* pWrapper = pTask->tbSink.pSchemaWrapper;
    pTask->tbSink.pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, schemaVer);
    if (NULL == pTask->tbSink.pTSchema) {
      return -1;
    }

    pTask->tbSink.pTblInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
    tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr);
  }

  // the wal reader is opened only after the sink has been set up
  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    SWalFilterCond walCond = {.deleteMsg = 1};  // delete msg also extract from wal files
    pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &walCond);
  }

  streamSetupTrigger(pTask);

  tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", vgId,
         pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel);

  // next valid version will add one
  pTask->chkInfo.version += 1;
  return 0;
}
L
Liu Jicong 已提交
896

897
/*
 * Handle a task-check request sent by an upstream task: look up the addressed downstream task,
 * report its check status (0 when the task does not exist in the stream meta yet) and send the
 * encoded SStreamTaskCheckRsp back through the rpc handle of the request.
 *
 * pTq  - tq instance holding the stream meta
 * pMsg - request; the payload follows an SMsgHead
 *
 * Returns 0 once the response has been handed to tmsgSendRsp, -1 if the request cannot be decoded,
 * the response cannot be sized/encoded, or the response buffer cannot be allocated.
 */
int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  // zero-initialised so that no indeterminate field is ever copied into the response
  SStreamTaskCheckReq req = {0};
  SDecoder            decoder;

  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  int32_t code = tDecodeSStreamTaskCheckReq(&decoder, &req);
  tDecoderClear(&decoder);
  if (code < 0) {
    tqError("vgId:%d failed to decode task check req", pTq->pStreamMeta->vgId);
    return -1;
  }

  int32_t             taskId = req.downstreamTaskId;
  SStreamTaskCheckRsp rsp = {
      .reqId = req.reqId,
      .streamId = req.streamId,
      .childId = req.childId,
      .downstreamNodeId = req.downstreamNodeId,
      .downstreamTaskId = req.downstreamTaskId,
      .upstreamNodeId = req.upstreamNodeId,
      .upstreamTaskId = req.upstreamTaskId,
  };

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (pTask != NULL) {
    rsp.status = streamTaskCheckStatus(pTask);

    // log while the reference is still held: pTask must not be touched after it has been released
    tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%d, rsp status %d",
            pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, pTask->status.taskStatus, rsp.status);

    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  } else {
    rsp.status = 0;
    tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d",
            taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
  }

  SEncoder encoder;
  int32_t  len;

  tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code);
  if (code < 0) {
    tqError("vgId:%d failed to encode task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId);
    return -1;
  }

  void* buf = rpcMallocCont(sizeof(SMsgHead) + len);
  if (buf == NULL) {
    // the head is written right below, so a failed allocation must stop here
    tqError("vgId:%d failed to allocate task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId);
    return -1;
  }

  ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId);

  void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncoderInit(&encoder, (uint8_t*)abuf, len);
  tEncodeSStreamTaskCheckRsp(&encoder, &rsp);
  tEncoderClear(&encoder);

  SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info};

  tmsgSendRsp(&rspMsg);
  return 0;
}

958
/*
 * Handle the response to a task-check request: decode it, find the upstream task it is addressed to
 * and pass the response to streamProcessCheckRsp.
 *
 * Returns the result of streamProcessCheckRsp, or -1 when decoding fails or the task is not found.
 */
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamTaskCheckRsp rsp;
  SDecoder            decoder;

  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t code = tDecodeSStreamTaskCheckRsp(&decoder, &rsp);
  tDecoderClear(&decoder);  // the decoder is cleared whether or not decoding succeeded
  if (code < 0) {
    return -1;
  }

  tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d (vgId:%d) check req from task:0x%x (vgId:%d), status %d",
          rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);

  SStreamMeta* pMeta = pTq->pStreamMeta;
  SStreamTask* pTask = streamMetaAcquireTask(pMeta, rsp.upstreamTaskId);
  if (NULL == pTask) {
    tqError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId,
            pMeta->vgId);
    return -1;
  }

  code = streamProcessCheckRsp(pTask, &rsp);
  streamMetaReleaseTask(pMeta, pTask);
  return code;
}

987
/*
 * Deploy a stream task described by `msg` into this vnode.
 *
 *  1. allocate a zeroed SStreamTask and decode the message into it;
 *  2. add it to the stream meta under the meta write lock, passing `sversion` along;
 *  3. unless the task is itself a fill-history task: start its associated history task when it has
 *     one (and rewrite the time window / version range of this task), then check the downstream tasks.
 *
 * Returns 0 on success or when streams are disabled (tsDisableStream), -1 on allocation, decode or
 * registration failure.
 */
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t vgId = TD_VID(pTq->pVnode);

  if (tsDisableStream) {
    return 0;
  }

  // 1.deserialize msg and build task
  SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask));
  if (NULL == pTask) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tqError("vgId:%d failed to create stream task due to out of memory, alloc size:%d", vgId,
            (int32_t)sizeof(SStreamTask));
    return -1;
  }

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t code = tDecodeStreamTask(&decoder, pTask);
  tDecoderClear(&decoder);
  if (code < 0) {
    taosMemoryFree(pTask);
    return -1;
  }

  // 2.save task, use the newest commit version as the initial start version of stream task.
  SStreamMeta* pStreamMeta = pTq->pStreamMeta;

  taosWLockLatch(&pStreamMeta->lock);
  code = streamMetaAddDeployedTask(pStreamMeta, sversion, pTask);
  int32_t numOfTasks = streamMetaGetNumOfTasks(pStreamMeta);
  if (code < 0) {
    // reported while the lock is still held
    tqError("vgId:%d failed to add s-task:%s, total:%d", vgId, pTask->id.idStr, numOfTasks);
  }
  taosWUnLockLatch(&pStreamMeta->lock);

  if (code < 0) {
    return -1;
  }

  // 3. a fill-history task does nothing here; it waits for the main task to start it
  if (!pTask->info.fillHistory) {
    SHistDataRange* pRange = &pTask->dataRange;

    if (pTask->historyTaskId.taskId == 0) {
      tqDebug("s-task:%s no associated task, stream time window:%" PRId64 " - %" PRId64 ", ver range:%" PRId64
              " - %" PRId64,
              pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer);
    } else {
      // launch the history fill stream task
      streamTaskStartHistoryTask(pTask, sversion);

      // launch current task: its window begins at the previous end key and is open-ended,
      // its version range becomes [0, previous minVer]
      int64_t prevEkey = pRange->window.ekey;
      int64_t prevMinVer = pRange->range.minVer;

      pRange->window.skey = prevEkey;
      pRange->window.ekey = INT64_MAX;
      pRange->range.minVer = 0;
      pRange->range.maxVer = prevMinVer;

      tqDebug("s-task:%s fill-history task exists, update stream time window:%" PRId64 " - %" PRId64
              ", ver range:%" PRId64 " - %" PRId64,
              pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer);
    }

    streamTaskCheckDownstreamTasks(pTask);
  } else {
    tqDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
  }

  tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, numOfTasks:%d", vgId, pTask->id.idStr,
          pTask->status.taskStatus, numOfTasks);

  return 0;
}

L
Liu Jicong 已提交
1068
/*
 * Run step 1 of stream recovery (the history data scan) for the task named in the request.
 *
 * pTq  - tq instance holding the stream meta
 * pMsg - message whose content is read directly as an SStreamRecoverStep1Req
 *
 * Returns:
 *   -1  when the task cannot be acquired or its checkpoint version is not positive;
 *    0  when the task was dropped during the scan, or when the task is a fill-history task
 *       (the follow-up work for that case is not implemented yet);
 *   otherwise the result of streamTaskScanHistoryDataComplete.
 *
 * The task reference taken here is released on every path, including the fill-history one.
 */
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
  int32_t code = TSDB_CODE_SUCCESS;

  SStreamMeta*            pMeta = pTq->pStreamMeta;
  SStreamRecoverStep1Req* pReq = (SStreamRecoverStep1Req*)pMsg->pCont;

  SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->taskId);
  if (pTask == NULL) {
    tqError("vgId:%d failed to acquire stream task:0x%x during stream recover, task may have been destroyed",
            pMeta->vgId, pReq->taskId);
    return -1;
  }

  // check param
  int64_t fillVer1 = pTask->chkInfo.version;
  if (fillVer1 <= 0) {
    streamMetaReleaseTask(pMeta, pTask);
    return -1;
  }

  // do recovery step 1
  tqDebug("s-task:%s start history data scan stage(step 1)", pTask->id.idStr);
  int64_t st = taosGetTimestampMs();

  streamSourceRecoverScanStep1(pTask);
  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
    tqDebug("s-task:%s is dropped, abort recover in step1", pTask->id.idStr);

    streamMetaReleaseTask(pMeta, pTask);
    return 0;
  }

  double el = (taosGetTimestampMs() - st) / 1000.0;
  tqDebug("s-task:%s history data scan stage(step 1) ended, elapsed time:%.2fs", pTask->id.idStr, el);

  if (pTask->info.fillHistory) {
    // TODO: not implemented yet. Planned steps for a fill-history task:
    //  1. stop the related stream task and get its current wal scan version, ver1
    //     (for a source task, extract the last version in the wal);
    //  2. wait for the downstream tasks to complete;
    //  3. do a secondary scan of the history data: the time window remains, the version range is
    //     updated to [pTask->dataRange.range.maxVer, ver1];
    //  4. transfer the ownership of the executor state and update the scan data range of the source task;
    //  5. resume the related stream task.
  } else {
    // todo update the chkInfo version for current task.
    // this task has an associated history stream task, so we need to scan wal from the end version of
    // history scan. The current version of chkInfo.current is not updated during the history scan
    tqDebug("s-task:%s history data scan completed, now start to scan data from wal, start ver:%" PRId64
            ", window:%" PRId64 " - %" PRId64,
            pTask->id.idStr, pTask->chkInfo.currentVer, pTask->dataRange.window.skey, pTask->dataRange.window.ekey);

    code = streamTaskScanHistoryDataComplete(pTask);
  }

  // single release point for both branches; previously the fill-history branch kept the reference
  streamMetaReleaseTask(pMeta, pTask);
  return code;
}

1177
/*
 * Run step 2 of stream recovery for the task named in the request: perform the step-2 scan up to
 * `sversion`, position the wal reader at `sversion`, restore the task parameters, switch the task to
 * normal status, notify the downstream tasks, then clear the fill-history flag and save the task.
 *
 * pTq      - tq instance holding the stream meta
 * sversion - version the wal reader and chkInfo.currentVer are set to
 * msg      - read directly as an SStreamRecoverStep2Req
 *
 * Returns 0 on success or when the task turns out to be dropping, -1 when the task cannot be acquired
 * or any of the steps reports a negative code. The task reference is released on every exit path.
 */
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamRecoverStep2Req* pReq = (SStreamRecoverStep2Req*)msg;

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
  if (pTask == NULL) {
    return -1;
  }

  int32_t ret = -1;  // every early exit below is a failure unless stated otherwise

  // do recovery step 2
  int64_t st = taosGetTimestampMs();
  tqDebug("s-task:%s start step2 recover, ts:%" PRId64, pTask->id.idStr, st);

  if (streamSourceRecoverScanStep2(pTask, sversion) < 0) {
    goto end;
  }

  // logged with the tq log macro like the rest of this file
  tqDebug("s-task:%s set start wal scan start ver:%" PRId64, pTask->id.idStr, sversion);

  walReaderSeekVer(pTask->exec.pWalReader, sversion);
  pTask->chkInfo.currentVer = sversion;

  if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
    ret = 0;  // a dropping task is not an error, there is simply nothing left to do
    goto end;
  }

  // restore param
  if (streamRestoreParam(pTask) < 0) {
    goto end;
  }

  // set status normal
  tqDebug("s-task:%s blocking stage completed, set the status to be normal", pTask->id.idStr);
  if (streamSetStatusNormal(pTask) < 0) {
    goto end;
  }

  double el = (taosGetTimestampMs() - st) / 1000.0;
  tqDebug("s-task:%s step2 recover finished, el:%.2fs", pTask->id.idStr, el);

  // dispatch recover finish req to all related downstream task
  if (streamDispatchRecoverFinishMsg(pTask) < 0) {
    goto end;
  }

  atomic_store_8(&pTask->info.fillHistory, 0);
  streamMetaSaveTask(pTq->pStreamMeta, pTask);
  ret = 0;

end:
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return ret;
}

L
Liu Jicong 已提交
1239 1240 1241
/*
 * Handle a recover-finish request: decode the payload that follows the SMsgHead, find the addressed
 * task and pass the child id to streamProcessRecoverFinishReq.
 *
 * Returns 0 on success, -1 when the task is not found or processing reports a negative code.
 */
int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   body = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t bodyLen = pMsg->contLen - sizeof(SMsgHead);

  // deserialize; the decode result is not examined
  SStreamRecoverFinishReq req;
  SDecoder                decoder;
  tDecoderInit(&decoder, (uint8_t*)body, bodyLen);
  tDecodeSStreamRecoverFinishReq(&decoder, &req);
  tDecoderClear(&decoder);

  // find task
  SStreamMeta* pMeta = pTq->pStreamMeta;
  SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.taskId);
  if (NULL == pTask) {
    return -1;
  }

  // do process request, then drop the reference regardless of the outcome
  int32_t ret = (streamProcessRecoverFinishReq(pTask, req.childId) < 0) ? -1 : 0;
  streamMetaReleaseTask(pMeta, pTask);
  return ret;
}
L
Liu Jicong 已提交
1265

L
Liu Jicong 已提交
1266 1267 1268 1269 1270
// Placeholder handler for the recover-finish response: neither argument is used and 0 is always returned.
int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}

1271 1272 1273 1274
/*
 * Decode a delete result and turn it into a STREAM_DELETE_DATA block wrapped in a queue item.
 *
 * pData/len - encoded SDeleteRes
 * ver       - stored in the info.version of the produced block
 * pRefBlock - out: set to NULL first; on success with at least one affected table it receives a newly
 *             allocated SStreamRefDataBlock that references the produced data block
 *
 * The block holds one row per uid in the decoded uid list: start/end key columns carry skey/ekey of
 * the delete result, the uid column carries the table uid, the group id and the two calculate-ts
 * columns are set to NULL.
 *
 * Returns TSDB_CODE_SUCCESS (with *pRefBlock left NULL when the uid list is empty or no row was
 * affected) or TSDB_CODE_OUT_OF_MEMORY when the uid list or the queue item cannot be allocated.
 */
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) {
  SDecoder*   pCoder = &(SDecoder){0};
  SDeleteRes* pRes = &(SDeleteRes){0};

  *pRefBlock = NULL;

  pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t));
  if (pRes->uidList == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  tDecoderInit(pCoder, (uint8_t*)pData, len);
  tDecodeDeleteRes(pCoder, pRes);
  tDecoderClear(pCoder);

  int32_t numOfTables = taosArrayGetSize(pRes->uidList);
  if (numOfTables == 0 || pRes->affectedRows == 0) {
    taosArrayDestroy(pRes->uidList);
    return TSDB_CODE_SUCCESS;
  }

  // Allocate the queue item before building the data block, so that an allocation failure leaves
  // nothing behind except the uid list. The old check tested the out-parameter itself instead of the
  // allocated item, so a failed allocation went unnoticed and was dereferenced right afterwards.
  SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
  if (pItem == NULL) {
    taosArrayDestroy(pRes->uidList);
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA);
  blockDataEnsureCapacity(pDelBlock, numOfTables);
  pDelBlock->info.rows = numOfTables;
  pDelBlock->info.version = ver;

  // the column handles do not change per row, look them up once
  SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX);
  SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX);
  SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX);
  SColumnInfoData* pGroupCol = taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX);
  SColumnInfoData* pCalStartCol = taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX);
  SColumnInfoData* pCalEndCol = taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX);

  for (int32_t i = 0; i < numOfTables; i++) {
    // start key column
    colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false);
    // end key column
    colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false);
    // uid column
    int64_t* pUid = taosArrayGet(pRes->uidList, i);
    colDataSetVal(pUidCol, i, (const char*)pUid, false);

    colDataSetNULL(pGroupCol, i);
    colDataSetNULL(pCalStartCol, i);
    colDataSetNULL(pCalEndCol, i);
  }

  taosArrayDestroy(pRes->uidList);

  pItem->type = STREAM_INPUT__REF_DATA_BLOCK;
  pItem->pBlock = pDelBlock;
  *pRefBlock = pItem;
  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
1324 1325
/*
 * Handle a run request for a stream task.
 *
 * A request carrying WAL_READ_TASKS_ID triggers the wal scan for the stream tasks
 * (tqStreamTasksScanWal). Any other id addresses one task: it is executed when its status is normal;
 * otherwise the request is ignored, and the schedule status is reset to inactive if the task should
 * pause. After a task has been handled, tqStartStreamTasks is called.
 *
 * Returns 0 when the request was handled, -1 when the task does not exist.
 */
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamTaskRunReq* pReq = pMsg->pCont;

  int32_t taskId = pReq->taskId;
  int32_t vgId = TD_VID(pTq->pVnode);

  if (WAL_READ_TASKS_ID == taskId) {  // all tasks are extracted submit data from the wal
    tqStreamTasksScanWal(pTq);
    return 0;
  }

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (NULL == pTask) {
    tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId);
    return -1;
  }

  if (pTask->status.taskStatus != TASK_STATUS__NORMAL) {
    if (streamTaskShouldPause(&pTask->status)) {
      atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
    }
    tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr);
  } else {
    tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, pTask->id.idStr,
            pTask->chkInfo.version);
    streamProcessRunReq(pTask);
  }

  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  tqStartStreamTasks(pTq);
  return 0;
}

L
Liu Jicong 已提交
1357
/*
 * Handle a dispatch request sent by an upstream task: decode the payload that follows the SMsgHead
 * and hand the request to the addressed task through streamProcessDispatchMsg.
 *
 * pTq  - tq instance holding the stream meta
 * pMsg - incoming rpc message; its info is copied into the response message given to the task
 * exec - forwarded unchanged to streamProcessDispatchMsg
 *
 * Returns 0 when the task was found and the request passed on, -1 when the task does not exist
 * (the decoded request is deleted in that case).
 */
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  SStreamDispatchReq req = {0};

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  tDecodeStreamDispatchReq(&decoder, &req);
  // pair every tDecoderInit with tDecoderClear, as the other handlers in this file do
  tDecoderClear(&decoder);

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
  if (pTask) {
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    streamProcessDispatchMsg(pTask, &req, &rsp, exec);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return 0;
  } else {
    tDeleteStreamDispatchReq(&req);
    return -1;
  }
}

L
Liu Jicong 已提交
1380 1381
/*
 * Handle the response to a dispatch request: the response body follows the SMsgHead and its
 * upstreamTaskId (converted with ntohl) names the local task that receives it.
 *
 * Returns 0 when the task was found and the response processed, TSDB_CODE_INVALID_MSG otherwise.
 */
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t             taskId = ntohl(pRsp->upstreamTaskId);
  SStreamTask*        pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);

  int32_t vgId = pTq->pStreamMeta->vgId;
  if (NULL == pTask) {
    tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, taskId);
    return TSDB_CODE_INVALID_MSG;
  }

  streamProcessDispatchRsp(pTask, pRsp, pMsg->code);
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}
L
Liu Jicong 已提交
1395

1396
/*
 * Handle a drop-task request: `msg` is read directly as an SVDropStreamTaskReq and the named task is
 * removed from the stream meta. Always returns 0.
 */
int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SVDropStreamTaskReq* pDropReq = (SVDropStreamTaskReq*)msg;

  tqDebug("vgId:%d receive msg to drop stream task:0x%x", TD_VID(pTq->pVnode), pDropReq->taskId);
  streamMetaRemoveTask(pTq->pStreamMeta, pDropReq->taskId);

  return 0;
}
L
Liu Jicong 已提交
1403

5
54liuyao 已提交
1404 1405
/*
 * Handle a pause request: `msg` is read directly as an SVPauseStreamTaskReq. If the task exists, its
 * current status is saved in keepTaskStatus and the status is switched to TASK_STATUS__PAUSE.
 * A missing task is silently ignored. Always returns 0.
 */
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SVPauseStreamTaskReq* pPauseReq = (SVPauseStreamTaskReq*)msg;

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pPauseReq->taskId);
  if (NULL == pTask) {
    return 0;
  }

  tqDebug("vgId:%d s-task:%s set pause flag", pTq->pStreamMeta->vgId, pTask->id.idStr);

  // remember the status in effect before pausing, then pause
  atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
  atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);

  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}

/*
 * Handle a resume request: `msg` is read directly as an SVResumeStreamTaskReq. The task status is
 * restored from keepTaskStatus. When the request asks to ignore untreated data (igUntreated) and the
 * task is a source task, the wal reader is told to skip to `sversion`. Afterwards a source task with
 * an empty input queue triggers tqStartStreamTasks; any other task is scheduled with streamSchedExec.
 *
 * A missing task is only logged. Always returns 0.
 */
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SVResumeStreamTaskReq* pResumeReq = (SVResumeStreamTaskReq*)msg;

  int32_t      vgId = pTq->pStreamMeta->vgId;
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pResumeReq->taskId);
  if (NULL == pTask) {
    tqError("vgId:%d failed to find the s-task:0x%x for resume stream task", vgId, pResumeReq->taskId);
    return 0;
  }

  atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);

  // no lock needs to secure the access of the version
  if (pResumeReq->igUntreated && pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
    // discard all the data  when the stream task is suspended.
    walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
    tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
            ", schedStatus:%d",
            vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
  } else {  // from the previous paused version and go on
    tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
            vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
  }

  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) {
    tqStartStreamTasks(pTq);
  } else {
    streamSchedExec(pTask);
  }

  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}

L
Liu Jicong 已提交
1449 1450 1451 1452 1453 1454
/*
 * Handle a retrieve request: decode the payload that follows the SMsgHead and pass it to the task
 * named by dstTaskId through streamProcessRetrieveReq. The decoded request is deleted before
 * returning, whether or not the task was found.
 *
 * Returns 0 when the task was found, -1 otherwise.
 */
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msgStr = pMsg->pCont;
  char*   body = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t bodyLen = pMsg->contLen - sizeof(SMsgHead);

  SStreamRetrieveReq req;
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)body, bodyLen);
  tDecodeStreamRetrieveReq(&decoder, &req);
  tDecoderClear(&decoder);

  int32_t      ret = -1;
  int32_t      taskId = req.dstTaskId;
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (pTask != NULL) {
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    streamProcessRetrieveReq(pTask, &req, &rsp);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    ret = 0;
  }

  tDeleteStreamRetrieveReq(&req);
  return ret;
}

// Placeholder handler for the retrieve response: neither argument is used and 0 is always returned.
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}
L
Liu Jicong 已提交
1476

1477 1478 1479 1480 1481 1482
/*
 * Enqueue a dispatch request into the stream task it addresses.
 *
 * On success the request is passed to streamProcessDispatchMsg, the message content and the queue
 * item `pMsg` are freed, and 0 is returned.
 *
 * On failure (the request cannot be decoded, or the task does not exist) an error response carrying
 * the failure code is sent back when the message has an rpc handle, the message content and `pMsg`
 * are freed, and -1 is returned.
 *
 * pVnode - vnode whose tq owns the stream meta
 * pMsg   - incoming message; the payload follows an SMsgHead
 */
int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) {
  STQ*      pTq = pVnode->pTq;
  SMsgHead* msgStr = pMsg->pCont;
  char*     msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t   msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t   code = 0;

  // Zero-initialised: the FAIL path below reads the ids of `req` to build the error response, also
  // after a failed decode. Without the initialiser those reads would see indeterminate values.
  SStreamDispatchReq req = {0};
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  if (tDecodeStreamDispatchReq(&decoder, &req) < 0) {
    code = TSDB_CODE_MSG_DECODE_ERROR;
    tDecoderClear(&decoder);
    goto FAIL;
  }
  tDecoderClear(&decoder);

  int32_t taskId = req.taskId;

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (pTask) {
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    streamProcessDispatchMsg(pTask, &req, &rsp, false);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
    return 0;
  } else {
    tDeleteStreamDispatchReq(&req);
  }

  code = TSDB_CODE_STREAM_TASK_NOT_EXIST;

FAIL:
  // NOTE(review): this early return frees neither pMsg->pCont nor pMsg, unlike every other exit of
  // this function - confirm who owns the message when there is no rpc handle.
  if (pMsg->info.handle == NULL) return -1;

  SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
  if (pRspHead == NULL) {
    // no room for a full response: send a body-less out-of-memory response instead
    SRpcMsg rsp = {.code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info};
    tqDebug("send dispatch error rsp, code: %x", code);
    tmsgSendRsp(&rsp);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
    return -1;
  }

  // the response echoes the ids of the request in network byte order
  pRspHead->vgId = htonl(req.upstreamNodeId);
  SStreamDispatchRsp* pRsp = POINTER_SHIFT(pRspHead, sizeof(SMsgHead));
  pRsp->streamId = htobe64(req.streamId);
  pRsp->upstreamTaskId = htonl(req.upstreamTaskId);
  pRsp->upstreamNodeId = htonl(req.upstreamNodeId);
  pRsp->downstreamNodeId = htonl(pVnode->config.vgId);
  pRsp->downstreamTaskId = htonl(req.taskId);
  pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL;

  SRpcMsg rsp = {
      .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead};
  tqDebug("send dispatch error rsp, code: %x", code);
  tmsgSendRsp(&rsp);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
  return -1;
}
L
Liu Jicong 已提交
1540

1541
// Returns 1 when `sversion` is not greater than pTq->walLogLastVer, 0 otherwise.
int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) {
  return (pTq->walLogLastVer >= sversion) ? 1 : 0;
}
1542

1543
/*
 * Ask the stream queue to run the wal scan for the stream tasks of this vnode.
 *
 * Under the stream meta write lock:
 *  - nothing is done when the task list is empty;
 *  - otherwise walScanCounter is incremented; if it is then greater than 1 only a debug message is
 *    written ("wal read task has been launched");
 *  - otherwise a TDMT_STREAM_TASK_RUN message carrying WAL_READ_TASKS_ID is put on the stream queue.
 *
 * Returns 0 in all of these cases, -1 (with terrno set to TSDB_CODE_OUT_OF_MEMORY) when the run
 * request cannot be allocated.
 */
int32_t tqStartStreamTasks(STQ* pTq) {
  int32_t      vgId = TD_VID(pTq->pVnode);
  SStreamMeta* pMeta = pTq->pStreamMeta;
  int32_t      ret = 0;

  taosWLockLatch(&pMeta->lock);

  int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
  if (numOfTasks == 0) {
    tqInfo("vgId:%d no stream tasks exist", vgId);
  } else if (++pMeta->walScanCounter > 1) {
    tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
  } else {
    SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
    if (NULL == pRunReq) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
      ret = -1;
    } else {
      tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks);
      pRunReq->head.vgId = vgId;
      pRunReq->streamId = 0;
      pRunReq->taskId = WAL_READ_TASKS_ID;

      SRpcMsg runMsg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
      tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &runMsg);
    }
  }

  // single unlock point for every outcome above
  taosWUnLockLatch(&pMeta->lock);
  return ret;
}