/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
Hongze Cheng 已提交
16
#include "tq.h"
S
Shengliang Guan 已提交
17

18
#define IS_OFFSET_RESET_TYPE(_t)  ((_t) < 0)
19
#define ALL_STREAM_TASKS_ID       (-1)
20

L
Liu Jicong 已提交
21
// Initialize the process-wide TQ state (timer + stream module) at most once.
//
// tqMgmt.inited is used as a small state flag by this function and tqCleanUp():
//   0 - not initialized, 1 - initialized, 2 - a transition is in progress.
//
// Returns 0 on success (including the case where the module was already
// initialized) and -1 if the timer or the stream module failed to initialize.
int32_t tqInit() {
  int8_t old;

  // Claim the 0 -> 2 transition; keep retrying while another caller holds state 2.
  while (1) {
    old = atomic_val_compare_exchange_8(&tqMgmt.inited, 0, 2);
    if (old != 2) break;
  }

  if (old == 0) {
    tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ");
    if (tqMgmt.timer == NULL) {
      atomic_store_8(&tqMgmt.inited, 0);
      return -1;
    }

    if (streamInit() < 0) {
      // Roll back: release the timer and leave the transitional state. Without
      // this the flag stays at 2 and every later tqInit()/tqCleanUp() spins forever.
      taosTmrCleanUp(tqMgmt.timer);
      tqMgmt.timer = NULL;
      atomic_store_8(&tqMgmt.inited, 0);
      return -1;
    }

    atomic_store_8(&tqMgmt.inited, 1);
  }

  return 0;
}
L
Liu Jicong 已提交
42

43
// Tear down the process-wide TQ state if, and only if, it is currently initialized.
// The inited flag is moved 1 -> 2 while the teardown runs and is reset to 0 afterwards.
void tqCleanUp() {
  int8_t prev;

  // retry as long as some other caller is in the middle of a transition (state 2)
  do {
    prev = atomic_val_compare_exchange_8(&tqMgmt.inited, 1, 2);
  } while (prev == 2);

  if (prev != 1) {
    return;  // nothing was initialized, nothing to release
  }

  taosTmrCleanUp(tqMgmt.timer);
  streamCleanUp();
  atomic_store_8(&tqMgmt.inited, 0);
}
L
Liu Jicong 已提交
56

57
static void destroyTqHandle(void* data) {
58 59 60
  STqHandle* pData = (STqHandle*)data;
  qDestroyTask(pData->execHandle.task);
  if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
L
Liu Jicong 已提交
61
    taosMemoryFreeClear(pData->execHandle.execCol.qmsg);
62
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) {
63
    tqCloseReader(pData->execHandle.pTqReader);
64 65
    walCloseReader(pData->pWalReader);
    taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid);
L
Liu Jicong 已提交
66
  } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
67
    walCloseReader(pData->pWalReader);
68
    tqCloseReader(pData->execHandle.pTqReader);
69 70 71
  }
}

L
Liu Jicong 已提交
72
static void tqPushEntryFree(void* data) {
L
Liu Jicong 已提交
73
  STqPushEntry* p = *(void**)data;
H
Haojun Liao 已提交
74 75 76 77 78 79 80
  if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__POLL_RSP) {
    tDeleteSMqDataRsp(p->pDataRsp);
  } else if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__TAOSX_RSP) {
    tDeleteSTaosxRsp((STaosxRsp*)p->pDataRsp);
  }

  taosMemoryFree(p->pDataRsp);
L
Liu Jicong 已提交
81 82 83
  taosMemoryFree(p);
}

84 85 86 87 88
// Returns true only when both offsets are log offsets and the left version does
// not exceed the right one; any non-log offset makes the comparison false.
static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) {
  if (pLeft->val.type != TMQ_OFFSET__LOG || pRight->val.type != TMQ_OFFSET__LOG) {
    return false;
  }

  return pLeft->val.version <= pRight->val.version;
}

L
Liu Jicong 已提交
89
// Create and open the TQ object of a vnode.
//
// path   - directory path; a private copy is stored in the returned object.
// pVnode - owning vnode; its WAL supplies the last and committed versions.
//
// Returns the new STQ on success. On any failure NULL is returned and everything
// acquired so far is released again (the previous code leaked the STQ, its
// hash tables and any already opened stores on every error path).
STQ* tqOpen(const char* path, SVnode* pVnode) {
  bool    metaOpened = false;  // tqMetaClose() is only called after a successful tqMetaOpen()
  int32_t savedErr = 0;

  STQ* pTq = taosMemoryCalloc(1, sizeof(STQ));
  if (pTq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pTq->path = taosStrdup(path);
  if (pTq->path == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  pTq->pVnode = pVnode;
  pTq->walLogLastVer = pVnode->pWal->vers.lastVer;

  pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
  if (pTq->pHandle == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  taosHashSetFreeFp(pTq->pHandle, destroyTqHandle);

  taosInitRWLatch(&pTq->lock);
  pTq->pPushMgr = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
  if (pTq->pPushMgr == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  taosHashSetFreeFp(pTq->pPushMgr, tqPushEntryFree);

  pTq->pCheckInfo = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
  if (pTq->pCheckInfo == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo);

  if (tqMetaOpen(pTq) < 0) {
    goto _err;
  }
  metaOpened = true;

  pTq->pOffsetStore = tqOffsetOpen(pTq);
  if (pTq->pOffsetStore == NULL) {
    goto _err;
  }

  pTq->pStreamMeta = streamMetaOpen(path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId);
  if (pTq->pStreamMeta == NULL) {
    goto _err;
  }

  if (streamLoadTasks(pTq->pStreamMeta, walGetCommittedVer(pVnode->pWal)) < 0) {
    goto _err;
  }

  return pTq;

_err:
  // Keep the error reported by the failing step; the close routines may touch terrno.
  savedErr = terrno;

  // Release in the same order as tqClose(), but only what was really acquired,
  // so no close routine is handed a NULL or never-opened resource.
  if (pTq->pOffsetStore != NULL) {
    tqOffsetClose(pTq->pOffsetStore);
  }
  if (pTq->pHandle != NULL) {
    taosHashCleanup(pTq->pHandle);
  }
  if (pTq->pPushMgr != NULL) {
    taosHashCleanup(pTq->pPushMgr);
  }
  if (pTq->pCheckInfo != NULL) {
    taosHashCleanup(pTq->pCheckInfo);
  }
  taosMemoryFree(pTq->path);
  if (metaOpened) {
    tqMetaClose(pTq);
  }
  if (pTq->pStreamMeta != NULL) {
    streamMetaClose(pTq->pStreamMeta);
  }
  taosMemoryFree(pTq);

  terrno = savedErr;
  return NULL;
}
L
Liu Jicong 已提交
130

L
Liu Jicong 已提交
131
// Close a TQ object created by tqOpen() and free it. A NULL pointer is ignored.
void tqClose(STQ* pTq) {
  if (pTq != NULL) {
    tqOffsetClose(pTq->pOffsetStore);

    // the hash tables carry free callbacks, so their entries are released here too
    taosHashCleanup(pTq->pHandle);
    taosHashCleanup(pTq->pPushMgr);
    taosHashCleanup(pTq->pCheckInfo);

    taosMemoryFree(pTq->path);
    tqMetaClose(pTq);
    streamMetaClose(pTq->pStreamMeta);
    taosMemoryFree(pTq);
  }
}
L
Liu Jicong 已提交
145

L
Liu Jicong 已提交
146
// Encode a meta poll response (SMqRspHead followed by the encoded SMqMetaRsp)
// into an RPC buffer and send it as the reply to pMsg.
//
// Returns 0 once the response has been handed to tmsgSendRsp(), -1 if the
// encoded size could not be computed or the RPC buffer could not be allocated.
int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) {
  int32_t len = 0;
  int32_t code = 0;

  // first pass: compute the size of the encoded body
  tEncodeSize(tEncodeSMqMetaRsp, pRsp, len, code);
  if (code < 0) {
    return -1;
  }

  int32_t totalLen = sizeof(SMqRspHead) + len;
  void*   pCont = rpcMallocCont(totalLen);
  if (pCont == NULL) {
    return -1;
  }

  // fixed header at the front of the buffer
  SMqRspHead* pHead = (SMqRspHead*)pCont;
  pHead->mqMsgType = TMQ_MSG_TYPE__POLL_META_RSP;
  pHead->epoch = pReq->epoch;
  pHead->consumerId = pReq->consumerId;

  // second pass: encode the body right behind the header
  SEncoder encoder = {0};
  tEncoderInit(&encoder, POINTER_SHIFT(pCont, sizeof(SMqRspHead)), len);
  tEncodeSMqMetaRsp(&encoder, pRsp);
  tEncoderClear(&encoder);

  SRpcMsg resp = {0};
  resp.info = pMsg->info;
  resp.pCont = pCont;
  resp.contLen = totalLen;
  resp.code = 0;
  tmsgSendRsp(&resp);

  tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, res msg type %d, offset type:%d",
          TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->resMsgType, pRsp->rspOffset.type);

  return 0;
}

H
Haojun Liao 已提交
184 185
static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
                             int64_t consumerId, int32_t type) {
L
Liu Jicong 已提交
186 187
  int32_t len = 0;
  int32_t code = 0;
H
Haojun Liao 已提交
188 189 190 191 192 193

  if (type == TMQ_MSG_TYPE__POLL_RSP) {
    tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code);
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
    tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
  }
L
Liu Jicong 已提交
194 195 196 197 198 199 200 201 202 203 204

  if (code < 0) {
    return -1;
  }

  int32_t tlen = sizeof(SMqRspHead) + len;
  void*   buf = rpcMallocCont(tlen);
  if (buf == NULL) {
    return -1;
  }

H
Haojun Liao 已提交
205 206 207
  ((SMqRspHead*)buf)->mqMsgType = type;
  ((SMqRspHead*)buf)->epoch = epoch;
  ((SMqRspHead*)buf)->consumerId = consumerId;
L
Liu Jicong 已提交
208 209 210 211 212

  void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));

  SEncoder encoder = {0};
  tEncoderInit(&encoder, abuf, len);
H
Haojun Liao 已提交
213 214 215 216 217 218 219

  if (type == TMQ_MSG_TYPE__POLL_RSP) {
    tEncodeSMqDataRsp(&encoder, pRsp);
  } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
    tEncodeSTaosxRsp(&encoder, (STaosxRsp*) pRsp);
  }

L
Liu Jicong 已提交
220 221 222
  tEncoderClear(&encoder);

  SRpcMsg rsp = {
H
Haojun Liao 已提交
223
      .info = *pRpcHandleInfo,
L
Liu Jicong 已提交
224 225 226 227 228 229 230 231 232
      .pCont = buf,
      .contLen = tlen,
      .code = 0,
  };

  tmsgSendRsp(&rsp);
  return 0;
}

H
Haojun Liao 已提交
233 234 235 236
// Send the response stored in a push entry, using the epoch, consumer id and
// message type recorded in the response header, then log the request/response
// offsets. Always returns 0; the result of doSendDataRsp() is not inspected.
int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) {
  SMqDataRsp* pRsp = pPushEntry->pDataRsp;

  doSendDataRsp(&pPushEntry->info, pRsp, pRsp->head.epoch, pRsp->head.consumerId, pRsp->head.mqMsgType);

  // printable forms of both offsets, for the debug log only
  char reqOffsetStr[80] = {0};
  char rspOffsetStr[80] = {0};
  tFormatOffset(reqOffsetStr, tListLen(reqOffsetStr), &pRsp->reqOffset);
  tFormatOffset(rspOffsetStr, tListLen(rspOffsetStr), &pRsp->rspOffset);

  tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s",
          TD_VID(pTq->pVnode), pRsp->head.consumerId, pRsp->head.epoch, pRsp->blockNum, reqOffsetStr, rspOffsetStr);

  return 0;
}

H
Haojun Liao 已提交
247 248
// Send a data response of the given message type as the reply to a poll request
// and log the request/response offsets. Always returns 0; the result of
// doSendDataRsp() is not inspected.
int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type) {
  doSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type);

  // printable forms of both offsets, for the debug log only
  char reqOffsetStr[80] = {0};
  char rspOffsetStr[80] = {0};
  tFormatOffset(reqOffsetStr, sizeof(reqOffsetStr), &pRsp->reqOffset);
  tFormatOffset(rspOffsetStr, sizeof(rspOffsetStr), &pRsp->rspOffset);

  tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s, reqId:0x%"PRIx64,
          TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, reqOffsetStr, rspOffsetStr,
          pReq->reqId);

  return 0;
}

261
// Handle an offset-commit message for one subscription key.
//
// pTq      - TQ object whose offset store is updated.
// sversion - version passed in by the caller; a committed log offset whose
//            version is exactly sversion - 1 is bumped to sversion.
// msg      - encoded STqOffset, msgLen bytes long.
//
// Returns 0 when the offset was stored or did not need to be updated, -1 when
// the message cannot be decoded, carries an unsupported offset type, or when
// writing the offset / referencing the WAL version fails.
int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  STqOffset offset = {0};
  int32_t   vgId = TD_VID(pTq->pVnode);

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeRet = tDecodeSTqOffset(&decoder, &offset);
  // Clear the decoder on both outcomes; the failure path used to return without it.
  tDecoderClear(&decoder);
  if (decodeRet < 0) {
    return -1;
  }

  if (offset.val.type == TMQ_OFFSET__SNAPSHOT_DATA || offset.val.type == TMQ_OFFSET__SNAPSHOT_META) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:snapshot) uid:%" PRId64 ", ts:%" PRId64,
            offset.subKey, vgId, offset.val.uid, offset.val.ts);
  } else if (offset.val.type == TMQ_OFFSET__LOG) {
    tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, offset.subKey,
            vgId, offset.val.version);
    if (offset.val.version + 1 == sversion) {
      offset.val.version += 1;
    }
  } else {
    tqError("invalid commit offset type:%d", offset.val.type);
    return -1;
  }

  // An already stored log offset that is at or ahead of the new one wins.
  STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, offset.subKey);
  if (pSavedOffset != NULL && tqOffsetLessOrEqual(&offset, pSavedOffset)) {
    return 0;  // no need to update the offset value
  }

  // save the new offset value
  if (tqOffsetWrite(pTq->pOffsetStore, &offset) < 0) {
    return -1;
  }

  // For log offsets, move the handle's WAL reference to the committed version
  // (only when a handle exists for this subscription key).
  if (offset.val.type == TMQ_OFFSET__LOG) {
    STqHandle* pHandle = taosHashGet(pTq->pHandle, offset.subKey, strlen(offset.subKey));
    if (pHandle && (walRefVer(pHandle->pRef, offset.val.version) < 0)) {
      return -1;
    }
  }

  return 0;
}

L
Liu Jicong 已提交
307
// Check whether column `colId` of table `tbUid` may be modified.
// Walks every entry of pTq->pCheckInfo; if an entry for this table lists the
// column id, the iteration is cancelled and -1 is returned. Otherwise returns 0.
int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) {
  for (void* pIter = taosHashIterate(pTq->pCheckInfo, NULL); pIter != NULL;
       pIter = taosHashIterate(pTq->pCheckInfo, pIter)) {
    STqCheckInfo* pInfo = (STqCheckInfo*)pIter;
    if (pInfo->ntbUid != tbUid) {
      continue;
    }

    int32_t numOfCols = taosArrayGetSize(pInfo->colIdList);
    for (int32_t idx = 0; idx < numOfCols; ++idx) {
      int16_t lockedColId = *(int16_t*)taosArrayGet(pInfo->colIdList, idx);
      if (lockedColId == colId) {
        // leaving the loop early: the hash iterator has to be cancelled explicitly
        taosHashCancelIterate(pTq->pCheckInfo, pIter);
        return -1;
      }
    }
  }

  return 0;
}

L
Liu Jicong 已提交
333 334 335 336 337 338 339 340 341 342
// Prepare an SMqDataRsp for a poll request: copy the request offset, create the
// (empty) block data / block length arrays and clear the table-name and schema
// flags. `subType` is currently not used.
// Returns 0 on success, -1 if either array could not be created (the flags are
// left untouched in that case).
static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t subType) {
  pRsp->reqOffset = pReq->reqOffset;

  pRsp->blockData = taosArrayInit(0, sizeof(void*));
  pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t));

  bool allocated = (pRsp->blockData != NULL) && (pRsp->blockDataLen != NULL);
  if (!allocated) {
    return -1;
  }

  pRsp->withTbName = 0;
  pRsp->withSchema = false;
  return 0;
}

348 349 350 351 352 353 354 355 356 357 358 359 360
// Prepare an STaosxRsp for a poll request: copy the request offset, enable the
// table-name and schema flags and create the four (empty) per-block arrays.
// Returns 0 on success, -1 if any of the arrays could not be created.
static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) {
  pRsp->reqOffset = pReq->reqOffset;

  pRsp->withTbName = 1;
  pRsp->withSchema = 1;

  // all four arrays are always requested; the result is checked afterwards
  pRsp->blockData = taosArrayInit(0, sizeof(void*));
  pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t));
  pRsp->blockTbName = taosArrayInit(0, sizeof(void*));
  pRsp->blockSchema = taosArrayInit(0, sizeof(void*));

  bool allocated = (pRsp->blockData != NULL) && (pRsp->blockDataLen != NULL) && (pRsp->blockTbName != NULL) &&
                   (pRsp->blockSchema != NULL);

  return allocated ? 0 : -1;
}

365 366 367 368 369
// Resolve the starting offset for a poll request that asked for an offset reset.
//
// pOffsetVal     - out: offset to continue polling from (left untouched when a
//                  response has already been sent or the reset type is not handled).
// pBlockReturned - out: true when an (empty) response carrying the latest WAL
//                  version was already sent and the caller must not poll further.
//
// Behaviour:
//   * an offset stored for pRequest->subKey always wins;
//   * "earliest": snapshot offset (meta or data) if the request uses snapshots,
//     otherwise the version before the first referenced WAL version;
//   * "latest": reply immediately with the last WAL version as response offset;
//   * "none": fail with TSDB_CODE_TQ_NO_COMMITTED_OFFSET.
//
// Returns 0 on success, otherwise -1 (or the result of tqSendDataRsp() on the
// "latest" path).
static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
                                     SRpcMsg* pMsg, bool* pBlockReturned) {
  uint64_t     consumerId = pRequest->consumerId;
  STqOffsetVal reqOffset = pRequest->reqOffset;
  STqOffset*   pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey);
  int32_t      vgId = TD_VID(pTq->pVnode);

  *pBlockReturned = false;

  // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value.
  if (pOffset != NULL) {
    *pOffsetVal = pOffset->val;

    char formatBuf[80] = {0};
    tFormatOffset(formatBuf, sizeof(formatBuf), pOffsetVal);
    tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue. reqId:0x%"PRIx64,
            consumerId, pHandle->subKey, vgId, formatBuf, pRequest->reqId);
    return 0;
  }

  // no poll occurs in this vnode for this topic, let's seek to the right offset value.
  if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) {
    if (pRequest->useSnapshot) {
      tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot",
              consumerId, pHandle->subKey, vgId);

      if (pHandle->fetchMeta) {
        tqOffsetResetToMeta(pOffsetVal, 0);
      } else {
        tqOffsetResetToData(pOffsetVal, 0, 0);
      }
    } else {
      pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef);
      if (pHandle->pRef == NULL) {
        terrno = TSDB_CODE_OUT_OF_MEMORY;
        return -1;
      }

      // offset set to previous version when init
      tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1);
    }
    return 0;
  }

  if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
    int32_t code = 0;

    if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
      SMqDataRsp dataRsp = {0};
      // Do not send a response built on arrays that failed to allocate.
      if (tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType) < 0) {
        tDeleteSMqDataRsp(&dataRsp);
        terrno = TSDB_CODE_OUT_OF_MEMORY;
        return -1;
      }

      tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal));
      tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId,
              pHandle->subKey, vgId, dataRsp.rspOffset.version);
      code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);
      tDeleteSMqDataRsp(&dataRsp);
    } else {
      STaosxRsp taosxRsp = {0};
      // Do not send a response built on arrays that failed to allocate.
      if (tqInitTaosxRsp(&taosxRsp, pRequest) < 0) {
        tDeleteSTaosxRsp(&taosxRsp);
        terrno = TSDB_CODE_OUT_OF_MEMORY;
        return -1;
      }

      tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal));
      code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
      tDeleteSTaosxRsp(&taosxRsp);
    }

    *pBlockReturned = true;
    return code;
  }

  if (reqOffset.type == TMQ_OFFSET__RESET_NONE) {
    tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64 " in vg %d, subkey %s, reset none failed",
            pHandle->subKey, consumerId, vgId, pRequest->subKey);
    terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET;
    return -1;
  }

  // any other reset type: nothing to do, *pOffsetVal stays as the caller set it
  return 0;
}
L
Liu Jicong 已提交
438

439 440 441 442 443 444 445 446 447 448 449 450
// Serve a poll request for a column ("normal") subscription: scan data starting
// at pOffset under the TQ write latch and either send it back or, when nothing
// new is available, register the request for a later push.
//
// Returns the result of tqScanData(), tqRegisterPushHandle() or tqSendDataRsp(),
// or -1 with terrno = TSDB_CODE_OUT_OF_MEMORY if the response cannot be set up.
static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
                                                   SRpcMsg* pMsg, STqOffsetVal* pOffset) {
  uint64_t consumerId = pRequest->consumerId;
  int32_t  vgId = TD_VID(pTq->pVnode);

  SMqDataRsp dataRsp = {0};
  // The result used to be ignored, which let the scan run on NULL arrays.
  if (tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType) < 0) {
    tDeleteSMqDataRsp(&dataRsp);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // lock
  taosWLockLatch(&pTq->lock);

  qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId);
  int32_t code = tqScanData(pTq, pHandle, &dataRsp, pOffset);
  if (code != 0) {
    goto end;
  }

  // till now, all data has been transferred to consumer, new data needs to push client once arrived.
  if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG &&
      dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) {
    code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);
    taosWUnLockLatch(&pTq->lock);
    // NOTE(review): dataRsp is deliberately not deleted on this path, as in the
    // original code; presumably tqRegisterPushHandle keeps its buffers for the
    // push entry - confirm, especially for the failure case.
    return code;
  }

  code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);

  // NOTE: this pHandle->consumerId may have been changed already.

end:
  {
    char buf[80] = {0};
    tFormatOffset(buf, sizeof(buf), &dataRsp.rspOffset);
    tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64 " code:%d",
            consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code);
    taosWUnLockLatch(&pTq->lock);
    tDeleteSMqDataRsp(&dataRsp);
  }
  return code;
}

wmmhello's avatar
wmmhello 已提交
481

wmmhello's avatar
wmmhello 已提交
482
// Serve a poll request for a db / super-table (taosx) subscription.
//
// Phase 1 (offset is not a log offset): run tqScanTaosx(). A meta result or at
// least one data block is sent back immediately; otherwise *offset is replaced
// by the response offset produced by the scan.
// Phase 2 (offset is a log offset): read WAL entries one by one starting at
// offset->version + 1. A non-submit entry is returned as a meta response (after
// first flushing rows collected so far); submit entries are scanned into the
// taosx response, which is sent once 4096 rows are collected, a create-table
// entry is present, or the log cannot be fetched any further.
//
// All exits go through one cleanup label, so the taosx response and the WAL
// read buffer are released on every path (the tqScanTaosx() failure path used
// to leak the response arrays).
//
// Returns 0 / the result of the send routine on success, -1 on failure.
static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg, STqOffsetVal *offset) {
  int32_t     code = 0;
  int32_t     vgId = TD_VID(pTq->pVnode);
  SWalCkHead* pCkHead = NULL;
  SMqMetaRsp  metaRsp = {0};
  STaosxRsp   taosxRsp = {0};

  // The result used to be ignored, which let the scan run on NULL arrays.
  if (tqInitTaosxRsp(&taosxRsp, pRequest) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    code = -1;
    goto _exit;
  }

  if (offset->type != TMQ_OFFSET__LOG) {
    if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) {
      code = -1;
      goto _exit;
    }

    if (metaRsp.metaRspLen > 0) {
      code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp);
      tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 ",ts:%" PRId64,
              pRequest->consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.ts);
      // The scan's meta buffer is freed here, as in the original code.
      taosMemoryFree(metaRsp.metaRsp);
      goto _exit;
    }

    tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64
    ",ts:%" PRId64,pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid,taosxRsp.rspOffset.ts);
    if (taosxRsp.blockNum > 0) {
      code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
      goto _exit;
    }

    // nothing to send yet: continue from wherever the scan ended
    *offset = taosxRsp.rspOffset;
  }

  if (offset->type == TMQ_OFFSET__LOG) {
    int64_t fetchVer = offset->version + 1;
    int     totalRows = 0;

    pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048);
    if (pCkHead == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      code = -1;
      goto _exit;
    }
    walSetReaderCapacity(pHandle->pWalReader, 2048);

    while (1) {
      // a newer consumer epoch took over this handle: drop the request silently
      int32_t savedEpoch = atomic_load_32(&pHandle->epoch);
      if (savedEpoch > pRequest->epoch) {
        tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64
          ", found new consumer epoch %d, discard req epoch %d", pRequest->consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch);
        break;
      }

      // pCkHead is passed by address; this block only reads and finally frees it
      if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) {
        // no further log entry: answer with what has been collected so far
        tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
        code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
        goto _exit;
      }

      SWalCont* pHead = &pCkHead->head;
      tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", pRequest->consumerId,
              pRequest->epoch, vgId, fetchVer, pHead->msgType);

      // process meta
      if (pHead->msgType != TDMT_VND_SUBMIT) {
        if (totalRows > 0) {
          // flush the rows collected before this meta entry; it is served by the next poll
          tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1);
          code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
          goto _exit;
        }

        tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType));
        tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer);
        metaRsp.resMsgType = pHead->msgType;
        metaRsp.metaRspLen = pHead->bodyLen;
        // points into pCkHead, so it must not be freed separately
        metaRsp.metaRsp = pHead->body;
        code = (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) ? -1 : 0;
        goto _exit;
      }

      // process data
      SPackedData submit = {
          .msgStr = POINTER_SHIFT(pHead->body, sizeof(SSubmitReq2Msg)),
          .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg),
          .ver = pHead->version,
      };

      if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp, &totalRows) < 0) {
        tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", pRequest->consumerId, vgId,
                pRequest->subKey);
        code = -1;
        goto _exit;
      }

      if (totalRows >= 4096 || taosxRsp.createTableNum > 0) {
        tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
        code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP);
        goto _exit;
      }

      fetchVer++;
    }
  }

_exit:
  tDeleteSTaosxRsp(&taosxRsp);
  taosMemoryFreeClear(pCkHead);
  return code;
}

wmmhello's avatar
wmmhello 已提交
605
// Serve one poll request against an already validated handle.
//
// The start offset is either the one supplied by the consumer, or - when the
// requested offset type is one of the "reset" kinds (negative type value) - the
// one produced by extractResetOffsetVal(). The request is then dispatched to
// the column-subscription path or to the db/stable (taosx) path depending on
// the handle's subscription type.
//
// Returns the code of whichever helper produced the final result.
static int32_t doPollDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) {
  STqOffsetVal startOffset = {0};

  if (!IS_OFFSET_RESET_TYPE(pRequest->reqOffset.type)) {
    // the consumer specified an explicit offset, use it as is
    startOffset = pRequest->reqOffset;
  } else {
    // resolve the reset policy chosen by the consumer into a concrete offset
    bool    blockReturned = false;
    int32_t rc = extractResetOffsetVal(&startOffset, pTq, pHandle, pRequest, pMsg, &blockReturned);
    if (rc != 0) {
      return rc;
    }

    // the helper reported that a block was already returned, nothing left to do
    if (blockReturned) {
      return 0;
    }
  }

  // normal (column) subscription
  if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
    return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &startOffset);
  }

  // todo handle the case where re-balance occurs.
  // db / super-table subscription (taosx)
  return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &startOffset);
}

638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
// Entry point for a consumer poll message.
//
// Steps: deserialize the request, look up the handle by subscribe key, reject
// the request when the handle is owned by another consumer, raise the saved
// epoch if the request carries a newer one, then delegate to doPollDataForMq().
//
// Returns -1 with terrno set when the message cannot be decoded, the handle
// does not exist, or the consumer does not match; otherwise the result of
// doPollDataForMq().
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
  SMqPollReq pollReq = {0};
  if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &pollReq) < 0) {
    tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  const int64_t consumerId = pollReq.consumerId;
  const int32_t vgId = TD_VID(pTq->pVnode);
  STqOffsetVal  reqOffset = pollReq.reqOffset;

  // 1. find handle
  STqHandle* pHandle = taosHashGet(pTq->pHandle, pollReq.subKey, strlen(pollReq.subKey));
  if (pHandle == NULL) {
    tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, pollReq.subKey);
    terrno = TSDB_CODE_INVALID_MSG;
    return -1;
  }

  // 2. check re-balance status: the handle must still belong to this consumer
  taosRLockLatch(&pTq->lock);
  const bool ownerMismatch = (pHandle->consumerId != consumerId);
  if (ownerMismatch) {
    tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
            consumerId, vgId, pollReq.subKey, pHandle->consumerId);
    terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
  }
  taosRUnLockLatch(&pTq->lock);

  if (ownerMismatch) {
    return -1;
  }

  // 3. update the epoch value if the request carries a newer one
  taosWLockLatch(&pTq->lock);
  if (pHandle->epoch < pollReq.epoch) {
    tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch,
            pollReq.epoch);
    pHandle->epoch = pollReq.epoch;
  }
  taosWUnLockLatch(&pTq->lock);

  char offsetStr[80];
  tFormatOffset(offsetStr, 80, &reqOffset);
  tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64,
          consumerId, pollReq.epoch, pHandle->subKey, vgId, offsetStr, pollReq.reqId);

  return doPollDataForMq(pTq, pHandle, &pollReq, pMsg);
}

687
// Remove everything kept for one subscription key: the push-manager entry, the
// in-memory handle (closing its WAL reference when it has one), the cached
// offset and the persisted handle.
//
// Each step is best effort: a failing step is only logged and the function
// always returns 0.
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;
  char*          subKey = pReq->subKey;
  size_t         keyLen = strlen(subKey);

  tqDebug("vgId:%d, tq process delete sub req %s", pTq->pVnode->config.vgId, subKey);

  // drop the entry from the push manager under the write latch
  taosWLockLatch(&pTq->lock);
  int32_t code = taosHashRemove(pTq->pPushMgr, subKey, keyLen);
  if (code != 0) {
    tqDebug("vgId:%d, tq remove push handle %s", pTq->pVnode->config.vgId, subKey);
  }
  taosWUnLockLatch(&pTq->lock);

  // drop the handle itself, releasing its WAL reference first
  STqHandle* pHandle = taosHashGet(pTq->pHandle, subKey, keyLen);
  if (pHandle != NULL) {
    if (pHandle->pRef != NULL) {
      walCloseRef(pTq->pVnode->pWal, pHandle->pRef->refId);
    }

    code = taosHashRemove(pTq->pHandle, subKey, keyLen);
    if (code != 0) {
      tqError("cannot process tq delete req %s, since no such handle", subKey);
    }
  }

  // drop the cached offset
  code = tqOffsetDelete(pTq->pOffsetStore, subKey);
  if (code != 0) {
    tqError("cannot process tq delete req %s, since no such offset in cache", subKey);
  }

  // drop the persisted handle
  if (tqMetaDeleteHandle(pTq, subKey) < 0) {
    tqError("cannot process tq delete req %s, since no such offset in tdb", subKey);
  }

  return 0;
}

722
// Decode an STqCheckInfo from the message, cache it in pTq->pCheckInfo keyed by
// topic name, and persist the raw message through tqMetaSaveCheckInfo().
//
// Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY when decoding,
// caching or persisting fails.
int32_t tqProcessAddCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  STqCheckInfo info = {0};
  SDecoder     decoder;

  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeRes = tDecodeSTqCheckInfo(&decoder, &info);
  // the decoder is cleared on the failure path as well, so it is never left un-cleared
  tDecoderClear(&decoder);
  if (decodeRes < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  if (taosHashPut(pTq->pCheckInfo, info.topic, strlen(info.topic), &info, sizeof(STqCheckInfo)) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  if (tqMetaSaveCheckInfo(pTq, info.topic, msg, msgLen) < 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

742
// Remove the check info whose key is the NUL-terminated string in msg, first
// from the in-memory table and then from the persisted meta. The second step is
// skipped when the first one fails.
//
// Returns 0 on success; -1 with terrno = TSDB_CODE_OUT_OF_MEMORY otherwise.
int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  bool removed = (taosHashRemove(pTq->pCheckInfo, msg, strlen(msg)) >= 0) && (tqMetaDeleteCheckInfo(pTq, msg) >= 0);
  if (!removed) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}

754
// Handle a re-balance (subscribe) request for one subscription key.
//
// - If a handle already exists and is owned by the new consumer, only the epoch
//   is reset to 0 and the handle is persisted again.
// - If a handle exists but is owned by another consumer, the running task is
//   killed, the push registration is removed, ownership is switched to the new
//   consumer (epoch reset to 0) and the handle is persisted.
// - If no handle exists, a new one is built according to the subscription type
//   (column / db / table), stored in pTq->pHandle and persisted.
//
// req.qmsg is released on every exit path unless its ownership was moved into
// the new handle (column subscription).
//
// Returns 0 on success (also for an invalid request without a new consumer),
// -1 when the WAL reference cannot be taken or the handle cannot be persisted,
// and the result of tqMetaSaveHandle() when the consumer is unchanged.
int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SMqRebVgReq req = {0};
  tDecodeSMqRebVgReq(msg, &req);

  SVnode* pVnode = pTq->pVnode;
  int32_t vgId = TD_VID(pVnode);
  size_t  keyLen = strlen(req.subKey);

  tqDebug("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pVnode->config.vgId, req.subKey,
          req.oldConsumerId, req.newConsumerId);

  STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, keyLen);

  // ---- case 1: the handle already exists ----
  if (pHandle != NULL) {
    if (pHandle->consumerId == req.newConsumerId) {  // do nothing
      tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId);
      atomic_store_32(&pHandle->epoch, -1);
      atomic_add_fetch_32(&pHandle->epoch, 1);
      taosMemoryFree(req.qmsg);
      return tqMetaSaveHandle(pTq, req.subKey, pHandle);
    }

    tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
           req.newConsumerId);

    // kill executing task
    qTaskInfo_t pRunningTask = pHandle->execHandle.task;
    if (pRunningTask != NULL) {
      qKillTask(pRunningTask, TSDB_CODE_SUCCESS);
    }

    taosWLockLatch(&pTq->lock);
    atomic_store_32(&pHandle->epoch, -1);

    // remove if it has been register in the push manager, and return one empty block to consumer
    tqUnregisterPushHandle(pTq, req.subKey, (int32_t)keyLen, pHandle->consumerId, true);

    atomic_store_64(&pHandle->consumerId, req.newConsumerId);
    atomic_add_fetch_32(&pHandle->epoch, 1);

    if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
      qStreamCloseTsdbReader(pRunningTask);
    }
    taosWUnLockLatch(&pTq->lock);

    int32_t saveRes = tqMetaSaveHandle(pTq, req.subKey, pHandle);
    taosMemoryFree(req.qmsg);
    return (saveRes < 0) ? -1 : 0;
  }

  // ---- case 2: no handle yet, build a new one ----
  if (req.oldConsumerId != -1) {
    tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
            req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
  }

  if (req.newConsumerId == -1) {
    tqError("vgId:%d, tq invalid re-balance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
    taosMemoryFree(req.qmsg);
    return 0;
  }

  STqHandle newHandle = {0};
  pHandle = &newHandle;

  // value of the freshly zeroed handle, only used for the log line below
  uint64_t oldConsumerId = newHandle.consumerId;

  memcpy(newHandle.subKey, req.subKey, TSDB_SUBSCRIBE_KEY_LEN);
  newHandle.consumerId = req.newConsumerId;
  newHandle.epoch = -1;
  newHandle.execHandle.subType = req.subType;
  newHandle.fetchMeta = req.withMeta;

  // TODO version should be assigned and refed during preprocess
  SWalRef* pWalRef = walRefCommittedVer(pVnode->pWal);
  if (pWalRef == NULL) {
    taosMemoryFree(req.qmsg);
    return -1;
  }

  int64_t refVer = pWalRef->refVer;
  newHandle.pRef = pWalRef;
  newHandle.snapshotVer = refVer;

  SReadHandle handle = {
      .meta = pVnode->pMeta, .vnode = pVnode, .initTableReader = true, .initTqReader = true, .version = refVer};

  if (newHandle.execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
    // the handle takes over the query message
    newHandle.execHandle.execCol.qmsg = req.qmsg;
    req.qmsg = NULL;

    newHandle.execHandle.task = qCreateQueueExecTaskInfo(newHandle.execHandle.execCol.qmsg, &handle, vgId,
                                                         &newHandle.execHandle.numOfCols, req.newConsumerId);
    void* pScanner = NULL;
    qExtractStreamScanner(newHandle.execHandle.task, &pScanner);
    newHandle.execHandle.pTqReader = qExtractReaderFromStreamScanner(pScanner);
  } else if (newHandle.execHandle.subType == TOPIC_SUB_TYPE__DB) {
    newHandle.pWalReader = walOpenReader(pVnode->pWal, NULL);
    newHandle.execHandle.pTqReader = tqOpenReader(pVnode);

    newHandle.execHandle.execDb.pFilterOutTbUid =
        taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
    buildSnapContext(handle.meta, handle.version, 0, newHandle.execHandle.subType, newHandle.fetchMeta,
                     (SSnapContext**)(&handle.sContext));

    newHandle.execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, vgId, NULL, req.newConsumerId);
  } else if (newHandle.execHandle.subType == TOPIC_SUB_TYPE__TABLE) {
    newHandle.pWalReader = walOpenReader(pVnode->pWal, NULL);
    newHandle.execHandle.execTb.suid = req.suid;

    // collect the child tables of the super table and hand them to the reader
    SArray* pCtbUids = taosArrayInit(0, sizeof(int64_t));
    vnodeGetCtbIdList(pVnode, req.suid, pCtbUids);
    tqDebug("vgId:%d, tq try to get all ctb, suid:%" PRId64, pVnode->config.vgId, req.suid);
    for (int32_t i = 0; i < taosArrayGetSize(pCtbUids); i++) {
      int64_t tbUid = *(int64_t*)taosArrayGet(pCtbUids, i);
      tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid);
    }
    newHandle.execHandle.pTqReader = tqOpenReader(pVnode);
    tqReaderSetTbUidList(newHandle.execHandle.pTqReader, pCtbUids);
    taosArrayDestroy(pCtbUids);

    buildSnapContext(handle.meta, handle.version, req.suid, newHandle.execHandle.subType, newHandle.fetchMeta,
                     (SSnapContext**)(&handle.sContext));
    newHandle.execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, vgId, NULL, req.newConsumerId);
  }

  taosHashPut(pTq->pHandle, req.subKey, keyLen, pHandle, sizeof(STqHandle));
  tqDebug("try to persist handle %s consumer:0x%" PRIx64 " , old consumer:0x%" PRIx64, req.subKey,
          pHandle->consumerId, oldConsumerId);

  int32_t persistRes = tqMetaSaveHandle(pTq, req.subKey, pHandle);
  taosMemoryFree(req.qmsg);
  return (persistRes < 0) ? -1 : 0;
}
889

890
// Fill in the runtime part of a stream task: id string, queues, status,
// state backend and executor (for source and agg level tasks), sink callbacks,
// and - for source tasks - the tq reader with the queried table list.
//
// Parameters:
//   pTq   - tq instance the task belongs to
//   pTask - task to expand; its fields are modified in place
//   ver   - not used by this function
//
// Returns 0 on success, -1 when any allocation or open step fails.
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
  // todo extract method
  char idBuf[128] = {0};
  // bounded formatting: the output can never overrun idBuf
  snprintf(idBuf, sizeof(idBuf), "0x%" PRIx64 "-%d", pTask->id.streamId, pTask->id.taskId);

  int32_t vgId = TD_VID(pTq->pVnode);
  pTask->id.idStr = taosStrdup(idBuf);
  pTask->refCnt = 1;
  pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE;
  pTask->inputQueue = streamQueueOpen();
  pTask->outputQueue = streamQueueOpen();

  // idStr is printed with %s below, so a failed duplication must not be ignored
  if (pTask->id.idStr == NULL || pTask->inputQueue == NULL || pTask->outputQueue == NULL) {
    return -1;
  }

  pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
  pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
  pTask->pMsgCb = &pTq->pVnode->msgCb;
  pTask->pMeta = pTq->pStreamMeta;

  // initial status depends on whether history has to be filled first
  if (pTask->fillHistory) {
    pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM;
  } else {
    pTask->taskStatus = TASK_STATUS__RESTORE;
  }

  // expand executor
  if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
    pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
    if (pTask->pState == NULL) {
      return -1;
    }

    SReadHandle handle = {
        .meta = pTq->pVnode->pMeta, .vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState};

    pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
    if (pTask->exec.pExecutor == NULL) {
      return -1;
    }
  } else if (pTask->taskLevel == TASK_LEVEL__AGG) {
    pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
    if (pTask->pState == NULL) {
      return -1;
    }

    int32_t     numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo);
    SReadHandle mgHandle = {.vnode = NULL, .numOfVgroups = numOfVgroups, .pStateBackend = pTask->pState};

    pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, vgId);
    if (pTask->exec.pExecutor == NULL) {
      return -1;
    }
  }

  // sink
  if (pTask->outputType == TASK_OUTPUT__SMA) {
    pTask->smaSink.vnode = pTq->pVnode;
    pTask->smaSink.smaSink = smaHandleRes;
  } else if (pTask->outputType == TASK_OUTPUT__TABLE) {
    pTask->tbSink.vnode = pTq->pVnode;
    pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline2;

    // schema version defaults to 1 unless the meta lookup for the target stable succeeds
    int32_t   ver1 = 1;
    SMetaInfo info = {0};
    int32_t   code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL);
    if (code == TSDB_CODE_SUCCESS) {
      ver1 = info.skmVer;
    }

    SSchemaWrapper* pschemaWrapper = pTask->tbSink.pSchemaWrapper;
    pTask->tbSink.pTSchema = tBuildTSchema(pschemaWrapper->pSchema, pschemaWrapper->nCols, ver1);
    if (pTask->tbSink.pTSchema == NULL) {
      return -1;
    }
  }

  // source tasks read from the WAL through a tq reader limited to the queried tables
  if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
    pTask->exec.pTqReader = tqOpenReader(pTq->pVnode);
    if (pTask->exec.pTqReader == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return -1;
    }

    SArray* pList = qGetQueriedTableListInfo(pTask->exec.pExecutor);
    tqReaderAddTbUidList(pTask->exec.pTqReader, pList);
  }

  streamSetupTrigger(pTask);
  tqInfo("vgId:%d expand stream task, s-task:%s, ver:%" PRId64 " child id:%d, level:%d", vgId, pTask->id.idStr,
         pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel);
  return 0;
}
L
Liu Jicong 已提交
986

987 988 989 990 991 992
// Answer a "task check" request sent by an upstream task.
//
// The request body (after the SMsgHead) is decoded, the downstream task is
// looked up, and a response is sent whose status is 1 only when the task exists
// and its status is TASK_STATUS__NORMAL, 0 otherwise.
//
// Returns 0 after the response has been handed to tmsgSendRsp(); -1 when the
// request cannot be decoded, the response cannot be encoded, or the response
// buffer cannot be allocated.
int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  // zero-initialized so that no indeterminate field is ever read
  SStreamTaskCheckReq req = {0};
  SDecoder            decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  int32_t decodeRes = tDecodeSStreamTaskCheckReq(&decoder, &req);
  tDecoderClear(&decoder);
  if (decodeRes < 0) {
    tqError("unable to decode task check req %d", __LINE__);
    return -1;
  }

  int32_t             taskId = req.downstreamTaskId;
  SStreamTaskCheckRsp rsp = {
      .reqId = req.reqId,
      .streamId = req.streamId,
      .childId = req.childId,
      .downstreamNodeId = req.downstreamNodeId,
      .downstreamTaskId = req.downstreamTaskId,
      .upstreamNodeId = req.upstreamNodeId,
      .upstreamTaskId = req.upstreamTaskId,
  };

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (pTask && atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL) {
    rsp.status = 1;
  } else {
    rsp.status = 0;
  }

  if (pTask) streamMetaReleaseTask(pTq->pStreamMeta, pTask);

  tqDebug("tq recv task check req(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d",
          rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);

  SEncoder encoder;
  int32_t  code;
  int32_t  len;
  tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code);
  if (code < 0) {
    tqError("unable to encode rsp %d", __LINE__);
    return -1;
  }

  void* buf = rpcMallocCont(sizeof(SMsgHead) + len);
  if (buf == NULL) {
    // the buffer is written to right below, so an allocation failure must stop here
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId);

  void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
  tEncoderInit(&encoder, (uint8_t*)abuf, len);
  tEncodeSStreamTaskCheckRsp(&encoder, &rsp);
  tEncoderClear(&encoder);

  SRpcMsg rspMsg = {
      .code = 0,
      .pCont = buf,
      .contLen = sizeof(SMsgHead) + len,
      .info = pMsg->info,
  };

  tmsgSendRsp(&rspMsg);
  return 0;
}

1047
// Handle the response to a task check request: decode it, find the upstream
// task it is addressed to and forward it to streamProcessTaskCheckRsp().
//
// Returns -1 when decoding fails or the upstream task is not found; otherwise
// the result of streamProcessTaskCheckRsp().
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamTaskCheckRsp rsp;
  SDecoder            decoder;

  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t code = tDecodeSStreamTaskCheckRsp(&decoder, &rsp);
  tDecoderClear(&decoder);  // cleared on both the success and the failure path
  if (code < 0) {
    return -1;
  }

  tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d",
          rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);

  SStreamTask* pUpstream = streamMetaAcquireTask(pTq->pStreamMeta, rsp.upstreamTaskId);
  if (pUpstream == NULL) {
    return -1;
  }

  code = streamProcessTaskCheckRsp(pUpstream, &rsp, sversion);
  streamMetaReleaseTask(pTq->pStreamMeta, pUpstream);
  return code;
}

1073
// Deploy a stream task described by a serialized message.
//
// Does nothing (returns 0) when tsDisableStream is set. Otherwise the task is
// allocated, decoded from msg, added to the stream meta, and - when the task
// has fillHistory set - the downstream check is started.
//
// Returns 0 on success, -1 when allocation, decoding or adding the task fails.
int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  int32_t code;
#if 0
  code = streamMetaAddSerializedTask(pTq->pStreamMeta, version, msg, msgLen);
  if (code < 0) return code;
#endif
  if (tsDisableStream) {
    return 0;
  }

  // 1.deserialize msg and build task
  SStreamTask* pNewTask = taosMemoryCalloc(1, sizeof(SStreamTask));
  if (pNewTask == NULL) {
    return -1;
  }

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  code = tDecodeStreamTask(&decoder, pNewTask);
  tDecoderClear(&decoder);
  if (code < 0) {
    taosMemoryFree(pNewTask);
    return -1;
  }

  // 2.save task
  code = streamMetaAddTask(pTq->pStreamMeta, sversion, pNewTask);
  if (code < 0) {
    return -1;
  }

  // 3.go through recover steps to fill history
  if (pNewTask->fillHistory) {
    streamTaskCheckDownstream(pNewTask, sversion);
  }

  return 0;
}

L
Liu Jicong 已提交
1114 1115 1116 1117 1118
// Run recovery step 1 for a source task and, unless the task is being dropped,
// queue the step 2 request on the vnode write queue.
//
// Returns -1 when the task is not found, its checkpoint version is not
// positive, the step 2 request cannot be built, or the message buffer cannot be
// allocated. Returns 0 when step 2 was queued or the task is being dropped.
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
  SStreamRecoverStep1Req* pReq = (SStreamRecoverStep1Req*)pMsg->pCont;
  SStreamTask*            pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
  if (pTask == NULL) {
    return -1;
  }

  // check param
  int64_t fillVer1 = pTask->chkInfo.version;
  if (fillVer1 <= 0) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return -1;
  }

  // do recovery step 1
  streamSourceRecoverScanStep1(pTask);

  if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return 0;
  }

  // build msg to launch next step
  SStreamRecoverStep2Req req;
  int32_t                code = streamBuildSourceRecover2Req(pTask, &req);
  if (code < 0) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return -1;
  }

  // Sample the status while the reference is still held; pTask must not be
  // touched once it has been released.
  bool dropping = (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING);
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  if (dropping) {
    return 0;
  }

  // serialize msg: the payload is the step 2 request, so size it from that object
  int32_t len = sizeof(req);

  void* serializedReq = rpcMallocCont(len);
  if (serializedReq == NULL) {
    return -1;
  }

  memcpy(serializedReq, &req, len);

  // dispatch msg
  SRpcMsg rpcMsg = {
      .code = 0,
      .contLen = len,
      .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE,
      .pCont = serializedReq,
  };

  tmsgPutToQueue(&pTq->pVnode->msgCb, WRITE_QUEUE, &rpcMsg);

  return 0;
}

1177
// Run recovery step 2 for a source task, then restore its parameters, set its
// status to normal, notify the downstream tasks, clear fillHistory and persist
// the task.
//
// The task reference acquired at the top is released on every path through the
// single exit label.
//
// Returns -1 when the task is not found or any step fails; 0 on success or
// when the task turns out to be dropping after step 2.
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SStreamRecoverStep2Req* pReq = (SStreamRecoverStep2Req*)msg;
  SStreamTask*            pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
  if (pTask == NULL) {
    return -1;
  }

  int32_t result = -1;

  // do recovery step 2
  if (streamSourceRecoverScanStep2(pTask, sversion) < 0) {
    goto _release;
  }

  // nothing more to do for a task that is being dropped
  if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) {
    result = 0;
    goto _release;
  }

  // restore param
  if (streamRestoreParam(pTask) < 0) {
    goto _release;
  }

  // set status normal
  if (streamSetStatusNormal(pTask) < 0) {
    goto _release;
  }

  // dispatch recover finish req to all related downstream task
  if (streamDispatchRecoverFinishReq(pTask) < 0) {
    goto _release;
  }

  atomic_store_8(&pTask->fillHistory, 0);
  streamMetaSaveTask(pTq->pStreamMeta, pTask);
  result = 0;

_release:
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return result;
}

L
Liu Jicong 已提交
1226 1227 1228
// Handle a "recover finish" request: decode it from the message body (after
// the SMsgHead), find the addressed task and pass the child id to
// streamProcessRecoverFinishReq().
//
// Returns 0 on success; -1 when the request cannot be decoded, the task is not
// found, or processing fails.
int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  // deserialize; zero-initialized so that no indeterminate field is ever read
  SStreamRecoverFinishReq req = {0};

  SDecoder decoder;
  tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
  int32_t decodeRes = tDecodeSStreamRecoverFinishReq(&decoder, &req);
  tDecoderClear(&decoder);
  if (decodeRes < 0) {
    // do not look up a task from a request that failed to decode
    return -1;
  }

  // find task
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
  if (pTask == NULL) {
    return -1;
  }

  // do process request
  if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) {
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    return -1;
  }

  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}
L
Liu Jicong 已提交
1252

L
Liu Jicong 已提交
1253 1254 1255 1256 1257
// Placeholder handler for the "recover finish" response: neither argument is
// used and the function always reports success.
int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}

L
Liu Jicong 已提交
1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273
// Turn a decoded delete result into a STREAM_DELETE_DATA block and feed it to
// every source-level stream task.
//
// One row is written per deleted table uid: start key, end key and uid are set,
// the group id and calculate-start/end columns are set to NULL. The block is
// shared between the tasks through a reference counter that starts at 1 (the
// reference held by this function) and is raised once per task the block is
// offered to; the block and the counter are destroyed here when the counter
// drops to 0.
//
// When a required allocation fails, the affected task(s) are notified through
// streamTaskInputFail() instead of receiving the block.
//
// Always returns 0.
int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) {
  bool        failed = false;
  SDecoder*   pCoder = &(SDecoder){0};
  SDeleteRes* pRes = &(SDeleteRes){0};

  pRes->uidList = taosArrayInit(0, sizeof(tb_uid_t));
  if (pRes->uidList == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    failed = true;
  }

  tDecoderInit(pCoder, pReq, len);
  tDecodeDeleteRes(pCoder, pRes);
  tDecoderClear(pCoder);

  // nothing was deleted, so there is nothing to forward
  int32_t sz = taosArrayGetSize(pRes->uidList);
  if (sz == 0 || pRes->affectedRows == 0) {
    taosArrayDestroy(pRes->uidList);
    return 0;
  }

  SSDataBlock* pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA);
  blockDataEnsureCapacity(pDelBlock, sz);
  pDelBlock->info.rows = sz;
  pDelBlock->info.version = ver;

  // the column descriptors do not change while rows are filled, look them up once
  SColumnInfoData* pStartCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX);
  SColumnInfoData* pEndCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX);
  SColumnInfoData* pUidCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX);
  SColumnInfoData* pGroupIdCol = taosArrayGet(pDelBlock->pDataBlock, GROUPID_COLUMN_INDEX);
  SColumnInfoData* pCalStartCol = taosArrayGet(pDelBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX);
  SColumnInfoData* pCalEndCol = taosArrayGet(pDelBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX);

  for (int32_t i = 0; i < sz; i++) {
    // start key column
    colDataSetVal(pStartCol, i, (const char*)&pRes->skey, false);
    // end key column
    colDataSetVal(pEndCol, i, (const char*)&pRes->ekey, false);
    // uid column
    int64_t* pUid = taosArrayGet(pRes->uidList, i);
    colDataSetVal(pUidCol, i, (const char*)pUid, false);

    colDataSetNULL(pGroupIdCol, i);
    colDataSetNULL(pCalStartCol, i);
    colDataSetNULL(pCalEndCol, i);
  }

  taosArrayDestroy(pRes->uidList);

  // shared reference counter; the initial 1 is the reference held by this function
  int32_t* pRef = taosMemoryMalloc(sizeof(int32_t));
  if (pRef == NULL) {
    // without a counter the block cannot be shared: report the failure to every source task
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    failed = true;
  } else {
    *pRef = 1;
  }

  void* pIter = NULL;
  while (1) {
    pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter);
    if (pIter == NULL) break;
    SStreamTask* pTask = *(SStreamTask**)pIter;
    if (pTask->taskLevel != TASK_LEVEL__SOURCE) continue;

    qDebug("delete req enqueue stream task: %d, ver: %" PRId64, pTask->id.taskId, ver);

    if (!failed) {
      SStreamRefDataBlock* pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
      if (pRefBlock == NULL) {
        // this task cannot receive the block; report it and go on with the others
        qError("stream task input del failed, task id %d", pTask->id.taskId);
        streamTaskInputFail(pTask);
        continue;
      }

      pRefBlock->type = STREAM_INPUT__REF_DATA_BLOCK;
      pRefBlock->pBlock = pDelBlock;
      pRefBlock->dataRef = pRef;
      atomic_add_fetch_32(pRefBlock->dataRef, 1);

      if (tAppendDataForStream(pTask, (SStreamQueueItem*)pRefBlock) < 0) {
        qError("stream task input del failed, task id %d", pTask->id.taskId);

        // give back the reference taken for this task
        atomic_sub_fetch_32(pRef, 1);
        taosFreeQitem(pRefBlock);
        continue;
      }

      if (streamSchedExec(pTask) < 0) {
        qError("stream task launch failed, task id %d", pTask->id.taskId);
        continue;
      }
    } else {
      streamTaskInputFail(pTask);
    }
  }

  if (pRef == NULL) {
    // no task ever received the block, so it is still exclusively owned here
    blockDataDestroy(pDelBlock);
    return 0;
  }

  // drop the reference held by this function
  int32_t ref = atomic_sub_fetch_32(pRef, 1);
  if (ref == 0) {
    blockDataDestroy(pDelBlock);
    taosMemoryFree(pRef);
  }

  return 0;
}

1374 1375 1376
// Save `ver` as the offset of the task identified by `key`, then hand the submit
// item to tqAddInputBlockNLaunchTask().
//
// The offset is written before the launch attempt so that it stays in the store
// when the launch fails; it is deleted again only after a successful launch.
//
// Returns the code produced by tqAddInputBlockNLaunchTask(), so that a failed
// launch is visible to the caller instead of being reported as success.
static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, SStreamTask* pTask, SStreamDataSubmit2* pSubmit,
                                         const char* key, int64_t ver) {
  doSaveTaskOffset(pOffsetStore, key, ver);

  int32_t code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pSubmit, ver);
  if (code == TSDB_CODE_SUCCESS) {
    // everything went through, the saved offset is no longer needed
    tqOffsetDelete(pOffsetStore, key);
  }

  return code;
}

L
Liu Jicong 已提交
1386
// Feed one submit message to every source-level stream task of this vnode.
//
// For each task in pTq->pStreamMeta->pTasks:
//   - tasks that are not TASK_LEVEL__SOURCE, or that are in the RECOVER_PREPARE /
//     WAIT_DOWNSTREAM status, are skipped;
//   - if the task input queue is full, an offset is saved (unless one is already
//     stored) and nothing is pushed;
//   - if an offset is stored for the task, the reader is moved to that version and
//     the next block it yields is launched; when the seek fails or no data block
//     is fetched, the submit message itself is pushed;
//   - otherwise the submit message is pushed directly.
//
// Returns 0 after the iteration finishes, or -1 (terrno = TSDB_CODE_OUT_OF_MEMORY)
// when the submit item cannot be created; in that case saveOffsetForAllTasks() is
// called with submit.ver first.
int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) {
  SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit, STREAM_INPUT__DATA_SUBMIT);
  if (pSubmit == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tqError("failed to create data submit for stream since out of memory");
    saveOffsetForAllTasks(pTq, submit.ver);
    return -1;
  }

  void* pIter = NULL;
  for (pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); pIter != NULL;
       pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter)) {
    SStreamTask* pTask = *(SStreamTask**)pIter;

    // only source tasks consume submit data
    if (pTask->taskLevel != TASK_LEVEL__SOURCE) {
      continue;
    }

    if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) {
      tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId,
              pTask->taskStatus);
      continue;
    }

    // build the key under which the offset of this task is stored
    char key[128] = {0};
    createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId);

    if (tInputQueueIsFull(pTask)) {
      // nothing can be pushed now: make sure an offset exists for this task
      STqOffset* pSaved = tqOffsetRead(pTq->pOffsetStore, key);
      int64_t    startVer = submit.ver;

      if (pSaved != NULL) {
        startVer = pSaved->val.version;
      } else {
        doSaveTaskOffset(pTq->pOffsetStore, key, submit.ver);
      }

      tqDebug("s-task:%s input queue is full, do nothing, start ver:%" PRId64, pTask->id.idStr, startVer);
      continue;
    }

    STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key);
    if (pOffset == NULL) {
      // no stored offset, push the submit message directly
      addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver);
      continue;
    }

    // a stored offset exists: move the reader to the stored version first
    if (tqSeekVer(pTask->exec.pTqReader, pOffset->val.version, "") != TSDB_CODE_SUCCESS) {
      // todo handle the case where offset has been deleted in WAL, due to stream computing too slow
      tqDebug("s-task:%s data in WAL are all consumed, try data in submit msg", pTask->id.idStr);
      addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver);
      continue;
    }

    SFetchRet ret = {.data.info.type = STREAM_NORMAL};
    terrno = 0;
    tqNextBlock(pTask->exec.pTqReader, &ret);

    if (ret.fetchType != FETCH_TYPE__DATA) {
      // FETCH_TYPE__NONE, fall back to the block carried by the submit message
      tqDebug("s-task:%s data in WAL are all consumed, try data in submit message", pTask->id.idStr);
      addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver);
      continue;
    }

    // nothing more to do on failure: the offset value is kept already
    (void)launchTaskForWalBlock(pTask, &ret, pOffset);
  }

  streamDataSubmitDestroy(pSubmit);
  taosFreeQitem(pSubmit);

  return 0;
}

L
Liu Jicong 已提交
1472 1473
// Handle a TDMT_STREAM_TASK_RUN style request whose body is a SStreamTaskRunReq.
//
// When the request targets ALL_STREAM_TASKS_ID, tqDoRestoreSourceStreamTasks() is
// invoked and 0 is returned. Otherwise the task is looked up by id: it is run via
// streamProcessRunReq() when its status is NORMAL or RESTORE, and left alone for
// any other status.
//
// Returns 0 when the request was handled, -1 when the task cannot be found.
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
  SStreamTaskRunReq* pRunReq = pMsg->pCont;

  int32_t taskId = pRunReq->taskId;
  int32_t vgId = TD_VID(pTq->pVnode);

  // all tasks are restored from the wal
  if (taskId == ALL_STREAM_TASKS_ID) {
    tqDoRestoreSourceStreamTasks(pTq);
    return 0;
  }

  SStreamTask* pTask = streamMetaAcquireTaskEx(pTq->pStreamMeta, taskId);
  if (pTask == NULL) {
    tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId);
    return -1;
  }

  if (pTask->taskStatus == TASK_STATUS__NORMAL) {
    tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr);
    streamProcessRunReq(pTask);
  } else if (pTask->taskStatus == TASK_STATUS__RESTORE) {
    tqDebug("vgId:%d s-task:%s start to process in restore procedure from last chk point:%" PRId64, vgId,
            pTask->id.idStr, pTask->chkInfo.version);
    streamProcessRunReq(pTask);
  } else {
    tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr);
  }

  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}

L
Liu Jicong 已提交
1504
// Decode a stream dispatch request from pMsg and pass it to the target task.
//
// pMsg->pCont is expected to start with a SMsgHead followed by the encoded
// SStreamDispatchReq. `exec` is forwarded unchanged to streamProcessDispatchReq().
//
// Returns 0 when the request was handed to the task, -1 when the message cannot
// be decoded (terrno = TSDB_CODE_MSG_DECODE_ERROR) or the task does not exist.
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  SStreamDispatchReq req = {0};
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  int32_t decodeRes = tDecodeStreamDispatchReq(&decoder, &req);
  // the decoder is released on every path, as vnodeEnqueueStreamMsg() does
  tDecoderClear(&decoder);

  if (decodeRes < 0) {
    // do not look up a task with an id taken from a request that failed to decode
    terrno = TSDB_CODE_MSG_DECODE_ERROR;
    return -1;
  }

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
  if (pTask == NULL) {
    // nobody will consume the decoded request, release what decoding allocated
    tDeleteStreamDispatchReq(&req);
    return -1;
  }

  // NOTE(review): req is not released on this path, same as in
  // vnodeEnqueueStreamMsg(); presumably streamProcessDispatchReq() consumes it - confirm.
  SRpcMsg rsp = {
      .info = pMsg->info,
      .code = 0,
  };
  streamProcessDispatchReq(pTask, &req, &rsp, exec);
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}

L
Liu Jicong 已提交
1528 1529
// Deliver a dispatch response to the upstream task named in the response body.
//
// The body follows the SMsgHead in pMsg->pCont; its upstreamTaskId is converted
// with ntohl() before the lookup. pMsg->code is forwarded to
// streamProcessDispatchRsp().
//
// Returns 0 when the task was found, -1 otherwise.
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) {
  SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
  int32_t             upstreamTaskId = ntohl(pDispatchRsp->upstreamTaskId);

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, upstreamTaskId);
  tqDebug("recv dispatch rsp, code:%x", pMsg->code);

  if (pTask == NULL) {
    return -1;
  }

  streamProcessDispatchRsp(pTask, pDispatchRsp, pMsg->code);
  streamMetaReleaseTask(pTq->pStreamMeta, pTask);
  return 0;
}
L
Liu Jicong 已提交
1541

1542
// Remove the stream task named by a SVDropStreamTaskReq from the stream meta.
//
// `msg` is interpreted directly as a SVDropStreamTaskReq; `sversion` and `msgLen`
// are not used. Always returns 0.
int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  const SVDropStreamTaskReq* pDropReq = (const SVDropStreamTaskReq*)msg;

  streamMetaRemoveTask(pTq->pStreamMeta, pDropReq->taskId);

  return 0;
}
L
Liu Jicong 已提交
1547 1548 1549 1550 1551 1552 1553

// Decode a stream retrieve request from pMsg and pass it to the destination task.
//
// pMsg->pCont is expected to start with a SMsgHead followed by the encoded
// SStreamRetrieveReq. The decoded request is released with
// tDeleteStreamRetrieveReq() on every path, including when the task is missing.
//
// Returns 0 when the task was found and the request processed, -1 otherwise.
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
  char*   msgStr = pMsg->pCont;
  char*   msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);

  // zero-initialised so the fields read below are never indeterminate
  SStreamRetrieveReq req = {0};
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  // NOTE(review): the decode result is not checked here (unchanged behavior).
  tDecodeStreamRetrieveReq(&decoder, &req);
  tDecoderClear(&decoder);

  int32_t      code = -1;
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.dstTaskId);
  if (pTask != NULL) {
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    streamProcessRetrieveReq(pTask, &req, &rsp);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    code = 0;
  }

  // release the decoded request whether or not a task consumed it
  tDeleteStreamRetrieveReq(&req);
  return code;
}

// Placeholder handler for stream retrieve responses: nothing is processed and 0
// is always returned.
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) {
  (void)pTq;
  (void)pMsg;
  return 0;
}
L
Liu Jicong 已提交
1574

1575 1576 1577 1578 1579 1580
// Decode a stream dispatch request and hand it to the target task without
// executing it (streamProcessDispatchReq() is called with exec == false).
//
// On success the message content and the queue item are freed and 0 is returned.
// When decoding fails or the task does not exist, an error response carrying the
// failure code is sent back (if the message has an rpc handle), the message is
// freed and -1 is returned.
int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) {
  STQ*      pTq = pVnode->pTq;
  SMsgHead* msgStr = pMsg->pCont;
  char*     msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
  int32_t   msgLen = pMsg->contLen - sizeof(SMsgHead);
  int32_t   code = 0;

  // Zero-initialised: the FAIL path below builds its response from fields of req,
  // and it is also reached when decoding failed before those fields were filled.
  SStreamDispatchReq req = {0};
  SDecoder           decoder;
  tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
  if (tDecodeStreamDispatchReq(&decoder, &req) < 0) {
    code = TSDB_CODE_MSG_DECODE_ERROR;
    tDecoderClear(&decoder);
    goto FAIL;
  }
  tDecoderClear(&decoder);

  int32_t taskId = req.taskId;

  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
  if (pTask) {
    SRpcMsg rsp = {.info = pMsg->info, .code = 0};
    streamProcessDispatchReq(pTask, &req, &rsp, false);
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
    return 0;
  } else {
    // no task will consume the request, release what decoding allocated
    tDeleteStreamDispatchReq(&req);
  }

  code = TSDB_CODE_STREAM_TASK_NOT_EXIST;

FAIL:
  // NOTE(review): this early return does not free pMsg->pCont / pMsg, unlike every
  // other exit of this function - confirm who owns the message when handle is NULL.
  if (pMsg->info.handle == NULL) return -1;

  SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp));
  if (pRspHead == NULL) {
    // no memory for a response body: reply with a bare out-of-memory code
    SRpcMsg rsp = {.code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info};
    // log the code that is actually sent, not the original failure code
    tqDebug("send dispatch error rsp, code: %x", rsp.code);
    tmsgSendRsp(&rsp);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
    return -1;
  }

  // ids in the response are written in network byte order
  pRspHead->vgId = htonl(req.upstreamNodeId);
  SStreamDispatchRsp* pRsp = POINTER_SHIFT(pRspHead, sizeof(SMsgHead));
  pRsp->streamId = htobe64(req.streamId);
  pRsp->upstreamTaskId = htonl(req.upstreamTaskId);
  pRsp->upstreamNodeId = htonl(req.upstreamNodeId);
  pRsp->downstreamNodeId = htonl(pVnode->config.vgId);
  pRsp->downstreamTaskId = htonl(req.taskId);
  pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL;

  SRpcMsg rsp = {
      .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead};
  tqDebug("send dispatch error rsp, code: %x", code);
  tmsgSendRsp(&rsp);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
  return -1;
}
L
Liu Jicong 已提交
1638

1639
// Returns 1 when `sversion` is not greater than pTq->walLogLastVer, 0 otherwise.
int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) {
  if (sversion <= pTq->walLogLastVer) {
    return 1;
  }
  return 0;
}
1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650

// Start the restore procedure for the stream tasks of this vnode.
//
// Offsets are initialised through initOffsetForAllRestoreTasks(), then a
// TDMT_STREAM_TASK_RUN message addressed to ALL_STREAM_TASKS_ID is put on the
// STREAM_QUEUE.
//
// Returns 0 once the message has been handed to tmsgPutToQueue(), or -1
// (terrno = TSDB_CODE_OUT_OF_MEMORY) when the request cannot be allocated.
int32_t tqRestoreStreamTasks(STQ* pTq) {
  int32_t vgId = TD_VID(pTq->pVnode);

  SStreamTaskRunReq* pReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
  if (pReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tqError("vgId:%d failed restore stream tasks, code:%s", vgId, terrstr(terrno));
    return -1;
  }

  int32_t taskCount = taosHashGetSize(pTq->pStreamMeta->pRestoreTasks);
  tqInfo("vgId:%d start restoring stream tasks, total tasks:%d", vgId, taskCount);
  initOffsetForAllRestoreTasks(pTq);

  // a run request addressed to every task
  pReq->head.vgId = vgId;
  pReq->streamId = 0;
  pReq->taskId = ALL_STREAM_TASKS_ID;

  SRpcMsg runMsg = {0};
  runMsg.msgType = TDMT_STREAM_TASK_RUN;
  runMsg.pCont = pReq;
  runMsg.contLen = sizeof(SStreamTaskRunReq);
  tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &runMsg);

  return 0;
}