/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "executor.h"
#include "executorimpl.h"
#include "planner.h"
#include "tdatablock.h"
#include "tref.h"
#include "tudf.h"
#include "vnode.h"

// Once-guard handed to taosThreadOnce() so that initRefPool() runs a single time.
static TdThreadOnce initPoolOnce = PTHREAD_ONCE_INIT;
// Id of the ref pool used for exchange operator objects; stays -1 until initRefPool() assigns it.
int32_t             exchangeObjRefPool = -1;

/**
 * Process-exit hook (registered through atexit() in initRefPool()).
 * Swaps the global pool id for 0 and closes the pool id that was stored before the swap.
 */
static void cleanupRefPool() {
  int32_t prevPoolId = atomic_val_compare_exchange_32(&exchangeObjRefPool, exchangeObjRefPool, 0);
  taosCloseRef(prevPoolId);
}
31

D
dapan1121 已提交
32 33 34 35 36
/**
 * Open the ref pool for exchange operator objects (capacity 1024, entries released with
 * doDestroyExchangeOperatorInfo) and arrange for it to be closed at process exit.
 */
static void initRefPool() {
  exchangeObjRefPool = taosOpenRef(1024, doDestroyExchangeOperatorInfo);
  atexit(cleanupRefPool);
}

L
Liu Jicong 已提交
37 38 39 40
/**
 * Descend the single-child operator chain to the stream scan operator and append the
 * supplied SMA input to its block list.
 *
 * Every operator visited on the way (and the scan operator itself) is reset to OP_NOT_OPENED.
 *
 * @param pOperator    operator to start from
 * @param input        input payload; its layout depends on `type`
 * @param numOfBlocks  number of entries in `input`
 * @param type         one of STREAM_INPUT__MERGED_SUBMIT / STREAM_INPUT__DATA_SUBMIT / STREAM_INPUT__DATA_BLOCK;
 *                     any other value leaves the block list untouched
 * @param id           task id string, used for logging only
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_APP_ERROR when no stream scan operator can be reached
 *         (leaf reached, or an operator has more than one downstream).
 */
static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, char* id) {
  if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
    if (pOperator->numOfDownstream == 0) {
      // NOTE: id is a string; the former `"%s" PRIx64` spelling appended a literal "lx" to the output
      qError("failed to find stream scan operator to set the input data block, %s", id);
      return TSDB_CODE_APP_ERROR;
    }

    if (pOperator->numOfDownstream > 1) {  // not handle this in join query
      qError("join not supported for stream block scan, %s", id);
      return TSDB_CODE_APP_ERROR;
    }

    pOperator->status = OP_NOT_OPENED;
    return doSetSMABlock(pOperator->pDownstream[0], input, numOfBlocks, type, id);
  }

  pOperator->status = OP_NOT_OPENED;

  SStreamScanInfo* pInfo = pOperator->info;

  if (type == STREAM_INPUT__MERGED_SUBMIT) {
    // input is treated as a contiguous array of SPackedData
    for (size_t i = 0; i < numOfBlocks; i++) {
      SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData));
      taosArrayPush(pInfo->pBlockLists, pReq);
    }
    pInfo->blockType = STREAM_INPUT__DATA_SUBMIT;
  } else if (type == STREAM_INPUT__DATA_SUBMIT) {
    // NOTE(review): doSetStreamBlock pushes `input` here while this function pushes `&input`;
    // confirm against the callers which of the two matches the element type of pBlockLists.
    taosArrayPush(pInfo->pBlockLists, &input);
    pInfo->blockType = STREAM_INPUT__DATA_SUBMIT;
  } else if (type == STREAM_INPUT__DATA_BLOCK) {
    // input is treated as an array of SSDataBlock; each one is wrapped into an SPackedData
    for (size_t i = 0; i < numOfBlocks; ++i) {
      SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i];
      SPackedData  tmp = {
           .pDataBlock = pDataBlock,
      };
      taosArrayPush(pInfo->pBlockLists, &tmp);
    }
    pInfo->blockType = STREAM_INPUT__DATA_BLOCK;
  }

  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
79
/**
 * Reset every operator above the stream scan operator to OP_NOT_OPENED.
 *
 * The walk follows the single downstream of each operator and stops (returning 0) as soon as
 * the stream scan operator is reached; the scan operator itself is not modified here.
 *
 * @param pOperator  operator to start from
 * @param id         task id string, used for logging only
 * @return 0 on success, TSDB_CODE_APP_ERROR when a leaf is reached before a stream scan
 *         operator or when an operator has more than one downstream.
 */
static int32_t doSetStreamOpOpen(SOperatorInfo* pOperator, char* id) {
  if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
    if (pOperator->numOfDownstream == 0) {
      // NOTE: id is a string; the former `"%s" PRIx64` spelling appended a literal "lx" to the output
      qError("failed to find stream scan operator to set the input data block, %s", id);
      return TSDB_CODE_APP_ERROR;
    }

    if (pOperator->numOfDownstream > 1) {  // not handle this in join query
      qError("join not supported for stream block scan, %s", id);
      return TSDB_CODE_APP_ERROR;
    }

    pOperator->status = OP_NOT_OPENED;
    return doSetStreamOpOpen(pOperator->pDownstream[0], id);
  }
  return 0;
}

96 97 98 99 100 101 102 103 104 105 106
/**
 * Find the stream scan operator below pOperator and drop its buffered input blocks.
 *
 * Only chains where each operator has exactly one downstream are followed; for any other
 * shape the function does nothing.
 */
static void clearStreamBlock(SOperatorInfo* pOperator) {
  if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
    if (pOperator->numOfDownstream == 1) {
      // a void function must not `return <expression>;`, so recurse and fall through
      clearStreamBlock(pOperator->pDownstream[0]);
    }
  } else {
    SStreamScanInfo* pInfo = pOperator->info;
    doClearBufferedBlocks(pInfo);
  }
}

5
54liuyao 已提交
107 108 109 110 111 112
/**
 * Clear the task's error code and discard the blocks buffered in its stream scan operator.
 */
void resetTaskInfo(qTaskInfo_t tinfo) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;

  pTask->code = 0;
  clearStreamBlock(pTask->pRoot);
}

L
Liu Jicong 已提交
113
/**
 * Descend the single-child operator chain to the stream scan operator and install the
 * supplied stream input as its (previously empty) block list.
 *
 * Every operator visited on the way (and the scan operator itself) is reset to OP_NOT_OPENED.
 * The scan operator is asserted to have no pending blocks before the new input is appended.
 *
 * @param pOperator    operator to start from
 * @param input        input payload; its layout depends on `type`
 * @param numOfBlocks  number of entries in `input` (asserted to be 1 for STREAM_INPUT__DATA_SUBMIT)
 * @param type         STREAM_INPUT__MERGED_SUBMIT / STREAM_INPUT__DATA_SUBMIT / STREAM_INPUT__DATA_BLOCK;
 *                     any other value trips ASSERT(0)
 * @param id           task id string, used for logging only
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_APP_ERROR when no stream scan operator can be reached
 *         (leaf reached, or an operator has more than one downstream).
 */
static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, char* id) {
  if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
    if (pOperator->numOfDownstream == 0) {
      // NOTE: id is a string; the former `"%s" PRIx64` spelling appended a literal "lx" to the output
      qError("failed to find stream scan operator to set the input data block, %s", id);
      return TSDB_CODE_APP_ERROR;
    }

    if (pOperator->numOfDownstream > 1) {  // not handle this in join query
      qError("join not supported for stream block scan, %s", id);
      return TSDB_CODE_APP_ERROR;
    }

    pOperator->status = OP_NOT_OPENED;
    return doSetStreamBlock(pOperator->pDownstream[0], input, numOfBlocks, type, id);
  }

  pOperator->status = OP_NOT_OPENED;

  SStreamScanInfo* pInfo = pOperator->info;
  qDebug("task stream set total blocks:%d %s", (int32_t)numOfBlocks, id);
  ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0);

  if (type == STREAM_INPUT__MERGED_SUBMIT) {
    // input is treated as a contiguous array of SPackedData
    for (size_t i = 0; i < numOfBlocks; i++) {
      SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData));
      taosArrayPush(pInfo->pBlockLists, pReq);
    }
    pInfo->blockType = STREAM_INPUT__DATA_SUBMIT;
  } else if (type == STREAM_INPUT__DATA_SUBMIT) {
    ASSERT(numOfBlocks == 1);
    taosArrayPush(pInfo->pBlockLists, input);
    pInfo->blockType = STREAM_INPUT__DATA_SUBMIT;
  } else if (type == STREAM_INPUT__DATA_BLOCK) {
    // input is treated as an array of SSDataBlock; each one is wrapped into an SPackedData
    for (size_t i = 0; i < numOfBlocks; ++i) {
      SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i];
      SPackedData  tmp = {
           .pDataBlock = pDataBlock,
      };
      taosArrayPush(pInfo->pBlockLists, &tmp);
    }
    pInfo->blockType = STREAM_INPUT__DATA_BLOCK;
  } else {
    ASSERT(0);
  }

  return TSDB_CODE_SUCCESS;
}

160 161 162 163 164 165 166 167 168 169 170 171 172
/**
 * Propagate the task id string to the tsdb reader owned by the stream scan operator.
 *
 * The operator tree is followed through the first downstream of each operator. When the
 * stream scan operator is found and its table scan already has a data reader, the reader's
 * id is set to the task id string. If a leaf is reached without meeting a stream scan
 * operator, nothing is done.
 */
void doSetTaskId(SOperatorInfo* pOperator) {
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
  if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
    SStreamScanInfo* pStreamScanInfo = pOperator->info;
    STableScanInfo*  pScanInfo = pStreamScanInfo->pTableScanOp->info;
    if (pScanInfo->base.dataReader != NULL) {
      tsdbReaderSetId(pScanInfo->base.dataReader, pTaskInfo->id.str);
    }
  } else if (pOperator->numOfDownstream > 0) {
    // guard: a leaf operator has no pDownstream[0] to follow
    doSetTaskId(pOperator->pDownstream[0]);
  }
}

H
Haojun Liao 已提交
173 174 175 176 177 178
/**
 * Assign a new (taskId, queryId) identity to the task.
 *
 * The old id string is released, a new one is built with buildTaskId(), and the new string
 * is pushed down to the tsdb reader through doSetTaskId().
 */
void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId) {
  SExecTaskInfo* pTask = tinfo;

  // release the previous id string before installing the rebuilt one
  taosMemoryFreeClear(pTask->id.str);
  pTask->id.queryId = queryId;
  pTask->id.str = buildTaskId(taskId, queryId);

  // set the idstr for tsdbReader
  doSetTaskId(pTask->pRoot);
}

L
Liu Jicong 已提交
184 185
/**
 * Reset the operators above the stream scan operator of the task to the not-opened state.
 *
 * @return TSDB_CODE_APP_ERROR when tinfo is NULL, otherwise the result of doSetStreamOpOpen().
 */
int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  if (pTaskInfo == NULL) {
    return TSDB_CODE_APP_ERROR;
  }

  int32_t rc = doSetStreamOpOpen(pTaskInfo->pRoot, GET_TASKID(pTaskInfo));
  if (rc == TSDB_CODE_SUCCESS) {
    qDebug("%s set the stream block successfully", GET_TASKID(pTaskInfo));
  } else {
    qError("%s failed to set the stream block data", GET_TASKID(pTaskInfo));
  }

  return rc;
}
200

L
Liu Jicong 已提交
201
/**
 * Hand a batch of stream input blocks to the stream scan operator of the task.
 *
 * @param tinfo        task handle; NULL yields TSDB_CODE_APP_ERROR
 * @param pBlocks      input payload, interpreted according to `type`
 * @param numOfBlocks  number of entries in pBlocks
 * @param type         STREAM_INPUT__* selector forwarded to doSetStreamBlock()
 * @return TSDB_CODE_SUCCESS when there is nothing to set (NULL/empty input), otherwise the
 *         result of doSetStreamBlock().
 */
int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  if (pTaskInfo == NULL) {
    return TSDB_CODE_APP_ERROR;
  }

  // nothing to feed into the scanner
  if (numOfBlocks == 0 || pBlocks == NULL) {
    return TSDB_CODE_SUCCESS;
  }

  int32_t rc = doSetStreamBlock(pTaskInfo->pRoot, (void*)pBlocks, numOfBlocks, type, GET_TASKID(pTaskInfo));
  if (rc == TSDB_CODE_SUCCESS) {
    qDebug("%s set the stream block successfully", GET_TASKID(pTaskInfo));
  } else {
    qError("%s failed to set the stream block data", GET_TASKID(pTaskInfo));
  }

  return rc;
}

L
Liu Jicong 已提交
222 223
/**
 * Hand a batch of SMA input blocks to the stream scan operator of the task.
 *
 * @param tinfo        task handle; NULL yields TSDB_CODE_APP_ERROR
 * @param pBlocks      input payload, interpreted according to `type`
 * @param numOfBlocks  number of entries in pBlocks
 * @param type         STREAM_INPUT__* selector forwarded to doSetSMABlock()
 * @return TSDB_CODE_SUCCESS when there is nothing to set (NULL/empty input), otherwise the
 *         result of doSetSMABlock().
 */
int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  if (pTaskInfo == NULL) {
    return TSDB_CODE_APP_ERROR;
  }

  // nothing to feed into the scanner
  if (numOfBlocks == 0 || pBlocks == NULL) {
    return TSDB_CODE_SUCCESS;
  }

  int32_t rc = doSetSMABlock(pTaskInfo->pRoot, (void*)pBlocks, numOfBlocks, type, GET_TASKID(pTaskInfo));
  if (rc == TSDB_CODE_SUCCESS) {
    qDebug("%s set the sma block successfully", GET_TASKID(pTaskInfo));
  } else {
    qError("%s failed to set the sma block data", GET_TASKID(pTaskInfo));
  }

  return rc;
}

243 244 245
/**
 * Create an execution task running in OPTR_EXEC_MODEL_QUEUE mode.
 *
 * - msg == NULL: a task whose root is a raw scan operator is built; *numOfCols is not written.
 * - msg != NULL: msg is parsed into a subplan, the task is created from it and *numOfCols
 *   receives the number of output slots of the plan's output data block descriptor.
 *
 * @return the task handle, or NULL with terrno set on failure.
 */
qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* pReaderHandle, int32_t vgId, int32_t* numOfCols, uint64_t id) {
  if (msg == NULL) {  // create raw scan
    SExecTaskInfo* pTaskInfo = doCreateExecTaskInfo(0, id, vgId, OPTR_EXEC_MODEL_QUEUE, "");
    if (pTaskInfo == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return NULL;
    }

    pTaskInfo->pRoot = createRawScanOperatorInfo(pReaderHandle, pTaskInfo);
    if (pTaskInfo->pRoot == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      taosMemoryFree(pTaskInfo);
      return NULL;
    }

    qDebug("create raw scan task info completed, vgId:%d, %s", vgId, GET_TASKID(pTaskInfo));
    return pTaskInfo;
  }

  // plan-driven task: decode the subplan first
  struct SSubplan* pPlan = NULL;
  int32_t          code = qStringToSubplan(msg, &pPlan);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    return NULL;
  }

  qTaskInfo_t pTaskInfo = NULL;
  code = qCreateExecTask(pReaderHandle, vgId, 0, pPlan, &pTaskInfo, NULL, NULL, OPTR_EXEC_MODEL_QUEUE);
  if (code != TSDB_CODE_SUCCESS) {
    nodesDestroyNode((SNode*)pPlan);
    qDestroyTask(pTaskInfo);
    terrno = code;
    return NULL;
  }

  // extract the number of output columns
  SDataBlockDescNode* pDescNode = pPlan->pNode->pOutputDataBlockDesc;
  *numOfCols = 0;

  SNode* pSlot;
  FOREACH(pSlot, pDescNode->pSlots) {
    if (((SSlotDescNode*)pSlot)->output) {
      *numOfCols += 1;
    }
  }

  return pTaskInfo;
}

294
/**
 * Create an execution task running in OPTR_EXEC_MODEL_STREAM mode from a serialized subplan.
 *
 * @return the task handle; NULL when msg is NULL, or NULL with terrno set when the plan
 *         cannot be decoded or the task cannot be created.
 */
qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers, int32_t vgId) {
  if (NULL == msg) {
    return NULL;
  }

  struct SSubplan* pSubplan = NULL;
  int32_t          rc = qStringToSubplan(msg, &pSubplan);
  if (rc != TSDB_CODE_SUCCESS) {
    terrno = rc;
    return NULL;
  }

  qTaskInfo_t hTask = NULL;
  rc = qCreateExecTask(readers, vgId, 0, pSubplan, &hTask, NULL, NULL, OPTR_EXEC_MODEL_STREAM);
  if (rc == TSDB_CODE_SUCCESS) {
    return hTask;
  }

  // creation failed: release the plan and whatever part of the task was built
  nodesDestroyNode((SNode*)pSubplan);
  qDestroyTask(hTask);
  terrno = rc;
  return NULL;
}
317

318
static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const SArray* tableIdList, const char* idstr) {
319
  SArray* qa = taosArrayInit(4, sizeof(tb_uid_t));
H
Haojun Liao 已提交
320 321 322 323
  int32_t numOfUids = taosArrayGetSize(tableIdList);
  if (numOfUids == 0) {
    return qa;
  }
324 325 326 327

  // let's discard the tables those are not created according to the queried super table.
  SMetaReader mr = {0};
  metaReaderInit(&mr, pScanInfo->readHandle.meta, 0);
H
Haojun Liao 已提交
328
  for (int32_t i = 0; i < numOfUids; ++i) {
329
    uint64_t* id = (uint64_t*)taosArrayGet(tableIdList, i);
330

331
    int32_t code = metaGetTableEntryByUid(&mr, *id);
332
    if (code != TSDB_CODE_SUCCESS) {
333
      qError("failed to get table meta, uid:%" PRIu64 " code:%s, %s", *id, tstrerror(terrno), idstr);
334 335 336
      continue;
    }

M
Minglei Jin 已提交
337 338
    tDecoderClear(&mr.coder);

339
    // TODO handle ntb case
L
Liu Jicong 已提交
340
    if (mr.me.type != TSDB_CHILD_TABLE || mr.me.ctbEntry.suid != pScanInfo->tableUid) {
341 342
      continue;
    }
343 344 345

    if (pScanInfo->pTagCond != NULL) {
      bool          qualified = false;
346
      STableKeyInfo info = {.groupId = 0, .uid = mr.me.uid};
H
Haojun Liao 已提交
347
      code = isQualifiedTable(&info, pScanInfo->pTagCond, pScanInfo->readHandle.meta, &qualified);
348 349 350 351 352 353 354 355 356 357
      if (code != TSDB_CODE_SUCCESS) {
        qError("failed to filter new table, uid:0x%" PRIx64 ", %s", info.uid, idstr);
        continue;
      }

      if (!qualified) {
        continue;
      }
    }

358
    // handle multiple partition
359 360 361 362 363 364 365
    taosArrayPush(qa, id);
  }

  metaReaderClear(&mr);
  return qa;
}

366
/**
 * Add tables to, or remove tables from, the set scanned by the task's stream scan operator.
 *
 * Add path: tableIdList is first reduced by filterUnqualifiedTables(); the surviving uids are
 * registered with the tq reader and then appended, together with their group id, to the table
 * list of the underlying table scan while holding the task write latch.
 * Remove path: the uids are removed from the tq reader while holding the task write latch.
 *
 * @param tinfo        task handle
 * @param tableIdList  array of table uids
 * @param isAdd        true to add the tables, false to remove them
 * @return 0 on success; TSDB_CODE_APP_ERROR when the operator chain holds no stream scan
 *         operator; TSDB_CODE_OUT_OF_MEMORY when the group key buffer cannot be allocated;
 *         otherwise the error reported by the tq reader or by the group id computation.
 */
int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;

  if (isAdd) {
    qDebug("add %d tables id into query list, %s", (int32_t)taosArrayGetSize(tableIdList), pTaskInfo->id.str);
  }

  // traverse to the stream scanner node to add this table id
  SOperatorInfo* pInfo = pTaskInfo->pRoot;
  while (pInfo->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
    if (pInfo->numOfDownstream == 0) {
      // leaf reached without meeting a stream scan operator: there is nothing to follow
      qError("failed to find stream scan operator to update the table id list, %s", GET_TASKID(pTaskInfo));
      return TSDB_CODE_APP_ERROR;
    }
    pInfo = pInfo->pDownstream[0];
  }

  SStreamScanInfo* pScanInfo = pInfo->info;

  if (!isAdd) {  // remove the table id in current list
    qDebug(" %d remove child tables from the stream scanner", (int32_t)taosArrayGetSize(tableIdList));
    taosWLockLatch(&pTaskInfo->lock);
    int32_t rmCode = tqReaderRemoveTbUidList(pScanInfo->tqReader, tableIdList);
    taosWUnLockLatch(&pTaskInfo->lock);
    return rmCode;
  }

  // add new table id
  bool    assignUid = false;
  size_t  bufLen = 0;
  char*   keyBuf = NULL;
  SArray* qa = filterUnqualifiedTables(pScanInfo, tableIdList, GET_TASKID(pTaskInfo));
  int32_t numOfQualifiedTables = taosArrayGetSize(qa);

  qDebug(" %d qualified child tables added into stream scanner", numOfQualifiedTables);

  int32_t code = tqReaderAddTbUidList(pScanInfo->tqReader, qa);
  if (code != TSDB_CODE_SUCCESS) {
    goto _end;
  }

  // a key buffer is only needed when the scanner groups by tags
  bufLen = (pScanInfo->pGroupTags != NULL) ? getTableTagsBufLen(pScanInfo->pGroupTags) : 0;
  if (bufLen > 0) {
    assignUid = groupbyTbname(pScanInfo->pGroupTags);
    keyBuf = taosMemoryMalloc(bufLen);
    if (keyBuf == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _end;
    }
  }

  STableListInfo* pTableListInfo = ((STableScanInfo*)pScanInfo->pTableScanOp->info)->base.pTableListInfo;
  taosWLockLatch(&pTaskInfo->lock);

  for (int32_t i = 0; i < numOfQualifiedTables; ++i) {
    uint64_t*     uid = taosArrayGet(qa, i);
    STableKeyInfo keyInfo = {.uid = *uid, .groupId = 0};

    if (bufLen > 0) {
      if (assignUid) {
        // grouping by table name: the uid itself serves as the group id
        keyInfo.groupId = keyInfo.uid;
      } else {
        code = getGroupIdFromTagsVal(pScanInfo->readHandle.meta, keyInfo.uid, pScanInfo->pGroupTags, keyBuf,
                                     &keyInfo.groupId);
        if (code != TSDB_CODE_SUCCESS) {
          break;  // stop at the first failure; tables added so far stay in the list
        }
      }
    }

    tableListAddTableInfo(pTableListInfo, keyInfo.uid, keyInfo.groupId);
  }

  taosWUnLockLatch(&pTaskInfo->lock);

_end:
  if (keyBuf != NULL) {
    taosMemoryFree(keyBuf);
  }

  taosArrayDestroy(qa);
  return code;
}
458

459
/**
 * Report the schema/tag versions and the db/table names recorded in the task's schema info.
 *
 * When the task has no schema wrapper, the function returns TSDB_CODE_SUCCESS without
 * writing any output. Otherwise *sversion and *tversion are filled and the names are copied
 * with strcpy(); a missing name produces an empty string.
 * NOTE(review): the copies are unbounded - callers presumably pass buffers large enough for
 * the stored names; confirm at the call sites.
 */
int32_t qGetQueryTableSchemaVersion(qTaskInfo_t tinfo, char* dbName, char* tableName, int32_t* sversion,
                                    int32_t* tversion) {
  ASSERT(tinfo != NULL && dbName != NULL && tableName != NULL);
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;

  if (NULL == pTask->schemaInfo.sw) {
    return TSDB_CODE_SUCCESS;
  }

  *sversion = pTask->schemaInfo.sw->version;
  *tversion = pTask->schemaInfo.tversion;

  if (!pTask->schemaInfo.dbname) {
    dbName[0] = 0;
  } else {
    strcpy(dbName, pTask->schemaInfo.dbname);
  }

  if (!pTask->schemaInfo.tablename) {
    tableName[0] = 0;
  } else {
    strcpy(tableName, pTask->schemaInfo.tablename);
  }

  return 0;
}
483 484

int32_t qCreateExecTask(SReadHandle* readHandle, int32_t vgId, uint64_t taskId, SSubplan* pSubplan,
D
dapan1121 已提交
485
                        qTaskInfo_t* pTaskInfo, DataSinkHandle* handle, char* sql, EOPTR_EXEC_MODEL model) {
486 487 488
  SExecTaskInfo** pTask = (SExecTaskInfo**)pTaskInfo;
  taosThreadOnce(&initPoolOnce, initRefPool);

489
  qDebug("start to create task, TID:0x%" PRIx64 " QID:0x%" PRIx64 ", vgId:%d", taskId, pSubplan->id.queryId, vgId);
490

491
  int32_t code = createExecTaskInfo(pSubplan, pTask, readHandle, taskId, vgId, sql, model);
492
  if (code != TSDB_CODE_SUCCESS) {
493
    qError("failed to createExecTaskInfo, code: %s", tstrerror(code));
494 495 496
    goto _error;
  }

497
  SDataSinkMgtCfg cfg = {.maxDataBlockNum = 500, .maxDataBlockNumPerQuery = 50};
498 499
  code = dsDataSinkMgtInit(&cfg);
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
500
    qError("failed to dsDataSinkMgtInit, code:%s, %s", tstrerror(code), (*pTask)->id.str);
501 502 503 504 505
    goto _error;
  }

  if (handle) {
    void* pSinkParam = NULL;
506
    code = createDataSinkParam(pSubplan->pDataSink, &pSinkParam, (*pTask), readHandle);
507
    if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
508
      qError("failed to createDataSinkParam, vgId:%d, code:%s, %s", vgId, tstrerror(code), (*pTask)->id.str);
509 510 511
      goto _error;
    }

H
Haojun Liao 已提交
512
    code = dsCreateDataSinker(pSubplan->pDataSink, handle, pSinkParam, (*pTask)->id.str);
L
Liu Jicong 已提交
513
    if (code != TSDB_CODE_SUCCESS) {
wmmhello's avatar
wmmhello 已提交
514 515
      taosMemoryFreeClear(pSinkParam);
    }
516 517
  }

518
  qDebug("subplan task create completed, TID:0x%" PRIx64 " QID:0x%" PRIx64, taskId, pSubplan->id.queryId);
519

dengyihao's avatar
dengyihao 已提交
520
_error:
521 522 523 524
  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
525
static void freeBlock(void* param) {
526
  SSDataBlock* pBlock = *(SSDataBlock**)param;
H
Haojun Liao 已提交
527 528 529
  blockDataDestroy(pBlock);
}

530
/**
 * Run the task and collect a batch of result blocks into pResList.
 *
 * The calling thread first claims exclusive ownership of the task through a compare-and-swap
 * on pTaskInfo->owner; a task already owned by another thread yields TSDB_CODE_QRY_IN_EXEC.
 * Result blocks are copied into blocks cached in pTaskInfo->pResultBlockList (created on
 * demand and reused on later calls) and pointers to those copies are pushed to pResList.
 * Collection stops when the root operator is exhausted or once at least 4096 rows have been
 * gathered.
 *
 * Errors raised by operators arrive through longjmp() to the setjmp() point below; the
 * error code is stored in pTaskInfo->code and returned.
 *
 * @param tinfo     task handle
 * @param pResList  out: cleared, then filled with SSDataBlock* entries owned by the task
 * @param useconds  out: accumulated elapsed time, written only when the task is exhausted
 * @param hasMore   out: true when the batch was cut short by the row limit; NOT written on the
 *                  early-return paths (task busy, task killed, error)
 * @param pLocal    optional local-fetch settings copied into the task
 * @return pTaskInfo->code, or TSDB_CODE_SUCCESS when the task had already been killed.
 */
int32_t qExecTaskOpt(qTaskInfo_t tinfo, SArray* pResList, uint64_t* useconds, bool* hasMore, SLocalFetch* pLocal) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  int64_t        threadId = taosGetSelfPthreadId();

  if (pLocal) {
    memcpy(&pTaskInfo->localFetch, pLocal, sizeof(*pLocal));
  }

  taosArrayClear(pResList);

  // claim the task: owner goes 0 -> threadId, anything else means another thread is running it
  int64_t curOwner = 0;
  if ((curOwner = atomic_val_compare_exchange_64(&pTaskInfo->owner, 0, threadId)) != 0) {
    qError("%s-%p execTask is now executed by thread:%p", GET_TASKID(pTaskInfo), pTaskInfo, (void*)curOwner);
    pTaskInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return pTaskInfo->code;
  }

  if (pTaskInfo->cost.start == 0) {
    pTaskInfo->cost.start = taosGetTimestampUs();
  }

  if (isTaskKilled(pTaskInfo)) {
    atomic_store_64(&pTaskInfo->owner, 0);
    qDebug("%s already killed, abort", GET_TASKID(pTaskInfo));
    return TSDB_CODE_SUCCESS;
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pTaskInfo->env);
  if (ret != TSDB_CODE_SUCCESS) {
    pTaskInfo->code = ret;
    cleanUpUdfs();

    qDebug("%s task abort due to error/cancel occurs, code:%s", GET_TASKID(pTaskInfo), tstrerror(pTaskInfo->code));
    atomic_store_64(&pTaskInfo->owner, 0);

    return pTaskInfo->code;
  }

  qDebug("%s execTask is launched", GET_TASKID(pTaskInfo));

  int32_t      current = 0;
  SSDataBlock* pRes = NULL;

  int64_t st = taosGetTimestampUs();

  int32_t blockIndex = 0;
  while ((pRes = pTaskInfo->pRoot->fpSet.getNextFn(pTaskInfo->pRoot)) != NULL) {
    SSDataBlock* p = NULL;
    if (blockIndex >= taosArrayGetSize(pTaskInfo->pResultBlockList)) {
      // no cached block at this position yet: clone the result and remember the clone
      SSDataBlock* p1 = createOneDataBlock(pRes, true);
      taosArrayPush(pTaskInfo->pResultBlockList, &p1);
      p = p1;
    } else {
      // reuse the cached block from a previous call
      p = *(SSDataBlock**)taosArrayGet(pTaskInfo->pResultBlockList, blockIndex);
      copyDataBlock(p, pRes);
    }

    blockIndex += 1;

    current += p->info.rows;
    ASSERT(p->info.rows > 0);
    taosArrayPush(pResList, &p);

    if (current >= 4096) {
      break;
    }
  }

  // pRes is non-NULL only when the loop stopped on the row limit
  *hasMore = (pRes != NULL);
  uint64_t el = (taosGetTimestampUs() - st);

  pTaskInfo->cost.elapsedTime += el;
  if (NULL == pRes) {
    *useconds = pTaskInfo->cost.elapsedTime;
  }

  cleanUpUdfs();

  uint64_t total = pTaskInfo->pRoot->resultInfo.totalRows;
  // total is unsigned, so it is printed with PRIu64
  qDebug("%s task suspended, %d rows in %d blocks returned, total:%" PRIu64 " rows, in sinkNode:%d, elapsed:%.2f ms",
         GET_TASKID(pTaskInfo), current, (int32_t)taosArrayGetSize(pResList), total, 0, el / 1000.0);

  atomic_store_64(&pTaskInfo->owner, 0);
  return pTaskInfo->code;
}

H
Haojun Liao 已提交
617 618
/**
 * Destroy every block cached in the task's result block list and empty the list.
 */
void qCleanExecTaskBlockBuf(qTaskInfo_t tinfo) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;
  SArray*        pCached = pTask->pResultBlockList;

  size_t total = taosArrayGetSize(pCached);
  for (int32_t idx = 0; idx < total; ++idx) {
    SSDataBlock** ppBlock = taosArrayGet(pCached, idx);
    blockDataDestroy(*ppBlock);
  }

  taosArrayClear(pCached);
}

629 630 631 632 633 634 635 636 637 638 639 640 641
/**
 * Run the task for one step and return the next result block of the root operator.
 *
 * The calling thread first claims exclusive ownership of the task through a compare-and-swap
 * on pTaskInfo->owner; a task already owned by another thread yields TSDB_CODE_QRY_IN_EXEC.
 * A task that has been killed gets its buffered stream blocks cleared and the call returns
 * TSDB_CODE_SUCCESS with *pRes == NULL.
 *
 * Errors raised by operators arrive through longjmp() to the setjmp() point below; the
 * error code is stored in pTaskInfo->code and returned.
 *
 * @param tinfo     task handle
 * @param pRes      out: next result block, or NULL when there is none
 * @param useconds  out: accumulated elapsed time, written only when no block was produced
 * @return pTaskInfo->code, or TSDB_CODE_SUCCESS when the task had already been killed.
 */
int32_t qExecTask(qTaskInfo_t tinfo, SSDataBlock** pRes, uint64_t* useconds) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  int64_t        threadId = taosGetSelfPthreadId();

  *pRes = NULL;

  // claim the task: owner goes 0 -> threadId, anything else means another thread is running it
  int64_t curOwner = 0;
  if ((curOwner = atomic_val_compare_exchange_64(&pTaskInfo->owner, 0, threadId)) != 0) {
    qError("%s-%p execTask is now executed by thread:%p", GET_TASKID(pTaskInfo), pTaskInfo, (void*)curOwner);
    pTaskInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return pTaskInfo->code;
  }

  if (pTaskInfo->cost.start == 0) {
    pTaskInfo->cost.start = taosGetTimestampUs();
  }

  if (isTaskKilled(pTaskInfo)) {
    clearStreamBlock(pTaskInfo->pRoot);
    atomic_store_64(&pTaskInfo->owner, 0);
    qDebug("%s already killed, abort", GET_TASKID(pTaskInfo));
    return TSDB_CODE_SUCCESS;
  }

  // error occurs, record the error code and return to client
  int32_t ret = setjmp(pTaskInfo->env);
  if (ret != TSDB_CODE_SUCCESS) {
    pTaskInfo->code = ret;
    cleanUpUdfs();
    qDebug("%s task abort due to error/cancel occurs, code:%s", GET_TASKID(pTaskInfo), tstrerror(pTaskInfo->code));
    atomic_store_64(&pTaskInfo->owner, 0);
    return pTaskInfo->code;
  }

  qDebug("%s execTask is launched", GET_TASKID(pTaskInfo));

  int64_t st = taosGetTimestampUs();

  *pRes = pTaskInfo->pRoot->fpSet.getNextFn(pTaskInfo->pRoot);
  uint64_t el = (taosGetTimestampUs() - st);

  pTaskInfo->cost.elapsedTime += el;
  if (NULL == *pRes) {
    *useconds = pTaskInfo->cost.elapsedTime;
  }

  cleanUpUdfs();

  int32_t  current = (*pRes != NULL) ? (*pRes)->info.rows : 0;
  uint64_t total = pTaskInfo->pRoot->resultInfo.totalRows;

  // total is unsigned, so it is printed with PRIu64
  qDebug("%s task suspended, %d rows returned, total:%" PRIu64 " rows, in sinkNode:%d, elapsed:%.2f ms",
         GET_TASKID(pTaskInfo), current, total, 0, el / 1000.0);

  atomic_store_64(&pTaskInfo->owner, 0);
  return pTaskInfo->code;
}

L
Liu Jicong 已提交
686
/**
 * Append *pInfo to the task's stop-info list under the stop-info write latch.
 *
 * @return always TSDB_CODE_SUCCESS.
 */
int32_t qAppendTaskStopInfo(SExecTaskInfo* pTaskInfo, SExchangeOpStopInfo* pInfo) {
  taosWLockLatch(&pTaskInfo->stopInfo.lock);
  SArray* pStopList = pTaskInfo->stopInfo.pStopInfo;
  taosArrayPush(pStopList, pInfo);
  taosWUnLockLatch(&pTaskInfo->stopInfo.lock);

  return TSDB_CODE_SUCCESS;
}

int32_t stopInfoComp(void const* lp, void const* rp) {
  SExchangeOpStopInfo* key = (SExchangeOpStopInfo*)lp;
  SExchangeOpStopInfo* pInfo = (SExchangeOpStopInfo*)rp;

  if (key->refId < pInfo->refId) {
    return -1;
  } else if (key->refId > pInfo->refId) {
    return 1;
  }

  return 0;
}

L
Liu Jicong 已提交
707
/**
 * Remove the stop-info entry whose refId equals pInfo->refId (looked up with
 * taosArraySearchIdx and stopInfoComp) from the task's stop-info list, if present.
 * The list is modified under the stop-info write latch.
 */
void qRemoveTaskStopInfo(SExecTaskInfo* pTaskInfo, SExchangeOpStopInfo* pInfo) {
  taosWLockLatch(&pTaskInfo->stopInfo.lock);

  SArray* pStopList = pTaskInfo->stopInfo.pStopInfo;
  int32_t pos = taosArraySearchIdx(pStopList, pInfo, stopInfoComp, TD_EQ);
  if (pos >= 0) {
    taosArrayRemove(pStopList, pos);
  }

  taosWUnLockLatch(&pTaskInfo->stopInfo.lock);
}

/**
 * Post the `ready` semaphore of every exchange operator registered in the task's stop-info
 * list. Each operator is looked up in the exchange ref pool by refId; entries whose object
 * can no longer be acquired are skipped. The list is walked under the stop-info write latch.
 */
void qStopTaskOperators(SExecTaskInfo* pTaskInfo) {
  taosWLockLatch(&pTaskInfo->stopInfo.lock);

  SArray* pStopList = pTaskInfo->stopInfo.pStopInfo;
  int32_t total = taosArrayGetSize(pStopList);

  for (int32_t idx = 0; idx < total; ++idx) {
    SExchangeOpStopInfo* pEntry = taosArrayGet(pStopList, idx);
    SExchangeInfo*       pExchange = taosAcquireRef(exchangeObjRefPool, pEntry->refId);
    if (!pExchange) {
      continue;
    }

    tsem_post(&pExchange->ready);
    taosReleaseRef(exchangeObjRefPool, pEntry->refId);
  }

  taosWUnLockLatch(&pTaskInfo->stopInfo.lock);
}

D
dapan1121 已提交
734
/**
 * Mark the task as killed with rspCode and post the `ready` semaphore of its registered
 * exchange operators; the call does not wait for the task to stop executing.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE when qinfo is NULL, otherwise TSDB_CODE_SUCCESS.
 */
int32_t qAsyncKillTask(qTaskInfo_t qinfo, int32_t rspCode) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qinfo;
  if (NULL == pTaskInfo) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  qDebug("%s execTask async killed", GET_TASKID(pTaskInfo));

  // flag first, then post the exchange operators' semaphores
  setTaskKilled(pTaskInfo, rspCode);
  qStopTaskOperators(pTaskInfo);

  return TSDB_CODE_SUCCESS;
}

748 749 750 751 752 753
/**
 * Kill the task and wait until no thread is executing it any more.
 *
 * The task is marked killed with TSDB_CODE_TSC_QUERY_KILLED, the caller then polls
 * qTaskIsExecuting() every 10 ms, and finally rspCode is stored as the task's code.
 *
 * @return TSDB_CODE_QRY_INVALID_QHANDLE when tinfo is NULL, otherwise TSDB_CODE_SUCCESS.
 */
int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  if (NULL == pTaskInfo) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  qDebug("%s sync killed execTask", GET_TASKID(pTaskInfo));
  setTaskKilled(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED);

  // block until the executing thread has released ownership
  for (;;) {
    if (!qTaskIsExecuting(pTaskInfo)) {
      break;
    }
    taosMsleep(10);
  }

  pTaskInfo->code = rspCode;
  return TSDB_CODE_SUCCESS;
}

765 766 767 768 769 770 771 772 773
/**
 * Tell whether some thread currently owns (is executing) the task.
 *
 * @return false for a NULL handle, otherwise whether the task's owner field is non-zero.
 */
bool qTaskIsExecuting(qTaskInfo_t qinfo) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)qinfo;
  if (pTask == NULL) {
    return false;
  }

  int64_t owner = atomic_load_64(&pTask->owner);
  return owner != 0;
}

H
Haojun Liao 已提交
774 775
/**
 * Write the task's cost summary to the debug log.
 *
 * The idle time is the gap between the task's creation and start timestamps. When a file
 * block load recorder is attached its counters are included; otherwise only the idle and
 * elapsed times are printed.
 */
static void printTaskExecCostInLog(SExecTaskInfo* pTaskInfo) {
  STaskCostInfo*          pCost = &pTaskInfo->cost;
  int64_t                 idle = pCost->start - pCost->created;
  SFileBlockLoadRecorder* pRec = pCost->pRecoder;

  if (pCost->pRecoder == NULL) {
    qDebug("%s :cost summary: idle in queue:%.2f ms, elapsed time:%.2f ms", GET_TASKID(pTaskInfo), idle / 1000.0,
           pCost->elapsedTime / 1000.0);
    return;
  }

  qDebug(
      "%s :cost summary: idle:%.2f ms, elapsed time:%.2f ms, extract tableList:%.2f ms, "
      "createGroupIdMap:%.2f ms, total blocks:%d, "
      "load block SMA:%d, load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64,
      GET_TASKID(pTaskInfo), idle / 1000.0, pCost->elapsedTime / 1000.0, pCost->extractListTime,
      pCost->groupIdMapTime, pRec->totalBlocks, pRec->loadBlockStatis, pRec->loadBlocks,
      pRec->totalRows, pRec->totalCheckedRows);
}

793 794 795 796 797 798 799 800
// Log the completion of a task, print its cost summary and release it via doDestroyTask().
// A NULL handle is ignored.
//
// The root operator is only dereferenced for the row count when it exists: other entry points in
// this file (e.g. qSerializeTaskStatus) treat pRoot == NULL as a possible state, so destroying
// such a task must not crash while producing the log line.
void qDestroyTask(qTaskInfo_t qTaskHandle) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qTaskHandle;
  if (pTaskInfo == NULL) {
    return;
  }

  if (pTaskInfo->pRoot != NULL) {
    qDebug("%s execTask completed, numOfRows:%" PRId64, GET_TASKID(pTaskInfo), pTaskInfo->pRoot->resultInfo.totalRows);
  } else {
    qDebug("%s execTask completed", GET_TASKID(pTaskInfo));
  }

  printTaskExecCostInLog(pTaskInfo);  // print the query cost summary
  doDestroyTask(pTaskInfo);
}

H
Haojun Liao 已提交
805
// Collect the explain execution info of the task's operator tree, starting at its root operator,
// into pExecInfoList. The result of getOperatorExplainExecInfo() is returned unchanged.
int32_t qGetExplainExecInfo(qTaskInfo_t tinfo, SArray* pExecInfoList) {
  SOperatorInfo* pRootOp = ((SExecTaskInfo*)tinfo)->pRoot;
  return getOperatorExplainExecInfo(pRootOp, pExecInfoList);
}

// Serialize the status of a task.
//
// The operator encoding step (encodeOperator on the root operator) is currently disabled, so this
// function only validates its input and reports success; pOutput and len are left untouched.
//
// returns TSDB_CODE_INVALID_PARA when the handle is NULL or the task has no root operator,
//         0 otherwise.
int32_t qSerializeTaskStatus(qTaskInfo_t tinfo, char** pOutput, int32_t* len) {
  SExecTaskInfo* pTaskInfo = (struct SExecTaskInfo*)tinfo;

  // reject a NULL handle the same way qDeserializeTaskStatus does, instead of dereferencing it
  if (pTaskInfo == NULL || pTaskInfo->pRoot == NULL) {
    return TSDB_CODE_INVALID_PARA;
  }

  // NOTE: the disabled implementation was
  //   code = encodeOperator(pTaskInfo->pRoot, pOutput, len, &nOptrWithVal);
  //   and, on success with nOptrWithVal == 0, freed *pOutput and set *len to 0.
  return 0;
}

// Restore the status of a task from a serialized buffer.
//
// Only the arguments are validated at the moment; the decoding step
// (decodeOperator(pTaskInfo->pRoot, pInput, len)) is disabled.
//
// returns TSDB_CODE_INVALID_PARA for a NULL handle, a NULL buffer or a zero length, 0 otherwise.
int32_t qDeserializeTaskStatus(qTaskInfo_t tinfo, const char* pInput, int32_t len) {
  SExecTaskInfo* pTask = (struct SExecTaskInfo*)tinfo;

  const bool validArgs = (pTask != NULL) && (pInput != NULL) && (len != 0);
  if (!validArgs) {
    return TSDB_CODE_INVALID_PARA;
  }

  return 0;
}

// Walk down the operator tree from the task root, always following the single downstream
// operator, until a stream scan operator is reached; its info pointer is stored in *scanner.
//
// Every operator visited before the stream scan is asserted to have exactly one downstream.
// returns 0 once the stream scan operator has been found.
int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner) {
  SOperatorInfo* pCur = ((SExecTaskInfo*)tinfo)->pRoot;

  for (;;) {
    uint16_t opType = pCur->operatorType;
    if (opType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
      break;
    }

    ASSERT(pCur->numOfDownstream == 1);
    pCur = pCur->pDownstream[0];
  }

  *scanner = pCur->info;
  return 0;
}

// Disabled code (compiled out by "#if 0"): pushes pItem into the input queue of a task that is
// asserted to run in stream execution mode and returns 0.
#if 0
int32_t qStreamInput(qTaskInfo_t tinfo, void* pItem) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM);
  taosWriteQitem(pTaskInfo->streamInfo.inputQueue->queue, pItem);
  return 0;
}
#endif

861
// Record the version for the first recover step of a stream task and switch the task's recover
// state to STREAM_RECOVER_STEP__PREPARE1. The task is asserted to run in stream execution mode.
// Always returns 0.
int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;
  ASSERT(pTask->execModel == OPTR_EXEC_MODEL_STREAM);

  pTask->streamInfo.fillHistoryVer1 = ver;
  pTask->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE1;

  return 0;
}

// Record the version for the second recover step of a stream task and switch the task's recover
// state to STREAM_RECOVER_STEP__PREPARE2. The task is asserted to run in stream execution mode.
// Always returns 0.
int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;
  ASSERT(pTask->execModel == OPTR_EXEC_MODEL_STREAM);

  pTask->streamInfo.fillHistoryVer2 = ver;
  pTask->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE2;

  return 0;
}

// Leave recover mode: reset the stream task's recover state to STREAM_RECOVER_STEP__NONE.
// The task is asserted to run in stream execution mode. Always returns 0.
int32_t qStreamRecoverFinish(qTaskInfo_t tinfo) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;
  ASSERT(pTask->execModel == OPTR_EXEC_MODEL_STREAM);

  pTask->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE;

  return 0;
}

// Prepare every stream window operator on the single-downstream chain below the task root for
// recovery.
//
// For each interval / session / state stream operator the current trigger mode, delete mark and
// ignore-expired flag are stashed in their "*Saved" fields and then overridden with
// STREAM_TRIGGER_AT_ONCE, INT64_MAX and false respectively. Before saving, the trigger is asserted
// to be AT_ONCE or WINDOW_CLOSE and the saved trigger/delete-mark slots are asserted to be 0.
//
// returns 0 when the end of the chain is reached, -1 (after ASSERT(0)) if an operator has more
//         than one downstream.
int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  SOperatorInfo* pCur = pTaskInfo->pRoot;

  for (;;) {
    switch (pCur->operatorType) {
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL: {
        SStreamIntervalOperatorInfo* pInterval = pCur->info;
        ASSERT(pInterval->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE ||
               pInterval->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
        ASSERT(pInterval->twAggSup.calTriggerSaved == 0);
        ASSERT(pInterval->twAggSup.deleteMarkSaved == 0);

        qInfo("save stream param for interval: %d,  %" PRId64, pInterval->twAggSup.calTrigger,
              pInterval->twAggSup.deleteMark);

        // stash the configured values, then force the recover-time settings
        pInterval->twAggSup.calTriggerSaved = pInterval->twAggSup.calTrigger;
        pInterval->twAggSup.deleteMarkSaved = pInterval->twAggSup.deleteMark;
        pInterval->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE;
        pInterval->twAggSup.deleteMark = INT64_MAX;
        pInterval->ignoreExpiredDataSaved = pInterval->ignoreExpiredData;
        pInterval->ignoreExpiredData = false;
        break;
      }

      case QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION: {
        SStreamSessionAggOperatorInfo* pSession = pCur->info;
        ASSERT(pSession->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE ||
               pSession->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
        ASSERT(pSession->twAggSup.calTriggerSaved == 0);
        ASSERT(pSession->twAggSup.deleteMarkSaved == 0);

        qInfo("save stream param for session: %d,  %" PRId64, pSession->twAggSup.calTrigger,
              pSession->twAggSup.deleteMark);

        pSession->twAggSup.calTriggerSaved = pSession->twAggSup.calTrigger;
        pSession->twAggSup.deleteMarkSaved = pSession->twAggSup.deleteMark;
        pSession->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE;
        pSession->twAggSup.deleteMark = INT64_MAX;
        pSession->ignoreExpiredDataSaved = pSession->ignoreExpiredData;
        pSession->ignoreExpiredData = false;
        break;
      }

      case QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE: {
        SStreamStateAggOperatorInfo* pState = pCur->info;
        ASSERT(pState->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE ||
               pState->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
        ASSERT(pState->twAggSup.calTriggerSaved == 0);
        ASSERT(pState->twAggSup.deleteMarkSaved == 0);

        qInfo("save stream param for state: %d,  %" PRId64, pState->twAggSup.calTrigger,
              pState->twAggSup.deleteMark);

        pState->twAggSup.calTriggerSaved = pState->twAggSup.calTrigger;
        pState->twAggSup.deleteMarkSaved = pState->twAggSup.deleteMark;
        pState->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE;
        pState->twAggSup.deleteMark = INT64_MAX;
        pState->ignoreExpiredDataSaved = pState->ignoreExpiredData;
        pState->ignoreExpiredData = false;
        break;
      }

      default:
        break;
    }

    // descend while there is exactly one, non-NULL downstream operator
    if (pCur->numOfDownstream == 1 && pCur->pDownstream[0] != NULL) {
      pCur = pCur->pDownstream[0];
      continue;
    }

    if (pCur->numOfDownstream > 1) {
      qError("unexpected stream, multiple downstream");
      ASSERT(0);
      return -1;
    }
    return 0;
  }
}

// Undo the recover-time overrides on the single-downstream chain below the task root.
//
// For each interval / session / state stream operator the trigger mode, delete mark and
// ignore-expired flag are copied back from their "*Saved" fields, and the restored values are
// logged.
//
// returns 0 when the end of the chain is reached, -1 if an operator has more than one downstream.
int32_t qStreamRestoreParam(qTaskInfo_t tinfo) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  SOperatorInfo* pCur = pTaskInfo->pRoot;

  for (;;) {
    switch (pCur->operatorType) {
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL: {
        SStreamIntervalOperatorInfo* pInterval = pCur->info;
        pInterval->twAggSup.calTrigger = pInterval->twAggSup.calTriggerSaved;
        pInterval->twAggSup.deleteMark = pInterval->twAggSup.deleteMarkSaved;
        pInterval->ignoreExpiredData = pInterval->ignoreExpiredDataSaved;
        qInfo("restore stream param for interval: %d,  %" PRId64, pInterval->twAggSup.calTrigger,
              pInterval->twAggSup.deleteMark);
        break;
      }

      case QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION: {
        SStreamSessionAggOperatorInfo* pSession = pCur->info;
        pSession->twAggSup.calTrigger = pSession->twAggSup.calTriggerSaved;
        pSession->twAggSup.deleteMark = pSession->twAggSup.deleteMarkSaved;
        pSession->ignoreExpiredData = pSession->ignoreExpiredDataSaved;
        qInfo("restore stream param for session: %d,  %" PRId64, pSession->twAggSup.calTrigger,
              pSession->twAggSup.deleteMark);
        break;
      }

      case QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE: {
        SStreamStateAggOperatorInfo* pState = pCur->info;
        pState->twAggSup.calTrigger = pState->twAggSup.calTriggerSaved;
        pState->twAggSup.deleteMark = pState->twAggSup.deleteMarkSaved;
        pState->ignoreExpiredData = pState->ignoreExpiredDataSaved;
        qInfo("restore stream param for state: %d,  %" PRId64, pState->twAggSup.calTrigger,
              pState->twAggSup.deleteMark);
        break;
      }

      default:
        break;
    }

    // descend while there is exactly one, non-NULL downstream operator
    if (pCur->numOfDownstream == 1 && pCur->pDownstream[0] != NULL) {
      pCur = pCur->pDownstream[0];
      continue;
    }

    if (pCur->numOfDownstream > 1) {
      qError("unexpected stream, multiple downstream");
      /*ASSERT(0);*/
      return -1;
    }
    return 0;
  }
}
999

L
Liu Jicong 已提交
1000 1001 1002 1003
// Return the recoverScanFinished flag stored in the task's stream info.
bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) {
  return ((SExecTaskInfo*)tinfo)->streamInfo.recoverScanFinished;
}
1004 1005 1006 1007 1008 1009

void* qExtractReaderFromStreamScanner(void* scanner) {
  SStreamScanInfo* pInfo = scanner;
  return (void*)pInfo->tqReader;
}

wmmhello's avatar
wmmhello 已提交
1010 1011 1012 1013 1014 1015 1016 1017
// Return the schema wrapper currently stored in the task's stream info (read-only view).
const SSchemaWrapper* qExtractSchemaFromTask(qTaskInfo_t tinfo) {
  return ((SExecTaskInfo*)tinfo)->streamInfo.schema;
}

// Return the table name currently stored in the task's stream info (read-only view).
const char* qExtractTbnameFromTask(qTaskInfo_t tinfo) {
  return ((SExecTaskInfo*)tinfo)->streamInfo.tbName;
}

wmmhello's avatar
wmmhello 已提交
1020
// Return the address of the meta response embedded in the task's stream info.
// The pointer refers to storage inside the task itself, not to a copy.
SMqMetaRsp* qStreamExtractMetaMsg(qTaskInfo_t tinfo) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;
  SMqMetaRsp*    pMetaRsp = &pTask->streamInfo.metaRsp;
  return pMetaRsp;
}

1025
// Copy the task's current stream offset into the caller provided *pOffset (byte-wise copy of one
// STqOffsetVal).
void qStreamExtractOffset(qTaskInfo_t tinfo, STqOffsetVal* pOffset) {
  const SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;
  memcpy(pOffset, &pTask->streamInfo.currentOffset, sizeof(*pOffset));
}

H
Haojun Liao 已提交
1030
// Build a table data query condition for a snapshot scan of the table described by pMtInfo.
//
// *pCond is zeroed first, then filled with: ascending order, one column entry per schema column
// (type, bytes, colId copied from the schema), an identity slot list, the default time window,
// the table's suid, TIMEWINDOW_RANGE_CONTAINED, start version -1 and the snapshot version of
// sContext as end version.
//
// returns TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY (also stored in terrno) when one of the
//         two arrays cannot be allocated; in that case both arrays are released and cleared.
int32_t initQueryTableDataCondForTmq(SQueryTableDataCond* pCond, SSnapContext* sContext, SMetaTableInfo* pMtInfo) {
  memset(pCond, 0, sizeof(SQueryTableDataCond));

  pCond->order = TSDB_ORDER_ASC;
  pCond->numOfCols = pMtInfo->schema->nCols;

  pCond->colList = taosMemoryCalloc(pCond->numOfCols, sizeof(SColumnInfo));
  pCond->pSlotList = taosMemoryMalloc(sizeof(int32_t) * pCond->numOfCols);

  const bool allocated = (pCond->colList != NULL) && (pCond->pSlotList != NULL);
  if (!allocated) {
    taosMemoryFreeClear(pCond->colList);
    taosMemoryFreeClear(pCond->pSlotList);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return terrno;
  }

  pCond->twindows = TSWINDOW_INITIALIZER;
  pCond->suid = pMtInfo->suid;
  pCond->type = TIMEWINDOW_RANGE_CONTAINED;
  pCond->startVersion = -1;
  pCond->endVersion = sContext->snapVersion;

  // column k of the schema maps to column k / slot k of the condition
  int32_t k = 0;
  while (k < pCond->numOfCols) {
    SColumnInfo* pDst = &pCond->colList[k];

    pDst->type = pMtInfo->schema->pSchema[k].type;
    pDst->bytes = pMtInfo->schema->pSchema[k].bytes;
    pDst->colId = pMtInfo->schema->pSchema[k].colId;

    pCond->pSlotList[k] = k;
    ++k;
  }

  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
1061
// Hand a packed submit block to the task for a later scan.
//
// The block is only accepted when the task runs in queue execution mode and no submit message is
// pending (streamInfo.submit.msgStr is NULL); otherwise an error is logged and -1 is returned.
// On success the block is stored by value in streamInfo.submit and 0 is returned.
int32_t qStreamSetScanMemData(qTaskInfo_t tinfo, SPackedData submit) {
  SExecTaskInfo* pTask = (SExecTaskInfo*)tinfo;

  const bool isQueueModel = (pTask->execModel == OPTR_EXEC_MODEL_QUEUE);
  const bool slotIsFree = (pTask->streamInfo.submit.msgStr == NULL);

  if (!(isQueueModel && slotIsFree)) {
    qError("qStreamSetScanMemData err:%d,%p", pTask->execModel, pTask->streamInfo.submit.msgStr);
    return -1;
  }

  qDebug("set the submit block for future scan");
  pTask->streamInfo.submit = submit;

  return 0;
}

1073 1074 1075 1076 1077 1078
// Reset the status of the task's root operator to OP_NOT_OPENED.
void qStreamSetOpen(qTaskInfo_t tinfo) {
  SOperatorInfo* pRootOp = ((SExecTaskInfo*)tinfo)->pRoot;
  pRootOp->status = OP_NOT_OPENED;
}

1079
// Position the scan operators of a task so that the next execution continues from *pOffset.
//
// tinfo    task handle
// pOffset  requested offset; if it equals the task's current offset nothing is done
// subType  TOPIC_SUB_TYPE__COLUMN selects the stream scan operator path, any other value is
//          handled as a table/db subscription through the raw scan info of the root operator
//
// Column subscription:
//   - TMQ_OFFSET__LOG:           closes the tsdb reader and seeks the tq reader to version + 1.
//   - TMQ_OFFSET__SNAPSHOT_DATA: locates the table (first table when uid == 0), then opens or
//                                resets the tsdb reader for that single table starting at ts + 1.
//   - anything else:             error.
// Table/db subscription:
//   - TMQ_OFFSET__SNAPSHOT_DATA: moves the snapshot context to the uid, rebuilds the table list
//                                and query condition, reopens the tsdb reader and takes over the
//                                table name and schema reported by the snapshot.
//   - TMQ_OFFSET__SNAPSHOT_META: moves the snapshot context to the uid.
//   - TMQ_OFFSET__LOG:           closes the tsdb reader.
//
// returns 0 on success (the task's current offset is then set to *pOffset), -1 on failure.
int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType) {
  SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
  SOperatorInfo* pOperator = pTaskInfo->pRoot;
  const char*    id = GET_TASKID(pTaskInfo);

  // if pOffset equal to current offset, means continue consume
  if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.currentOffset)) {
    return 0;
  }

  if (subType == TOPIC_SUB_TYPE__COLUMN) {
    pOperator = extractOperatorInTree(pOperator, QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN, id);
    if (pOperator == NULL) {
      return -1;
    }

    SStreamScanInfo* pInfo = pOperator->info;
    STableScanInfo*  pScanInfo = pInfo->pTableScanOp->info;
    STableScanBase*  pScanBaseInfo = &pScanInfo->base;
    STableListInfo*  pTableListInfo = pScanBaseInfo->pTableListInfo;

    if (pOffset->type == TMQ_OFFSET__LOG) {
      tsdbReaderClose(pScanBaseInfo->dataReader);
      pScanBaseInfo->dataReader = NULL;

      // let's seek to the next version in wal file
      if (tqSeekVer(pInfo->tqReader, pOffset->version + 1, id) < 0) {
        qError("tqSeekVer failed ver:%"PRId64", %s", pOffset->version + 1, id);
        return -1;
      }
    } else if (pOffset->type == TMQ_OFFSET__SNAPSHOT_DATA) {
      // iterate all tables from tableInfoList, and retrieve rows from each table one-by-one
      // those data are from the snapshot in tsdb, besides the data in the wal file.
      int64_t uid = pOffset->uid;
      int64_t ts = pOffset->ts;
      int32_t index = 0;

      // this value may be changed if new tables are created
      taosRLockLatch(&pTaskInfo->lock);
      int32_t numOfTables = tableListGetSize(pTableListInfo);

      if (uid == 0) {
        if (numOfTables != 0) {
          // no table requested: start with the first table, from the very beginning
          STableKeyInfo* pTableInfo = tableListGetInfo(pTableListInfo, 0);
          uid = pTableInfo->uid;
          ts = INT64_MIN;
          pScanInfo->currentTable = 0;
        } else {
          taosRUnLockLatch(&pTaskInfo->lock);
          qError("no table in table list, %s", id);
          return -1;
        }
      }

      qDebug("switch to table uid:%" PRId64 " ts:%" PRId64 "% "PRId64 " rows returned", uid, ts, pInfo->pTableScanOp->resultInfo.totalRows);
      pInfo->pTableScanOp->resultInfo.totalRows = 0;

      // start from current accessed position
      // we cannot start from the pScanInfo->currentTable, since the commit offset may cause the rollback of the start
      // position, let's find it from the beginning.
      index = tableListFind(pTableListInfo, uid, 0);
      taosRUnLockLatch(&pTaskInfo->lock);

      if (index >= 0) {
        pScanInfo->currentTable = index;
      } else {
        // uid is a signed 64-bit value, so it is printed with PRId64
        qError("vgId:%d uid:%" PRId64 " not found in table list, total:%d, index:%d %s", pTaskInfo->id.vgId, uid,
               numOfTables, pScanInfo->currentTable, id);
        return -1;
      }

      STableKeyInfo keyInfo = {.uid = uid};
      int64_t       oldSkey = pScanBaseInfo->cond.twindows.skey;

      // let's start from the next ts that returned to consumer.
      pScanBaseInfo->cond.twindows.skey = ts + 1;
      pScanInfo->scanTimes = 0;

      if (pScanBaseInfo->dataReader == NULL) {
        int32_t code = tsdbReaderOpen(pScanBaseInfo->readHandle.vnode, &pScanBaseInfo->cond, &keyInfo, 1,
                                      pScanInfo->pResBlock, &pScanBaseInfo->dataReader, id, false);
        if (code != TSDB_CODE_SUCCESS) {
          qError("prepare read tsdb snapshot failed, uid:%" PRId64 ", code:%s %s", pOffset->uid, tstrerror(code), id);
          // the start key was only overridden temporarily; do not leak the override on failure
          pScanBaseInfo->cond.twindows.skey = oldSkey;
          terrno = code;
          return -1;
        }

        qDebug("tsdb reader created with offset(snapshot) uid:%" PRId64 " ts:%" PRId64 " table index:%d, total:%d, %s",
               uid, pScanBaseInfo->cond.twindows.skey, pScanInfo->currentTable, numOfTables, id);
      } else {
        tsdbSetTableList(pScanBaseInfo->dataReader, &keyInfo, 1);
        tsdbReaderReset(pScanBaseInfo->dataReader, &pScanBaseInfo->cond);
        qDebug("tsdb reader offset seek snapshot to uid:%" PRId64 " ts %" PRId64 "  table index:%d numOfTable:%d, %s",
               uid, pScanBaseInfo->cond.twindows.skey, pScanInfo->currentTable, numOfTables, id);
      }

      // restore the key value
      pScanBaseInfo->cond.twindows.skey = oldSkey;
    } else {
      qError("invalid pOffset->type:%d, %s", pOffset->type, id);
      return -1;
    }

  } else {  // subType == TOPIC_SUB_TYPE__TABLE/TOPIC_SUB_TYPE__DB

    if (pOffset->type == TMQ_OFFSET__SNAPSHOT_DATA) {
      SStreamRawScanInfo* pInfo = pOperator->info;
      SSnapContext*       sContext = pInfo->sContext;

      SOperatorInfo* p = extractOperatorInTree(pOperator, QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN, id);
      if (p == NULL) {
        // same handling as the column path: a missing operator is an error, not a crash
        return -1;
      }
      STableListInfo* pTableListInfo = ((SStreamRawScanInfo*)(p->info))->pTableListInfo;

      if (setForSnapShot(sContext, pOffset->uid) != 0) {
        qError("setDataForSnapShot error. uid:%" PRId64" , %s", pOffset->uid, id);
        return -1;
      }

      SMetaTableInfo mtInfo = getUidfromSnapShot(sContext);
      tsdbReaderClose(pInfo->dataReader);
      pInfo->dataReader = NULL;

      cleanupQueryTableDataCond(&pTaskInfo->streamInfo.tableCond);
      tableListClear(pTableListInfo);

      if (mtInfo.uid == 0) {
        goto end;  // no data
      }

      // the condition owns freshly allocated arrays; a failed allocation must stop the setup here
      if (initQueryTableDataCondForTmq(&pTaskInfo->streamInfo.tableCond, sContext, &mtInfo) != TSDB_CODE_SUCCESS) {
        qError("failed to init table data cond, uid:%" PRId64 ", code:%s %s", mtInfo.uid, tstrerror(terrno), id);
        // the schema is normally handed over to streamInfo below; release it since that never happens now
        tDeleteSSchemaWrapper(mtInfo.schema);
        return -1;
      }
      pTaskInfo->streamInfo.tableCond.twindows.skey = pOffset->ts;

      tableListAddTableInfo(pTableListInfo, mtInfo.uid, 0);

      STableKeyInfo* pList = tableListGetInfo(pTableListInfo, 0);
      int32_t        size = tableListGetSize(pTableListInfo);

      int32_t code = tsdbReaderOpen(pInfo->vnode, &pTaskInfo->streamInfo.tableCond, pList, size, NULL,
                                    &pInfo->dataReader, NULL, false);

      // the condition is only needed to open the reader
      cleanupQueryTableDataCond(&pTaskInfo->streamInfo.tableCond);

      if (code != TSDB_CODE_SUCCESS) {
        qError("tsdbReaderOpen failed, uid:%" PRId64 ", code:%s %s", mtInfo.uid, tstrerror(code), id);
        tDeleteSSchemaWrapper(mtInfo.schema);
        terrno = code;
        return -1;
      }

      // NOTE(review): unbounded copy; assumes both tbName buffers have the same capacity - confirm
      strcpy(pTaskInfo->streamInfo.tbName, mtInfo.tbName);

      // take ownership of the snapshot's schema, releasing the previous one
      tDeleteSSchemaWrapper(pTaskInfo->streamInfo.schema);
      pTaskInfo->streamInfo.schema = mtInfo.schema;

      qDebug("tmqsnap qStreamPrepareScan snapshot data uid:%" PRId64 " ts %" PRId64" %s", mtInfo.uid, pOffset->ts, id);
    } else if (pOffset->type == TMQ_OFFSET__SNAPSHOT_META) {
      SStreamRawScanInfo* pInfo = pOperator->info;
      SSnapContext*       sContext = pInfo->sContext;
      if (setForSnapShot(sContext, pOffset->uid) != 0) {
        qError("setForSnapShot error. uid:%" PRIu64 " ,version:%" PRId64, pOffset->uid, pOffset->version);
        return -1;
      }
      qDebug("tmqsnap qStreamPrepareScan snapshot meta uid:%" PRId64 " ts %" PRId64 " %s", pOffset->uid, pOffset->ts, id);
    } else if (pOffset->type == TMQ_OFFSET__LOG) {
      SStreamRawScanInfo* pInfo = pOperator->info;
      tsdbReaderClose(pInfo->dataReader);
      pInfo->dataReader = NULL;
      qDebug("tmqsnap qStreamPrepareScan snapshot log, %s", id);
    }
  }

end:
  // remember the offset so that an identical request becomes a no-op
  pTaskInfo->streamInfo.currentOffset = *pOffset;

  return 0;
}
H
Haojun Liao 已提交
1243 1244 1245

// RPC response handler: copies the response payload and forwards it to the callback stored in
// the send info attached to the message (pMsg->info.ahandle).
//
// parent, pEpSet  not used by this function
// pMsg            response message; its content buffer is released with rpcFreeCont() before
//                 returning, and the attached send info is destroyed after the callback ran
//
// If the payload copy cannot be allocated, terrno and pMsg->code are set to
// TSDB_CODE_OUT_OF_MEMORY and the callback is invoked with a NULL data pointer.
void qProcessRspMsg(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) {
  SMsgSendInfo* pSendInfo = (SMsgSendInfo*)pMsg->info.ahandle;
  if (pSendInfo == NULL) {
    qError("pMsg->info.ahandle is NULL");
    // nobody can consume the response, but its content buffer still has to be released just
    // like on the regular path below
    rpcFreeCont(pMsg->pCont);
    return;
  }

  SDataBuf buf = {.len = pMsg->contLen, .pData = NULL};

  if (pMsg->contLen > 0) {
    buf.pData = taosMemoryCalloc(1, pMsg->contLen);
    if (buf.pData == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      pMsg->code = TSDB_CODE_OUT_OF_MEMORY;
    } else {
      memcpy(buf.pData, pMsg->pCont, pMsg->contLen);
    }
  }

  pSendInfo->fp(pSendInfo->param, &buf, pMsg->code);
  rpcFreeCont(pMsg->pCont);
  destroySendMsgInfo(pSendInfo);
}
L
Liu Jicong 已提交
1267