tsdbRead.c 108.1 KB
Newer Older
H
hjxilinx 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
Hongze Cheng 已提交
16
#include "tsdb.h"
17
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
H
Hongze Cheng 已提交
18

19
typedef struct {
dengyihao's avatar
dengyihao 已提交
20
  STbDataIter* iter;
21 22 23 24
  int32_t      index;
  bool         hasVal;
} SIterInfo;

H
Haojun Liao 已提交
25
typedef struct STableBlockScanInfo {
dengyihao's avatar
dengyihao 已提交
26 27 28 29 30 31 32 33 34
  uint64_t  uid;
  TSKEY     lastKey;
  SBlockIdx blockIdx;
  SArray*   pBlockList;  // block data index list
  SIterInfo iter;        // mem buffer skip list iterator
  SIterInfo iiter;       // imem buffer skip list iterator
  SArray*   delSkyline;  // delete info for this table
  int32_t   fileDelIndex;
  bool      iterInit;  // whether to initialize the in-memory skip list iterator or not
H
Haojun Liao 已提交
35 36 37
} STableBlockScanInfo;

typedef struct SBlockOrderWrapper {
dengyihao's avatar
dengyihao 已提交
38 39
  int64_t uid;
  SBlock* pBlock;
H
Haojun Liao 已提交
40
} SBlockOrderWrapper;
H
Hongze Cheng 已提交
41 42

typedef struct SBlockOrderSupporter {
43 44 45 46
  SBlockOrderWrapper** pDataBlockInfo;
  int32_t*             indexPerTable;
  int32_t*             numOfBlocksPerTable;
  int32_t              numOfTables;
H
Hongze Cheng 已提交
47 48 49
} SBlockOrderSupporter;

typedef struct SIOCostSummary {
H
Haojun Liao 已提交
50
  int64_t blockLoadTime;
51
  int64_t smaLoadTime;
H
Haojun Liao 已提交
52
  int64_t checkForNextTime;
53 54
  int64_t headFileLoad;
  int64_t headFileLoadTime;
H
Hongze Cheng 已提交
55 56 57
} SIOCostSummary;

typedef struct SBlockLoadSuppInfo {
58
  SArray*          pColAgg;
59
  SColumnDataAgg   tsColAgg;
C
Cary Xu 已提交
60
  SColumnDataAgg** plist;
61 62
  int16_t*         colIds;    // column ids for loading file block data
  char**           buildBuf;  // build string tmp buffer, todo remove it later after all string format being updated.
H
Hongze Cheng 已提交
63 64
} SBlockLoadSuppInfo;

65
typedef struct SFilesetIter {
66 67 68 69
  int32_t          numOfFiles;  // number of total files
  int32_t          index;       // current accessed index in the list
  SArray*          pFileList;   // data file list
  int32_t          order;
70
} SFilesetIter;
H
Haojun Liao 已提交
71 72

typedef struct SFileDataBlockInfo {
dengyihao's avatar
dengyihao 已提交
73
  int32_t
L
Liu Jicong 已提交
74
      tbBlockIdx;  // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it
dengyihao's avatar
dengyihao 已提交
75
  uint64_t uid;
H
Haojun Liao 已提交
76 77 78
} SFileDataBlockInfo;

typedef struct SDataBlockIter {
dengyihao's avatar
dengyihao 已提交
79 80 81 82
  int32_t numOfBlocks;
  int32_t index;
  SArray* blockList;  // SArray<SFileDataBlockInfo>
  int32_t order;
H
Haojun Liao 已提交
83 84 85
} SDataBlockIter;

typedef struct SFileBlockDumpInfo {
dengyihao's avatar
dengyihao 已提交
86 87 88 89
  int32_t totalRows;
  int32_t rowIndex;
  int64_t lastKey;
  bool    allDumped;
H
Haojun Liao 已提交
90 91
} SFileBlockDumpInfo;

H
Haojun Liao 已提交
92
typedef struct SVersionRange {
dengyihao's avatar
dengyihao 已提交
93 94
  uint64_t minVer;
  uint64_t maxVer;
H
Haojun Liao 已提交
95 96
} SVersionRange;

H
Haojun Liao 已提交
97
typedef struct SReaderStatus {
dengyihao's avatar
dengyihao 已提交
98 99
  bool                 loadFromFile;  // check file stage
  SHashObj*            pTableMap;     // SHash<STableBlockScanInfo>
100
  STableBlockScanInfo* pTableIter;    // table iterator used in building in-memory buffer data blocks.
101
  SFileBlockDumpInfo   fBlockDumpInfo;
102

dengyihao's avatar
dengyihao 已提交
103 104 105 106 107
  SDFileSet*     pCurrentFileset;  // current opened file set
  SBlockData     fileBlockData;
  SFilesetIter   fileIter;
  SDataBlockIter blockIter;
  bool           composedDataBlock;  // the returned data block is a composed block or not
H
Haojun Liao 已提交
108 109
} SReaderStatus;

H
Hongze Cheng 已提交
110
struct STsdbReader {
H
Haojun Liao 已提交
111 112 113 114 115 116 117
  STsdb*             pTsdb;
  uint64_t           suid;
  int16_t            order;
  STimeWindow        window;  // the primary query time window that applies to all queries
  SSDataBlock*       pResBlock;
  int32_t            capacity;
  SReaderStatus      status;
118 119
  char*              idStr;  // query info handle, for debug purpose
  int32_t            type;   // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows
H
Hongze Cheng 已提交
120
  SBlockLoadSuppInfo suppInfo;
121

L
Liu Jicong 已提交
122 123 124 125
  SIOCostSummary cost;
  STSchema*      pSchema;
  SDataFReader*  pFileReader;
  SVersionRange  verRange;
H
Hongze Cheng 已提交
126
};
H
Hongze Cheng 已提交
127

H
Haojun Liao 已提交
128
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter);
129 130
static int      buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                          STsdbReader* pReader);
131
static TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader);
132 133
static int32_t  doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                        SRowMerger* pMerger);
dengyihao's avatar
dengyihao 已提交
134 135
static int32_t  doMergeRowsInBuf(SIterInfo* pIter, int64_t ts, SArray* pDelList, SRowMerger* pMerger,
                                 STsdbReader* pReader);
136 137 138
static int32_t  doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow);
static void     setComposedBlockFlag(STsdbReader* pReader, bool composed);
static void     updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader);
139
static bool     hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order);
140

dengyihao's avatar
dengyihao 已提交
141 142
static void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow,
                             STsdbReader* pReader);
143 144
static void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                               STSRow** pTSRow);
dengyihao's avatar
dengyihao 已提交
145 146 147 148
static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData,
                                      STbData* piMemTbData);
static STsdb*  getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr,
                                   int8_t* pLevel);
149
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level);
H
Haojun Liao 已提交
150

151 152 153
static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) {
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;

154
  size_t numOfCols = blockDataGetNumOfCols(pBlock);
155

156
  pSupInfo->colIds = taosMemoryMalloc(numOfCols * sizeof(int16_t));
157
  pSupInfo->buildBuf = taosMemoryCalloc(numOfCols, POINTER_BYTES);
158 159 160
  if (pSupInfo->buildBuf == NULL || pSupInfo->colIds == NULL) {
    taosMemoryFree(pSupInfo->colIds);
    taosMemoryFree(pSupInfo->buildBuf);
H
Haojun Liao 已提交
161 162
    return TSDB_CODE_OUT_OF_MEMORY;
  }
H
Hongze Cheng 已提交
163

H
Haojun Liao 已提交
164 165
  for (int32_t i = 0; i < numOfCols; ++i) {
    SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i);
166
    pSupInfo->colIds[i] = pCol->info.colId;
167 168 169 170

    if (IS_VAR_DATA_TYPE(pCol->info.type)) {
      pSupInfo->buildBuf[i] = taosMemoryMalloc(pCol->info.bytes);
    }
H
Haojun Liao 已提交
171
  }
H
Hongze Cheng 已提交
172

H
Haojun Liao 已提交
173 174
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
175

176
static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableKeyInfo* idList, int32_t numOfTables) {
H
Haojun Liao 已提交
177
  // allocate buffer in order to load data blocks from file
178 179 180 181
  // todo use simple hash instead
  SHashObj* pTableMap =
      taosHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
  if (pTableMap == NULL) {
H
Haojun Liao 已提交
182 183 184
    return NULL;
  }

185 186 187 188 189
  for (int32_t j = 0; j < numOfTables; ++j) {
    STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid};
    if (ASCENDING_TRAVERSE(pTsdbReader->order)) {
      if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReader->window.skey) {
        info.lastKey = pTsdbReader->window.skey;
H
Haojun Liao 已提交
190 191
      }

192
      ASSERT(info.lastKey >= pTsdbReader->window.skey && info.lastKey <= pTsdbReader->window.ekey);
wmmhello's avatar
wmmhello 已提交
193
    } else {
194
      info.lastKey = pTsdbReader->window.skey;
H
Haojun Liao 已提交
195
    }
wmmhello's avatar
wmmhello 已提交
196

197 198 199
    taosHashPut(pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info));
    tsdbDebug("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, info.uid, info.lastKey,
              pTsdbReader->idStr);
H
Haojun Liao 已提交
200 201
  }

202
  return pTableMap;
H
Hongze Cheng 已提交
203
}
H
Hongze Cheng 已提交
204

205 206 207
static void resetDataBlockScanInfo(SHashObj* pTableMap) {
  STableBlockScanInfo* p = NULL;

dengyihao's avatar
dengyihao 已提交
208
  while ((p = taosHashIterate(pTableMap, p)) != NULL) {
209 210
    p->iterInit = false;
    p->iiter.hasVal = false;
dengyihao's avatar
dengyihao 已提交
211
    if (p->iter.iter != NULL) {
212
      p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter);
213 214
    }

215
    p->delSkyline = taosArrayDestroy(p->delSkyline);
216 217 218
  }
}

219 220 221 222 223 224 225 226
static void destroyBlockScanInfo(SHashObj* pTableMap) {
  STableBlockScanInfo* p = NULL;

  while ((p = taosHashIterate(pTableMap, p)) != NULL) {
    p->iterInit = false;
    p->iiter.hasVal = false;

    if (p->iter.iter != NULL) {
227
      p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter);
228 229 230
    }

    if (p->iiter.iter != NULL) {
231
      p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter);
232 233
    }

234 235
    p->delSkyline = taosArrayDestroy(p->delSkyline);
    p->pBlockList = taosArrayDestroy(p->pBlockList);
236 237 238 239 240
  }

  taosHashCleanup(pTableMap);
}

241
static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) {
242 243
  ASSERT(pWindow != NULL);
  return pWindow->skey > pWindow->ekey;
H
Haojun Liao 已提交
244
}
H
Hongze Cheng 已提交
245

246 247 248
// Update the query time window according to the data time to live(TTL) information, in order to avoid to return
// the expired data to client, even it is queried already.
static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) {
dengyihao's avatar
dengyihao 已提交
249
  STsdbKeepCfg* pCfg = &pTsdb->keepCfg;
H
Hongze Cheng 已提交
250

251
  int64_t now = taosGetTimestamp(pCfg->precision);
dengyihao's avatar
dengyihao 已提交
252
  int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1;  // needs to add one tick
253

dengyihao's avatar
dengyihao 已提交
254
  STimeWindow win = *pWindow;
255 256 257 258 259 260
  if (win.skey < earilyTs) {
    win.skey = earilyTs;
  }

  return win;
}
H
Hongze Cheng 已提交
261

H
Haojun Liao 已提交
262
static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* capacity) {
H
Haojun Liao 已提交
263 264 265 266 267 268
  int32_t rowLen = 0;
  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    rowLen += pCond->colList[i].bytes;
  }

  // make sure the output SSDataBlock size be less than 2MB.
H
Haojun Liao 已提交
269 270 271
  const int32_t TWOMB = 2 * 1024 * 1024;
  if ((*capacity) * rowLen > TWOMB) {
    (*capacity) = TWOMB / rowLen;
H
Haojun Liao 已提交
272 273 274 275
  }
}

// init file iterator
276
static int32_t initFilesetIterator(SFilesetIter* pIter, const STsdbFSState* pFState, int32_t order, const char* idstr) {
277 278
  size_t numOfFileset = taosArrayGetSize(pFState->aDFileSet);

279 280 281
  pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset;
  pIter->order = order;
  pIter->pFileList = taosArrayDup(pFState->aDFileSet);
282
  pIter->numOfFiles = numOfFileset;
H
Haojun Liao 已提交
283

H
Haojun Liao 已提交
284
  tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr);
H
Haojun Liao 已提交
285 286 287
  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
288
static void cleanupFilesetIterator(SFilesetIter* pIter) { taosArrayDestroy(pIter->pFileList); }
289

290
static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) {
291 292
  bool    asc = ASCENDING_TRAVERSE(pIter->order);
  int32_t step = asc ? 1 : -1;
293 294 295
  pIter->index += step;

  if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) {
H
Haojun Liao 已提交
296 297 298 299 300
    return false;
  }

  // check file the time range of coverage
  STimeWindow win = {0};
H
Hongze Cheng 已提交
301

302
  while (1) {
H
Haojun Liao 已提交
303 304 305
    if (pReader->pFileReader != NULL) {
      tsdbDataFReaderClose(&pReader->pFileReader);
    }
306

307
    pReader->status.pCurrentFileset = (SDFileSet*)taosArrayGet(pIter->pFileList, pIter->index);
H
Haojun Liao 已提交
308

309 310 311 312
    int32_t code = tsdbDataFReaderOpen(&pReader->pFileReader, pReader->pTsdb, pReader->status.pCurrentFileset);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
    }
H
Haojun Liao 已提交
313

314 315 316 317 318 319 320 321 322 323 324 325
    int32_t fid = pReader->status.pCurrentFileset->fid;
    tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey);

    // current file are no longer overlapped with query time window, ignore remain files
    if ((asc && win.skey > pReader->window.ekey) || (!asc && win.ekey < pReader->window.skey)) {
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader,
                pReader->window.skey, pReader->window.ekey, pReader->idStr);
      return false;
    }

    if ((asc && (win.ekey < pReader->window.skey)) || ((!asc) && (win.skey > pReader->window.ekey))) {
      pIter->index += step;
326 327 328
      if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) {
        return false;
      }
329 330
      continue;
    }
C
Cary Xu 已提交
331

332 333
    tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, fid, pReader->window.skey,
              pReader->window.ekey, pReader->idStr);
334 335
    return true;
  }
336

337
_err:
H
Haojun Liao 已提交
338 339 340
  return false;
}

341
static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order) {
342 343
  pIter->order = order;
  pIter->index = -1;
H
Haojun Liao 已提交
344
  pIter->numOfBlocks = -1;
345 346 347 348 349 350 351
  if (pIter->blockList == NULL) {
    pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo));
  } else {
    taosArrayClear(pIter->blockList);
  }
}

L
Liu Jicong 已提交
352
static void cleanupDataBlockIterator(SDataBlockIter* pIter) { taosArrayDestroy(pIter->blockList); }
H
Haojun Liao 已提交
353

H
Haojun Liao 已提交
354
static void initReaderStatus(SReaderStatus* pStatus) {
dengyihao's avatar
dengyihao 已提交
355 356
  pStatus->pTableIter = NULL;
  pStatus->loadFromFile = true;
H
Haojun Liao 已提交
357 358
}

359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381
static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) {
  SSDataBlock* pResBlock = createDataBlock();
  if (pResBlock == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    SColumnInfoData colInfo = {{0}, 0};
    colInfo.info = pCond->colList[i];
    blockDataAppendColInfo(pResBlock, &colInfo);
  }

  int32_t code = blockDataEnsureCapacity(pResBlock, capacity);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    taosMemoryFree(pResBlock);
    return NULL;
  }

  return pResBlock;
}

H
Haojun Liao 已提交
382 383
static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, const char* idstr) {
  int32_t      code = 0;
384
  int8_t       level = 0;
H
Haojun Liao 已提交
385
  STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader));
H
Hongze Cheng 已提交
386 387
  if (pReader == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Haojun Liao 已提交
388
    goto _end;
H
Hongze Cheng 已提交
389 390
  }

H
Haojun Liao 已提交
391
  initReaderStatus(&pReader->status);
392

L
Liu Jicong 已提交
393
  pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level);
dengyihao's avatar
dengyihao 已提交
394 395 396 397 398
  pReader->suid = pCond->suid;
  pReader->order = pCond->order;
  pReader->capacity = 4096;
  pReader->idStr = (idstr != NULL) ? strdup(idstr) : NULL;
  pReader->verRange = getQueryVerRange(pVnode, pCond, level);
399
  pReader->type = pCond->type;
400
  pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);
401

402
  ASSERT(pCond->numOfCols > 0);
H
Hongze Cheng 已提交
403

404
  limitOutputBufferSize(pCond, &pReader->capacity);
405

406 407
  // allocate buffer in order to load data blocks from file
  SBlockLoadSuppInfo* pSup = &pReader->suppInfo;
408
  pSup->pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg));
409
  pSup->plist = taosMemoryCalloc(pCond->numOfCols, POINTER_BYTES);
410
  if (pSup->pColAgg == NULL || pSup->plist == NULL) {
411 412 413
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _end;
  }
H
Haojun Liao 已提交
414

415 416
  pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID;

417 418 419 420
  pReader->pResBlock = createResBlock(pCond, pReader->capacity);
  if (pReader->pResBlock == NULL) {
    code = terrno;
    goto _end;
H
Hongze Cheng 已提交
421
  }
H
Hongze Cheng 已提交
422

423 424
  setColumnIdSlotList(pReader, pReader->pResBlock);

H
Hongze Cheng 已提交
425 426
  *ppReader = pReader;
  return code;
H
Hongze Cheng 已提交
427

H
Haojun Liao 已提交
428 429
_end:
  tsdbReaderClose(pReader);
H
Hongze Cheng 已提交
430 431 432
  *ppReader = NULL;
  return code;
}
H
Hongze Cheng 已提交
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465

// void tsdbResetQueryHandleForNewTable(STsdbReader* queryHandle, SQueryTableDataCond* pCond, STableListInfo* tableList,
//                                      int32_t tWinIdx) {
//   STsdbReader* pTsdbReadHandle = queryHandle;

//   pTsdbReadHandle->order = pCond->order;
//   pTsdbReadHandle->window = pCond->twindows[tWinIdx];
//   pTsdbReadHandle->type = TSDB_QUERY_TYPE_ALL;
//   pTsdbReadHandle->cur.fid = -1;
//   pTsdbReadHandle->cur.win = TSWINDOW_INITIALIZER;
//   pTsdbReadHandle->checkFiles = true;
//   pTsdbReadHandle->activeIndex = 0;  // current active table index
//   pTsdbReadHandle->locateStart = false;
//   pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;

//   if (ASCENDING_TRAVERSE(pCond->order)) {
//     assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
//   } else {
//     assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
//   }

//   // allocate buffer in order to load data blocks from file
//   memset(pTsdbReadHandle->suppInfo.pstatis, 0, sizeof(SColumnDataAgg));
//   memset(pTsdbReadHandle->suppInfo.plist, 0, POINTER_BYTES);

//   tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
//   tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);

//   SArray* pTable = NULL;
//   //  STsdbMeta* pMeta = tsdbGetMeta(pTsdbReadHandle->pTsdb);

//   //  pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);

H
Haojun Liao 已提交
466
//   pTsdbReadHandle->pTableCheckInfo = NULL;  // createDataBlockScanInfo(pTsdbReadHandle, groupList, pMeta,
H
Hongze Cheng 已提交
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
//                                             // &pTable);
//   if (pTsdbReadHandle->pTableCheckInfo == NULL) {
//     //    tsdbReaderClose(pTsdbReadHandle);
//     terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
//   }

//   //  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//   //  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
// }

// SArray* tsdbGetQueriedTableList(STsdbReader** pHandle) {
//   assert(pHandle != NULL);

//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;

//   size_t  size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//   SArray* res = taosArrayInit(size, POINTER_BYTES);
//   return res;
// }

487 488
// static TSKEY extractFirstTraverseKey(STableBlockScanInfo* pCheckInfo, int32_t order, int32_t update, TDRowVerT
// maxVer) {
H
Hongze Cheng 已提交
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537
//   TSDBROW row = {0};
//   STSRow *rmem = NULL, *rimem = NULL;

//   if (pCheckInfo->iter) {
//     if (tsdbTbDataIterGet(pCheckInfo->iter, &row)) {
//       rmem = row.pTSRow;
//     }
//   }

//   if (pCheckInfo->iiter) {
//     if (tsdbTbDataIterGet(pCheckInfo->iiter, &row)) {
//       rimem = row.pTSRow;
//     }
//   }

//   if (rmem == NULL && rimem == NULL) {
//     return TSKEY_INITIAL_VAL;
//   }

//   if (rmem != NULL && rimem == NULL) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
//     return TD_ROW_KEY(rmem);
//   }

//   if (rmem == NULL && rimem != NULL) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//     return TD_ROW_KEY(rimem);
//   }

//   TSKEY r1 = TD_ROW_KEY(rmem);
//   TSKEY r2 = TD_ROW_KEY(rimem);

//   if (r1 == r2) {
//     if (TD_SUPPORT_UPDATE(update)) {
//       pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
//     } else {
//       pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//       tsdbTbDataIterNext(pCheckInfo->iter);
//     }
//     return r1;
//   } else if (r1 < r2 && ASCENDING_TRAVERSE(order)) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
//     return r1;
//   } else {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//     return r2;
//   }
// }

H
Haojun Liao 已提交
538
// static bool moveToNextRowInMem(STableBlockScanInfo* pCheckInfo) {
H
Hongze Cheng 已提交
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571
//   bool hasNext = false;
//   if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) {
//     if (pCheckInfo->iter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iter);
//     }

//     if (hasNext) {
//       return hasNext;
//     }

//     if (pCheckInfo->iiter != NULL) {
//       return tsdbTbDataIterGet(pCheckInfo->iiter, NULL);
//     }
//   } else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM) {
//     if (pCheckInfo->iiter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iiter);
//     }

//     if (hasNext) {
//       return hasNext;
//     }

//     if (pCheckInfo->iter != NULL) {
//       return tsdbTbDataIterGet(pCheckInfo->iter, NULL);
//     }
//   } else {
//     if (pCheckInfo->iter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iter);
//     }
//     if (pCheckInfo->iiter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iiter) || hasNext;
//     }
//   }
H
Hongze Cheng 已提交
572

H
Hongze Cheng 已提交
573 574
//   return hasNext;
// }
H
Hongze Cheng 已提交
575

H
Hongze Cheng 已提交
576 577 578
// static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
//   int32_t firstSlot = 0;
//   int32_t lastSlot = numOfBlocks - 1;
H
Hongze Cheng 已提交
579

H
Hongze Cheng 已提交
580
//   int32_t midSlot = firstSlot;
H
Hongze Cheng 已提交
581

H
Hongze Cheng 已提交
582 583 584
//   while (1) {
//     numOfBlocks = lastSlot - firstSlot + 1;
//     midSlot = (firstSlot + (numOfBlocks >> 1));
H
Hongze Cheng 已提交
585

H
Hongze Cheng 已提交
586
//     if (numOfBlocks == 1) break;
H
Hongze Cheng 已提交
587

H
Hongze Cheng 已提交
588 589 590 591 592 593 594 595 596 597 598
//     if (skey > pBlock[midSlot].maxKey.ts) {
//       if (numOfBlocks == 2) break;
//       if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].minKey.ts)) break;
//       firstSlot = midSlot + 1;
//     } else if (skey < pBlock[midSlot].minKey.ts) {
//       if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].maxKey.ts)) break;
//       lastSlot = midSlot - 1;
//     } else {
//       break;  // got the slot
//     }
//   }
H
Hongze Cheng 已提交
599

H
Hongze Cheng 已提交
600 601
//   return midSlot;
// }
H
Hongze Cheng 已提交
602

H
Haojun Liao 已提交
603
static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* pIndexList) {
604
  SArray* aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
H
Hongze Cheng 已提交
605

606
  int32_t code = tsdbReadBlockIdx(pFileReader, aBlockIdx, NULL);
H
Haojun Liao 已提交
607
  if (code != TSDB_CODE_SUCCESS) {
608
    goto _end;
H
Haojun Liao 已提交
609
  }
H
Hongze Cheng 已提交
610

H
Hongze Cheng 已提交
611 612
  if (taosArrayGetSize(aBlockIdx) == 0) {
    taosArrayClear(aBlockIdx);
H
Haojun Liao 已提交
613 614
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
615

616
  SBlockIdx* pBlockIdx;
H
Hongze Cheng 已提交
617
  for (int32_t i = 0; i < taosArrayGetSize(aBlockIdx); ++i) {
618
    pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i);
H
Haojun Liao 已提交
619

620
    // uid check
H
Hongze Cheng 已提交
621
    if (pBlockIdx->suid != pReader->suid) {
H
Haojun Liao 已提交
622 623 624 625
      continue;
    }

    // this block belongs to a table that is not queried.
H
Hongze Cheng 已提交
626
    void* p = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(uint64_t));
H
Haojun Liao 已提交
627 628 629 630
    if (p == NULL) {
      continue;
    }

631 632
    // todo: not valid info in bockIndex
    // time range check
633 634 635
    //    if (pBlockIdx->minKey > pReader->window.ekey || pBlockIdx->maxKey < pReader->window.skey) {
    //      continue;
    //    }
636 637

    // version check
638 639 640
    //    if (pBlockIdx->minVersion > pReader->verRange.maxVer || pBlockIdx->maxVersion < pReader->verRange.minVer) {
    //      continue;
    //    }
H
Haojun Liao 已提交
641 642 643 644 645 646

    STableBlockScanInfo* pScanInfo = p;
    if (pScanInfo->pBlockList == NULL) {
      pScanInfo->pBlockList = taosArrayInit(16, sizeof(SBlock));
    }

H
Hongze Cheng 已提交
647 648
    pScanInfo->blockIdx = *pBlockIdx;
    taosArrayPush(pIndexList, pBlockIdx);
H
Haojun Liao 已提交
649
  }
H
Hongze Cheng 已提交
650

651
_end:
H
Hongze Cheng 已提交
652
  taosArrayDestroy(aBlockIdx);
H
Haojun Liao 已提交
653 654
  return code;
}
H
Hongze Cheng 已提交
655

656 657
static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_t* numOfValidTables,
                               int32_t* numOfBlocks) {
H
Haojun Liao 已提交
658
  size_t numOfTables = taosArrayGetSize(pIndexList);
H
Hongze Cheng 已提交
659

H
Haojun Liao 已提交
660
  *numOfValidTables = 0;
H
Hongze Cheng 已提交
661

662
  STableBlockScanInfo* px = NULL;
dengyihao's avatar
dengyihao 已提交
663
  while (1) {
664 665 666 667 668 669 670 671
    px = taosHashIterate(pReader->status.pTableMap, px);
    if (px == NULL) {
      break;
    }

    taosArrayClear(px->pBlockList);
  }

dengyihao's avatar
dengyihao 已提交
672
  for (int32_t i = 0; i < numOfTables; ++i) {
H
Haojun Liao 已提交
673
    SBlockIdx* pBlockIdx = taosArrayGet(pIndexList, i);
H
Hongze Cheng 已提交
674

H
Hongze Cheng 已提交
675
    SMapData mapData = {0};
H
Haojun Liao 已提交
676 677
    tMapDataReset(&mapData);
    tsdbReadBlock(pReader->pFileReader, pBlockIdx, &mapData, NULL);
H
Hongze Cheng 已提交
678

H
Haojun Liao 已提交
679 680 681
    STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(int64_t));
    for (int32_t j = 0; j < mapData.nItem; ++j) {
      SBlock block = {0};
H
Hongze Cheng 已提交
682

H
Hongze Cheng 已提交
683
      tMapDataGetItemByIdx(&mapData, j, &block, tGetBlock);
H
Hongze Cheng 已提交
684

685
      // 1. time range check
686
      if (block.minKey.ts > pReader->window.ekey || block.maxKey.ts < pReader->window.skey) {
H
Haojun Liao 已提交
687 688
        continue;
      }
H
Hongze Cheng 已提交
689

690
      // 2. version range check
691 692 693
      if (block.minVersion > pReader->verRange.maxVer || block.maxVersion < pReader->verRange.minVer) {
        continue;
      }
694

H
Haojun Liao 已提交
695 696
      void* p = taosArrayPush(pScanInfo->pBlockList, &block);
      if (p == NULL) {
H
Haojun Liao 已提交
697
        tMapDataClear(&mapData);
H
Haojun Liao 已提交
698 699
        return TSDB_CODE_OUT_OF_MEMORY;
      }
700 701

      (*numOfBlocks) += 1;
H
Haojun Liao 已提交
702
    }
H
Hongze Cheng 已提交
703

H
Haojun Liao 已提交
704
    tMapDataClear(&mapData);
H
Haojun Liao 已提交
705 706 707 708
    if (pScanInfo->pBlockList != NULL && taosArrayGetSize(pScanInfo->pBlockList) > 0) {
      (*numOfValidTables) += 1;
    }
  }
H
Hongze Cheng 已提交
709

H
Haojun Liao 已提交
710 711
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
712

713 714
// todo remove pblock parameter
static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, SBlock* pBlock, int32_t order) {
715
  int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1;
H
Haojun Liao 已提交
716

717
  pDumpInfo->allDumped = true;
718
  pDumpInfo->lastKey = pBlock->maxKey.ts + step;
H
Haojun Liao 已提交
719 720
}

721 722
static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal,
                         SBlockLoadSuppInfo* pSup) {
H
Haojun Liao 已提交
723
  if (IS_VAR_DATA_TYPE(pColVal->type)) {
724
    if (pColVal->isNull || pColVal->isNone) {
H
Haojun Liao 已提交
725 726 727 728 729 730 731
      colDataAppendNULL(pColInfoData, rowIndex);
    } else {
      varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData);
      memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData);
      colDataAppend(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false);
    }
  } else {
732
    colDataAppend(pColInfoData, rowIndex, (const char*)&pColVal->value, pColVal->isNull || pColVal->isNone);
H
Haojun Liao 已提交
733
  }
H
Haojun Liao 已提交
734 735
}

736
static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
737
  SReaderStatus*  pStatus = &pReader->status;
738
  SDataBlockIter* pBlockIter = &pStatus->blockIter;
H
Hongze Cheng 已提交
739

740
  SBlockData*         pBlockData = &pStatus->fileBlockData;
H
Haojun Liao 已提交
741
  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter);
H
Haojun Liao 已提交
742 743
  SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
  SSDataBlock*        pResBlock = pReader->pResBlock;
744
  int32_t             numOfCols = blockDataGetNumOfCols(pResBlock);
H
Haojun Liao 已提交
745

H
Haojun Liao 已提交
746
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
747
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
H
Haojun Liao 已提交
748

749
  int64_t st = taosGetTimestampUs();
H
Haojun Liao 已提交
750

H
Haojun Liao 已提交
751
  SColVal cv = {0};
752 753
  int32_t colIndex = 0;

754 755
  bool    asc = ASCENDING_TRAVERSE(pReader->order);
  int32_t step = asc ? 1 : -1;
756

757
  int32_t rowIndex = 0;
758 759
  int32_t remain = asc ? (pBlockData->nRow - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex + 1);

760 761 762 763 764 765 766 767
  int32_t endIndex = 0;
  if (remain <= pReader->capacity) {
    endIndex = pBlockData->nRow;
  } else {
    endIndex = pDumpInfo->rowIndex + step * pReader->capacity;
    remain = pReader->capacity;
  }

768
  int32_t          i = 0;
769 770
  SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, i);
  if (pColData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
771
    for (int32_t j = pDumpInfo->rowIndex; j < endIndex && j >= 0; j += step) {
772 773 774 775 776
      colDataAppend(pColData, rowIndex++, (const char*)&pBlockData->aTSKEY[j], false);
    }
    i += 1;
  }

H
Hongze Cheng 已提交
777
  while (i < numOfCols && colIndex < taosArrayGetSize(pBlockData->aIdx)) {
778 779 780
    rowIndex = 0;
    pColData = taosArrayGet(pResBlock->pDataBlock, i);

H
Hongze Cheng 已提交
781
    SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex);
782 783

    if (pData->cid == pColData->info.colId) {
784
      for (int32_t j = pDumpInfo->rowIndex; j < endIndex && j >= 0; j += step) {
785 786
        tColDataGetValue(pData, j, &cv);
        doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo);
H
Haojun Liao 已提交
787
      }
788 789 790
      colIndex += 1;
    } else {  // the specified column does not exist in file block, fill with null data
      colDataAppendNNULL(pColData, 0, remain);
H
Haojun Liao 已提交
791
    }
792 793 794 795 796

    ASSERT(rowIndex == remain);
    i += 1;
  }

797
  while (i < numOfCols) {
798 799 800
    pColData = taosArrayGet(pResBlock->pDataBlock, i);
    colDataAppendNNULL(pColData, 0, remain);
    i += 1;
H
Haojun Liao 已提交
801
  }
H
Haojun Liao 已提交
802

803
  pResBlock->info.rows = remain;
804
  pDumpInfo->rowIndex += step * remain;
805 806

  setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
H
Haojun Liao 已提交
807

H
Haojun Liao 已提交
808 809
  int64_t elapsedTime = (taosGetTimestampUs() - st);
  pReader->cost.blockLoadTime += elapsedTime;
H
Haojun Liao 已提交
810

811
  int32_t unDumpedRows = asc ? pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1;
H
Haojun Liao 已提交
812
  tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
813
            ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%" PRId64 " us, %s",
814 815 816 817 818 819 820
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, remain, unDumpedRows,
            pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr);

  return TSDB_CODE_SUCCESS;
}

// todo consider the output buffer size
821 822
static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter,
                                   STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) {
823 824 825 826 827 828 829 830 831 832 833
  int64_t st = taosGetTimestampUs();

  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter);
  SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
  SSDataBlock*        pResBlock = pReader->pResBlock;
  int32_t             numOfCols = blockDataGetNumOfCols(pResBlock);

  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  uint8_t *pb = NULL, *pb1 = NULL;
834
  int32_t  code = tsdbReadColData(pReader->pFileReader, &pBlockScanInfo->blockIdx, pBlock, pSupInfo->colIds, numOfCols,
L
Liu Jicong 已提交
835
                                  pBlockData, &pb, &pb1);
836 837 838 839 840 841 842 843 844
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  int64_t elapsedTime = (taosGetTimestampUs() - st);
  pReader->cost.blockLoadTime += elapsedTime;

  pDumpInfo->allDumped = false;
  tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
845
            ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%" PRId64 " us, %s",
846
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow,
H
Haojun Liao 已提交
847 848
            pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr);
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
849 850

_error:
H
Haojun Liao 已提交
851 852 853 854 855
  tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
            ", rows:%d, %s",
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow,
            pReader->idStr);
  return code;
H
Haojun Liao 已提交
856
}
H
Hongze Cheng 已提交
857

H
Hongze Cheng 已提交
858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915
// static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
//   int    firstPos, lastPos, midPos = -1;
//   int    numOfRows;
//   TSKEY* keyList;

//   assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

//   if (num <= 0) return -1;

//   keyList = (TSKEY*)pValue;
//   firstPos = 0;
//   lastPos = num - 1;

//   if (order == TSDB_ORDER_DESC) {
//     // find the first position which is smaller than the key
//     while (1) {
//       if (key >= keyList[lastPos]) return lastPos;
//       if (key == keyList[firstPos]) return firstPos;
//       if (key < keyList[firstPos]) return firstPos - 1;

//       numOfRows = lastPos - firstPos + 1;
//       midPos = (numOfRows >> 1) + firstPos;

//       if (key < keyList[midPos]) {
//         lastPos = midPos - 1;
//       } else if (key > keyList[midPos]) {
//         firstPos = midPos + 1;
//       } else {
//         break;
//       }
//     }

//   } else {
//     // find the first position which is bigger than the key
//     while (1) {
//       if (key <= keyList[firstPos]) return firstPos;
//       if (key == keyList[lastPos]) return lastPos;

//       if (key > keyList[lastPos]) {
//         lastPos = lastPos + 1;
//         if (lastPos >= num)
//           return -1;
//         else
//           return lastPos;
//       }

//       numOfRows = lastPos - firstPos + 1;
//       midPos = (numOfRows >> 1) + firstPos;

//       if (key < keyList[midPos]) {
//         lastPos = midPos - 1;
//       } else if (key > keyList[midPos]) {
//         firstPos = midPos + 1;
//       } else {
//         break;
//       }
//     }
//   }
H
Hongze Cheng 已提交
916

H
Hongze Cheng 已提交
917 918
//   return midPos;
// }
H
Hongze Cheng 已提交
919

H
Hongze Cheng 已提交
920 921
// static void doCheckGeneratedBlockRange(STsdbReader* pTsdbReadHandle) {
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Hongze Cheng 已提交
922

H
Hongze Cheng 已提交
923 924 925 926 927 928
//   if (cur->rows > 0) {
//     if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
//       assert(cur->win.skey >= pTsdbReadHandle->window.skey && cur->win.ekey <= pTsdbReadHandle->window.ekey);
//     } else {
//       assert(cur->win.skey >= pTsdbReadHandle->window.ekey && cur->win.ekey <= pTsdbReadHandle->window.skey);
//     }
H
Hongze Cheng 已提交
929

H
Hongze Cheng 已提交
930 931 932 933 934
//     SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, 0);
//     assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] &&
//            cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows - 1]);
//   } else {
//     cur->win = pTsdbReadHandle->window;
H
Hongze Cheng 已提交
935

H
Hongze Cheng 已提交
936 937 938 939
//     int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1;
//     cur->lastKey = pTsdbReadHandle->window.ekey + step;
//   }
// }
H
Hongze Cheng 已提交
940

H
Haojun Liao 已提交
941
// static void copyAllRemainRowsFromFileBlock(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo,
H
Hongze Cheng 已提交
942 943
//                                            SDataBlockInfo* pBlockInfo, int32_t endPos) {
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Hongze Cheng 已提交
944

H
Hongze Cheng 已提交
945 946
//   SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
//   TSKEY*     tsArray = pCols->cols[0].pData;
H
Hongze Cheng 已提交
947

H
Hongze Cheng 已提交
948
//   bool ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
H
Hongze Cheng 已提交
949

H
Hongze Cheng 已提交
950
//   int32_t step = ascScan ? 1 : -1;
H
Hongze Cheng 已提交
951

H
Hongze Cheng 已提交
952 953
//   int32_t start = cur->pos;
//   int32_t end = endPos;
H
Hongze Cheng 已提交
954

H
Hongze Cheng 已提交
955 956 957
//   if (!ascScan) {
//     TSWAP(start, end);
//   }
H
Hongze Cheng 已提交
958

H
Hongze Cheng 已提交
959 960
//   assert(pTsdbReadHandle->outputCapacity >= (end - start + 1));
//   int32_t numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, start, end);
H
Hongze Cheng 已提交
961

H
Hongze Cheng 已提交
962 963 964 965 966
//   // the time window should always be ascending order: skey <= ekey
//   cur->win = (STimeWindow){.skey = tsArray[start], .ekey = tsArray[end]};
//   cur->mixBlock = (numOfRows != pBlockInfo->rows);
//   cur->lastKey = tsArray[endPos] + step;
//   cur->blockCompleted = (ascScan ? (endPos == pBlockInfo->rows - 1) : (endPos == 0));
H
Hongze Cheng 已提交
967

H
Hongze Cheng 已提交
968 969 970 971
//   // The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases.
//   int32_t pos = endPos + step;
//   updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
//   doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Hongze Cheng 已提交
972

H
Hongze Cheng 已提交
973 974 975 976
//   tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows,
//             pTsdbReadHandle->idStr);
// }
H
Hongze Cheng 已提交
977

H
Hongze Cheng 已提交
978 979
// // only return the qualified data to client in terms of query time window, data rows in the same block but do not
// // be included in the query time window will be discarded
H
Haojun Liao 已提交
980
// static void doMergeTwoLevelData(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo, SBlock* pBlock) {
H
Hongze Cheng 已提交
981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
//   SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
//   STsdbCfg*      pCfg = REPO_CFG(pTsdbReadHandle->pTsdb);

//   initTableMemIterator(pTsdbReadHandle, pCheckInfo);

//   SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
//   assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_ID &&
//          cur->pos >= 0 && cur->pos < pBlock->numOfRows);
//   // Even Multi-Version supported, the records with duplicated TSKEY would be merged inside of tsdbLoadData
//   interface. TSKEY* tsArray = pCols->cols[0].pData; assert(pCols->numOfRows == pBlock->numOfRows && tsArray[0] ==
//   pBlock->minKey.ts &&
//          tsArray[pBlock->numOfRows - 1] == pBlock->maxKey.ts);

//   bool    ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
//   int32_t step = ascScan ? 1 : -1;

//   // for search the endPos, so the order needs to reverse
//   int32_t order = ascScan ? TSDB_ORDER_DESC : TSDB_ORDER_ASC;

//   int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
//   int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &blockInfo);

//   STimeWindow* pWin = &blockInfo.window;
//   tsdbDebug("%p uid:%" PRIu64 " start merge data block, file block range:%" PRIu64 "-%" PRIu64
//             " rows:%d, start:%d, end:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, pWin->skey, pWin->ekey, blockInfo.rows, cur->pos, endPos,
//             pTsdbReadHandle->idStr);

//   // compared with the data from in-memory buffer, to generate the correct timestamp array list
//   int32_t numOfRows = 0;
//   int32_t curRow = 0;

//   int16_t   rv1 = -1;
//   int16_t   rv2 = -1;
//   STSchema* pSchema1 = NULL;
//   STSchema* pSchema2 = NULL;

//   int32_t pos = cur->pos;
//   cur->win = TSWINDOW_INITIALIZER;
//   bool adjustPos = false;

//   // no data in buffer, load data from file directly
//   if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
//     copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &blockInfo, endPos);
//     return;
//   } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
//     SSkipListNode* node = NULL;
//     TSKEY          lastKeyAppend = TSKEY_INITIAL_VAL;

//     do {
//       STSRow* row2 = NULL;
//       STSRow* row1 = getSRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, &row2, TD_VER_MAX);
//       if (row1 == NULL) {
//         break;
//       }

//       TSKEY key = TD_ROW_KEY(row1);
//       if ((key > pTsdbReadHandle->window.ekey && ascScan) || (key < pTsdbReadHandle->window.ekey && !ascScan)) {
//         break;
//       }

//       if (adjustPos) {
//         if (key == lastKeyAppend) {
//           pos -= step;
//         }
//         adjustPos = false;
//       }

//       if (((pos > endPos || tsArray[pos] > pTsdbReadHandle->window.ekey) && ascScan) ||
//           ((pos < endPos || tsArray[pos] < pTsdbReadHandle->window.ekey) && !ascScan)) {
//         break;
//       }

//       if ((key < tsArray[pos] && ascScan) || (key > tsArray[pos] && !ascScan)) {
//         if (rv1 != TD_ROW_SVER(row1)) {
//           //          pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1));
//           rv1 = TD_ROW_SVER(row1);
//         }
//         if (row2 && rv2 != TD_ROW_SVER(row2)) {
//           //          pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2));
//           rv2 = TD_ROW_SVER(row2);
//         }

//         numOfRows +=
//             mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols,
//                                pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend);
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = key;
//         }

//         cur->win.ekey = key;
//         cur->lastKey = key + step;
//         cur->mixBlock = true;
//         moveToNextRowInMem(pCheckInfo);
//       } else if (key == tsArray[pos]) {  // data in buffer has the same timestamp of data in file block, ignore it
//         if (TD_SUPPORT_UPDATE(pCfg->update)) {
//           if (lastKeyAppend != key) {
//             if (lastKeyAppend != TSKEY_INITIAL_VAL) {
//               ++curRow;
//             }
//             lastKeyAppend = key;
//           }
//           // load data from file firstly
//           numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, pos, pos);

//           if (rv1 != TD_ROW_SVER(row1)) {
//             rv1 = TD_ROW_SVER(row1);
//           }
//           if (row2 && rv2 != TD_ROW_SVER(row2)) {
//             rv2 = TD_ROW_SVER(row2);
//           }

//           // still assign data into current row
//           numOfRows +=
//               mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols,
//                                  pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend);

//           if (cur->win.skey == TSKEY_INITIAL_VAL) {
//             cur->win.skey = key;
//           }

//           cur->win.ekey = key;
//           cur->lastKey = key + step;
//           cur->mixBlock = true;

//           moveToNextRowInMem(pCheckInfo);

//           pos += step;
//           adjustPos = true;
//         } else {
//           // discard the memory record
//           moveToNextRowInMem(pCheckInfo);
//         }
//       } else if ((key > tsArray[pos] && ascScan) || (key < tsArray[pos] && !ascScan)) {
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = tsArray[pos];
//         }

//         int32_t end = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, key, order);
//         assert(end != -1);

//         if (tsArray[end] == key) {  // the value of key in cache equals to the end timestamp value, ignore it
// #if 0
//           if (pCfg->update == TD_ROW_DISCARD_UPDATE) {
//             moveToNextRowInMem(pCheckInfo);
//           } else {
//             end -= step;
//           }
// #endif
//           if (!TD_SUPPORT_UPDATE(pCfg->update)) {
//             moveToNextRowInMem(pCheckInfo);
//           } else {
//             end -= step;
//           }
//         }

//         int32_t qstart = 0, qend = 0;
//         getQualifiedRowsPos(pTsdbReadHandle, pos, end, numOfRows, &qstart, &qend);

//         if ((lastKeyAppend != TSKEY_INITIAL_VAL) && (lastKeyAppend != (ascScan ? tsArray[qstart] : tsArray[qend]))) {
//           ++curRow;
//         }

//         numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, qstart, qend);
//         pos += (qend - qstart + 1) * step;
//         if (numOfRows > 0) {
//           curRow = numOfRows - 1;
//         }

//         cur->win.ekey = ascScan ? tsArray[qend] : tsArray[qstart];
//         cur->lastKey = cur->win.ekey + step;
//         lastKeyAppend = cur->win.ekey;
//       }
//     } while (numOfRows < pTsdbReadHandle->outputCapacity);

//     if (numOfRows < pTsdbReadHandle->outputCapacity) {
//       /**
//        * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
//        * copy them all to result buffer, since it may be overlapped with file data block.
//        */
//       if (node == NULL || ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) > pTsdbReadHandle->window.ekey) && ascScan)
//       ||
//           ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) < pTsdbReadHandle->window.ekey) && !ascScan)) {
//         // no data in cache or data in cache is greater than the ekey of time window, load data from file block
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = tsArray[pos];
//         }

//         int32_t start = -1, end = -1;
//         getQualifiedRowsPos(pTsdbReadHandle, pos, endPos, numOfRows, &start, &end);

//         numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, start, end);
//         pos += (end - start + 1) * step;

//         cur->win.ekey = ascScan ? tsArray[end] : tsArray[start];
//         cur->lastKey = cur->win.ekey + step;
//         cur->mixBlock = true;
//       }
//     }
//   }
H
Hongze Cheng 已提交
1182

H
Hongze Cheng 已提交
1183 1184
//   cur->blockCompleted = (((pos > endPos || cur->lastKey > pTsdbReadHandle->window.ekey) && ascScan) ||
//                          ((pos < endPos || cur->lastKey < pTsdbReadHandle->window.ekey) && !ascScan));
H
Hongze Cheng 已提交
1185

H
Hongze Cheng 已提交
1186 1187 1188
//   if (!ascScan) {
//     TSWAP(cur->win.skey, cur->win.ekey);
//   }
H
Hongze Cheng 已提交
1189

H
Hongze Cheng 已提交
1190 1191
//   updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
//   doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Hongze Cheng 已提交
1192

H
Hongze Cheng 已提交
1193 1194 1195 1196
//   tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows,
//             pTsdbReadHandle->idStr);
// }
H
Hongze Cheng 已提交
1197

H
Haojun Liao 已提交
1198 1199 1200
static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) {
  taosMemoryFreeClear(pSup->numOfBlocksPerTable);
  taosMemoryFreeClear(pSup->indexPerTable);
H
Hongze Cheng 已提交
1201

H
Haojun Liao 已提交
1202 1203 1204 1205
  for (int32_t i = 0; i < pSup->numOfTables; ++i) {
    SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i];
    taosMemoryFreeClear(pBlockInfo);
  }
H
Hongze Cheng 已提交
1206

H
Haojun Liao 已提交
1207 1208
  taosMemoryFreeClear(pSup->pDataBlockInfo);
}
H
Hongze Cheng 已提交
1209

H
Haojun Liao 已提交
1210 1211
static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) {
  ASSERT(numOfTables >= 1);
H
Hongze Cheng 已提交
1212

H
Haojun Liao 已提交
1213
  pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
1214 1215
  pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
  pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables);
H
Hongze Cheng 已提交
1216

H
Haojun Liao 已提交
1217 1218 1219 1220
  if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) {
    cleanupBlockOrderSupporter(pSup);
    return TSDB_CODE_OUT_OF_MEMORY;
  }
H
Hongze Cheng 已提交
1221

H
Haojun Liao 已提交
1222 1223
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
1224

H
Haojun Liao 已提交
1225
static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
1226
  int32_t leftIndex = *(int32_t*)pLeft;
H
Haojun Liao 已提交
1227
  int32_t rightIndex = *(int32_t*)pRight;
H
Hongze Cheng 已提交
1228

H
Haojun Liao 已提交
1229
  SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;
H
Hongze Cheng 已提交
1230

H
Haojun Liao 已提交
1231 1232
  int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex];
  int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex];
H
Hongze Cheng 已提交
1233

H
Haojun Liao 已提交
1234 1235 1236 1237 1238 1239 1240
  if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftIndex]) {
    /* left block is empty */
    return 1;
  } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) {
    /* right block is empty */
    return -1;
  }
H
Hongze Cheng 已提交
1241

1242
  SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex];
H
Haojun Liao 已提交
1243
  SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex];
H
Hongze Cheng 已提交
1244

H
Haojun Liao 已提交
1245 1246
  return pLeftBlock->pBlock->aSubBlock[0].offset > pRightBlock->pBlock->aSubBlock[0].offset ? 1 : -1;
}
H
Hongze Cheng 已提交
1247

H
Haojun Liao 已提交
1248
static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks) {
1249
  bool asc = ASCENDING_TRAVERSE(pReader->order);
H
Haojun Liao 已提交
1250

1251
  pBlockIter->numOfBlocks = numOfBlocks;
1252 1253
  taosArrayClear(pBlockIter->blockList);

1254 1255
  // access data blocks according to the offset of each block in asc/desc order.
  int32_t numOfTables = (int32_t)taosHashGetSize(pReader->status.pTableMap);
H
Haojun Liao 已提交
1256

1257
  SBlockOrderSupporter sup = {0};
H
Haojun Liao 已提交
1258

1259 1260 1261 1262
  int32_t code = initBlockOrderSupporter(&sup, numOfTables);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }
H
Haojun Liao 已提交
1263

1264 1265 1266 1267 1268 1269 1270
  int32_t cnt = 0;
  void*   ptr = NULL;
  while (1) {
    ptr = taosHashIterate(pReader->status.pTableMap, ptr);
    if (ptr == NULL) {
      break;
    }
H
Haojun Liao 已提交
1271

1272 1273 1274 1275
    STableBlockScanInfo* pTableScanInfo = (STableBlockScanInfo*)ptr;
    if (pTableScanInfo->pBlockList == NULL || taosArrayGetSize(pTableScanInfo->pBlockList) == 0) {
      continue;
    }
H
Haojun Liao 已提交
1276

1277 1278
    size_t num = taosArrayGetSize(pTableScanInfo->pBlockList);
    sup.numOfBlocksPerTable[sup.numOfTables] = num;
H
Haojun Liao 已提交
1279

1280 1281 1282 1283 1284
    char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num);
    if (buf == NULL) {
      cleanupBlockOrderSupporter(&sup);
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1285

1286 1287 1288 1289 1290
    sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf;
    for (int32_t k = 0; k < num; ++k) {
      SBlockOrderWrapper wrapper = {0};
      wrapper.pBlock = (SBlock*)taosArrayGet(pTableScanInfo->pBlockList, k);
      wrapper.uid = pTableScanInfo->uid;
H
Haojun Liao 已提交
1291

1292 1293 1294 1295 1296 1297
      sup.pDataBlockInfo[sup.numOfTables][k] = wrapper;
      cnt++;
    }

    sup.numOfTables += 1;
  }
H
Haojun Liao 已提交
1298

1299
  ASSERT(numOfBlocks == cnt);
H
Haojun Liao 已提交
1300

1301 1302 1303 1304 1305
  // since there is only one table qualified, blocks are not sorted
  if (sup.numOfTables == 1) {
    for (int32_t i = 0; i < numOfBlocks; ++i) {
      SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i};
      taosArrayPush(pBlockIter->blockList, &blockInfo);
1306
    }
1307 1308
    tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted %s", pReader, cnt,
              pReader->idStr);
1309

1310
    pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
H
Haojun Liao 已提交
1311 1312

    cleanupBlockOrderSupporter(&sup);
1313
    return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1314
  }
H
Haojun Liao 已提交
1315

1316 1317
  tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables,
            pReader->idStr);
1318

1319
  assert(cnt <= numOfBlocks && sup.numOfTables <= numOfTables);
H
Haojun Liao 已提交
1320

1321 1322 1323 1324 1325
  SMultiwayMergeTreeInfo* pTree = NULL;
  uint8_t                 ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar);
  if (ret != TSDB_CODE_SUCCESS) {
    cleanupBlockOrderSupporter(&sup);
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
H
Haojun Liao 已提交
1326
  }
H
Haojun Liao 已提交
1327

1328 1329 1330 1331
  int32_t numOfTotal = 0;
  while (numOfTotal < cnt) {
    int32_t pos = tMergeTreeGetChosenIndex(pTree);
    int32_t index = sup.indexPerTable[pos]++;
H
Haojun Liao 已提交
1332

1333 1334
    SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index};
    taosArrayPush(pBlockIter->blockList, &blockInfo);
H
Haojun Liao 已提交
1335

1336 1337 1338 1339
    // set data block index overflow, in order to disable the offset comparator
    if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) {
      sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1;
    }
H
Haojun Liao 已提交
1340

1341 1342
    numOfTotal += 1;
    tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree));
H
Haojun Liao 已提交
1343
  }
H
Haojun Liao 已提交
1344

1345 1346 1347
  tsdbDebug("%p %d data blocks sort completed, %s", pReader, cnt, pReader->idStr);
  cleanupBlockOrderSupporter(&sup);
  taosMemoryFree(pTree);
H
Haojun Liao 已提交
1348

1349 1350
  pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1351
}
H
Hongze Cheng 已提交
1352

H
Haojun Liao 已提交
1353
static bool blockIteratorNext(SDataBlockIter* pBlockIter) {
1354 1355
  bool asc = ASCENDING_TRAVERSE(pBlockIter->order);

1356
  int32_t step = asc ? 1 : -1;
1357
  if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) {
1358 1359 1360
    return false;
  }

1361
  pBlockIter->index += step;
1362 1363 1364
  return true;
}

1365 1366 1367
/**
 * This is an two rectangles overlap cases.
 */
1368
static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SBlock* pBlock) {
1369 1370 1371 1372
  return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) ||
         (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) ||
         (pVerRange->minVer > pBlock->minVersion && pVerRange->minVer <= pBlock->maxVersion) ||
         (pVerRange->maxVer < pBlock->maxVersion && pVerRange->maxVer >= pBlock->minVersion);
H
Haojun Liao 已提交
1373
}
H
Hongze Cheng 已提交
1374

H
Haojun Liao 已提交
1375 1376 1377 1378
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) {
  SFileDataBlockInfo* pFBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index);
  return pFBlockInfo;
}
H
Hongze Cheng 已提交
1379

1380 1381
static SBlock* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STableBlockScanInfo* pTableBlockScanInfo,
                                           int32_t* nextIndex, int32_t order) {
1382 1383 1384
  bool asc = ASCENDING_TRAVERSE(order);
  if (asc && pFBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) {
    return NULL;
1385 1386
  }

1387
  if (!asc && pFBlockInfo->tbBlockIdx == 0) {
1388 1389 1390
    return NULL;
  }

1391
  int32_t step = asc ? 1 : -1;
1392 1393 1394 1395 1396 1397 1398 1399 1400

  *nextIndex = pFBlockInfo->tbBlockIdx + step;
  SBlock* pNext = taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex);
  return pNext;
}

static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) {
  ASSERT(pBlockIter != NULL && pFBlockInfo != NULL);

1401
  int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 1 : -1;
1402 1403
  int32_t index = pBlockIter->index;

1404
  while (index < pBlockIter->numOfBlocks && index >= 0) {
1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416
    SFileDataBlockInfo* pFBlock = taosArrayGet(pBlockIter->blockList, index);
    if (pFBlock->uid == pFBlockInfo->uid && pFBlock->tbBlockIdx == pFBlockInfo->tbBlockIdx) {
      return index;
    }

    index += step;
  }

  ASSERT(0);
  return -1;
}

1417
static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) {
1418 1419 1420 1421 1422
  if (index < 0 || index >= pBlockIter->numOfBlocks) {
    return -1;
  }

  SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index);
1423 1424 1425 1426 1427
  pBlockIter->index += step;

  if (index != pBlockIter->index) {
    taosArrayRemove(pBlockIter->blockList, index);
    taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock);
1428

1429 1430 1431
    SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index);
    ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx);
  }
1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442

  return TSDB_CODE_SUCCESS;
}

static bool overlapWithNeighborBlock(SBlock* pBlock, SBlock* pNeighbor, int32_t order) {
  // it is the last block in current file, no chance to overlap with neighbor blocks.
  if (ASCENDING_TRAVERSE(order)) {
    return pBlock->maxKey.ts == pNeighbor->minKey.ts;
  } else {
    return pBlock->minKey.ts == pNeighbor->maxKey.ts;
  }
H
Haojun Liao 已提交
1443
}
H
Hongze Cheng 已提交
1444

1445
static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SBlock* pBlock) {
H
Haojun Liao 已提交
1446
  bool ascScan = ASCENDING_TRAVERSE(order);
H
Hongze Cheng 已提交
1447

1448
  return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->minKey.ts)) ||
1449
         (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->maxKey.ts));
H
Haojun Liao 已提交
1450
}
H
Hongze Cheng 已提交
1451

H
Haojun Liao 已提交
1452
static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVerRange) {
1453 1454
  return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVersion >= pVerRange->minVer) &&
         (pBlock->minVersion <= pVerRange->maxVer);
H
Haojun Liao 已提交
1455 1456
}

1457
static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock, int32_t order) {
1458 1459 1460 1461 1462
  if (pBlockScanInfo->delSkyline == NULL) {
    return false;
  }

  TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0);
L
Liu Jicong 已提交
1463
  TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline);
1464 1465 1466 1467 1468 1469

  // ts is not overlap
  if (pBlock->minKey.ts > pLast->ts || pBlock->maxKey.ts < pFirst->ts) {
    return false;
  }

L
Liu Jicong 已提交
1470
  int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1;
1471

1472 1473
  // version is not overlap
  size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline);
L
Liu Jicong 已提交
1474
  for (int32_t i = pBlockScanInfo->fileDelIndex; i < num; i += step) {
1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488
    TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i);
    if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) {
      if (p->version >= pBlock->minVersion) {
        return true;
      }
    } else if (p->ts > pBlock->maxKey.ts) {
      return false;
    }
  }

  ASSERT(0);
  return false;
}

1489 1490 1491 1492
// 1. the version of all rows should be less than the endVersion
// 2. current block should not overlap with next neighbor block
// 3. current timestamp should not be overlap with each other
// 4. output buffer should be large enough to hold all rows in current block
1493
// 5. delete info should not overlap with current block data
1494 1495
static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock,
                                STableBlockScanInfo* pScanInfo, TSDBKEY key) {
1496 1497 1498
  int32_t neighborIndex = 0;
  SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order);

1499
  // overlap with neighbor
1500 1501 1502 1503 1504
  bool overlapWithNeighbor = false;
  if (pNeighbor) {
    overlapWithNeighbor = overlapWithNeighborBlock(pBlock, pNeighbor, pReader->order);
  }

1505
  // has duplicated ts of different version in this block
L
Liu Jicong 已提交
1506 1507
  bool hasDup = (pBlock->nSubBlock == 1) ? pBlock->hasDup : true;
  bool overlapWithDel = overlapWithDelSkyline(pScanInfo, pBlock, pReader->order);
1508

1509
  return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) ||
1510
          keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || overlapWithDel);
H
Haojun Liao 已提交
1511 1512
}

1513
static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) {
1514
  if (!(pBlockScanInfo->iiter.hasVal || pBlockScanInfo->iter.hasVal)) {
1515 1516
    return TSDB_CODE_SUCCESS;
  }
H
Haojun Liao 已提交
1517

1518 1519 1520
  SSDataBlock* pBlock = pReader->pResBlock;

  int64_t st = taosGetTimestampUs();
1521
  int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->capacity, pReader);
H
Haojun Liao 已提交
1522

1523
  blockDataUpdateTsWindow(pBlock, 0);
1524
  pBlock->info.uid = pBlockScanInfo->uid;
1525

1526
  setComposedBlockFlag(pReader, true);
1527 1528 1529 1530 1531 1532

  int64_t elapsedTime = taosGetTimestampUs() - st;
  tsdbDebug("%p build data block from cache completed, elapsed time:%" PRId64
            " us, numOfRows:%d, numOfCols:%d, brange: %" PRId64 " - %" PRId64 " %s",
            pReader, elapsedTime, pBlock->info.rows, (int32_t)blockDataGetNumOfCols(pBlock), pBlock->info.window.skey,
            pBlock->info.window.ekey, pReader->idStr);
H
Haojun Liao 已提交
1533 1534 1535
  return code;
}

1536
static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow,
1537
                                     STSRow* pTSRow, SIterInfo* pIter, int64_t key) {
1538 1539 1540 1541 1542
  SRowMerger          merge = {0};
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  TSDBKEY k = TSDBROW_KEY(pRow);
1543
  TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
1544
  SArray* pDelList = pBlockScanInfo->delSkyline;
1545

1546 1547 1548 1549 1550 1551 1552 1553
  // ascending order traverse
  if (ASCENDING_TRAVERSE(pReader->order)) {
    if (key < k.ts) {
      tRowMergerInit(&merge, &fRow, pReader->pSchema);

      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      tRowMergerGetRow(&merge, &pTSRow);
    } else if (k.ts < key) {  // k.ts < key
1554
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader);
1555 1556 1557
    } else {  // k.ts == key, ascending order: file block ----> imem rows -----> mem rows
      tRowMergerInit(&merge, &fRow, pReader->pSchema);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
1558 1559

      tRowMerge(&merge, pRow);
1560
      doMergeRowsInBuf(pIter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1561 1562

      tRowMergerGetRow(&merge, &pTSRow);
1563
    }
1564 1565
  } else {  // descending order scan
    if (key < k.ts) {
1566
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader);
1567 1568
    } else if (k.ts < key) {
      tRowMergerInit(&merge, &fRow, pReader->pSchema);
1569

1570 1571 1572 1573 1574 1575
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      tRowMergerGetRow(&merge, &pTSRow);
    } else {  // descending order: mem rows -----> imem rows ------> file block
      updateSchema(pRow, pBlockScanInfo->uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
1576
      doMergeRowsInBuf(pIter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1577 1578 1579 1580 1581 1582

      tRowMerge(&merge, &fRow);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);

      tRowMergerGetRow(&merge, &pTSRow);
    }
1583 1584
  }

1585
  tRowMergerClear(&merge);
1586 1587 1588 1589
  doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
  return TSDB_CODE_SUCCESS;
}

1590 1591 1592 1593
static int32_t doMergeThreeLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SRowMerger merge = {0};
  STSRow*    pTSRow = NULL;

1594 1595
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
dengyihao's avatar
dengyihao 已提交
1596
  SArray*             pDelList = pBlockScanInfo->delSkyline;
1597

1598 1599
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pDelList, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pDelList, pReader);
1600
  ASSERT(pRow != NULL && piRow != NULL);
H
Haojun Liao 已提交
1601

1602
  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
H
Haojun Liao 已提交
1603

1604
  uint64_t uid = pBlockScanInfo->uid;
H
Haojun Liao 已提交
1605

1606 1607 1608 1609
  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);

  if (ASCENDING_TRAVERSE(pReader->order)) {
1610 1611
    // [1&2] key <= [k.ts && ik.ts]
    if (key <= k.ts && key <= ik.ts) {
1612 1613 1614
      TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
      tRowMergerInit(&merge, &fRow, pReader->pSchema);

1615
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
H
Haojun Liao 已提交
1616

1617 1618
      if (ik.ts == key) {
        tRowMerge(&merge, piRow);
1619
        doMergeRowsInBuf(&pBlockScanInfo->iiter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1620 1621
      }

1622 1623
      if (k.ts == key) {
        tRowMerge(&merge, pRow);
1624
        doMergeRowsInBuf(&pBlockScanInfo->iter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1625 1626 1627 1628
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
1629
      return TSDB_CODE_SUCCESS;
1630
    } else {  // key > ik.ts || key > k.ts
1631 1632
      ASSERT(key != ik.ts);

1633
      // [3] ik.ts < key <= k.ts
1634
      // [4] ik.ts < k.ts <= key
1635
      if (ik.ts < k.ts) {
1636
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1637 1638 1639 1640
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

1641 1642
      // [5] k.ts < key   <= ik.ts
      // [6] k.ts < ik.ts <= key
1643
      if (k.ts < ik.ts) {
1644
        doMergeMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, &pTSRow, pReader);
1645 1646 1647 1648
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

1649
      // [7] k.ts == ik.ts < key
1650
      if (k.ts == ik.ts) {
1651 1652
        ASSERT(key > ik.ts && key > k.ts);

1653
        doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pTSRow);
1654 1655 1656 1657
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }
    }
1658 1659 1660 1661 1662 1663
  } else {  // descending order scan
    // [1/2] k.ts >= ik.ts && k.ts >= key
    if (k.ts >= ik.ts && k.ts >= key) {
      updateSchema(pRow, uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
1664
      doMergeRowsInBuf(&pBlockScanInfo->iter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1665 1666 1667

      if (ik.ts == k.ts) {
        tRowMerge(&merge, piRow);
1668
        doMergeRowsInBuf(&pBlockScanInfo->iiter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680
      }

      if (k.ts == key) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
      return TSDB_CODE_SUCCESS;
    } else {
1681
      ASSERT(ik.ts != k.ts);  // this case has been included in the previous if branch
1682 1683 1684 1685

      // [3] ik.ts > k.ts >= Key
      // [4] ik.ts > key >= k.ts
      if (ik.ts > key) {
1686
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

      // [5] key > ik.ts > k.ts
      // [6] key > k.ts > ik.ts
      if (key > ik.ts) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMergerInit(&merge, &fRow, pReader->pSchema);

        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

      //[7] key = ik.ts > k.ts
      if (key == ik.ts) {
1705
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717

        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  ASSERT(0);
S
Shengliang Guan 已提交
1718
  return -1;
1719 1720
}

dengyihao's avatar
dengyihao 已提交
1721 1722
static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo,
                                STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733
  // check for version and time range
  int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex];
  if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) {
    return false;
  }

  int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex];
  if (ts > pReader->window.ekey || ts < pReader->window.skey) {
    return false;
  }

1734
  TSDBKEY k = {.ts = ts, .version = ver};
1735
  if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k, pReader->order)) {
1736 1737 1738
    return false;
  }

1739 1740 1741
  return true;
}

1742
static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); }
1743

1744 1745 1746 1747 1748 1749 1750 1751
static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;

  SRowMerger merge = {0};
  STSRow*    pTSRow = NULL;

  int64_t  key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
1752 1753
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader);
1754

1755
  if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal) {
1756
    return doMergeThreeLevelRows(pReader, pBlockScanInfo);
1757
  } else {
1758
    // imem + file
1759 1760
    if (pBlockScanInfo->iiter.hasVal) {
      return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, pTSRow, &pBlockScanInfo->iiter, key);
1761 1762
    }

1763
    // mem + file
1764
    if (pBlockScanInfo->iter.hasVal) {
dengyihao's avatar
dengyihao 已提交
1765
      return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, pTSRow, &pBlockScanInfo->iter, key);
H
Haojun Liao 已提交
1766
    }
1767

1768
    // imem & mem are all empty, only file exist
1769
    TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
1770 1771 1772 1773
    tRowMergerInit(&merge, &fRow, pReader->pSchema);
    doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
    tRowMergerGetRow(&merge, &pTSRow);
    doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
1774

1775
    return TSDB_CODE_SUCCESS;
1776 1777 1778
  }
}

1779
static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
1780 1781
  SSDataBlock* pResBlock = pReader->pResBlock;

1782
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
1783 1784
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  int32_t             step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;
1785

1786
  while (1) {
1787 1788
    // todo check the validate of row in file block
    {
1789
      if (!isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) {
1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803
        pDumpInfo->rowIndex += step;

        SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
        SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);

        if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
          setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
          break;
        }

        continue;
      }
    }

1804
    buildComposedDataBlockImpl(pReader, pBlockScanInfo);
1805

1806 1807
    SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
    SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
1808

1809 1810 1811 1812 1813 1814 1815 1816
    // currently loaded file data block is consumed
    if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
      setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
      break;
    }

    if (pResBlock->info.rows >= pReader->capacity) {
      break;
1817 1818 1819 1820
    }
  }

  pResBlock->info.uid = pBlockScanInfo->uid;
1821 1822
  blockDataUpdateTsWindow(pResBlock, 0);

1823
  setComposedBlockFlag(pReader, true);
1824

1825 1826
  tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s", pReader,
            pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows,
1827
            pReader->idStr);
1828

1829 1830 1831 1832 1833
  return TSDB_CODE_SUCCESS;
}

void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; }

1834
static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
1835 1836 1837 1838
  if (pBlockScanInfo->iterInit) {
    return TSDB_CODE_SUCCESS;
  }

1839
  int32_t code = TSDB_CODE_SUCCESS;
1840 1841 1842 1843 1844 1845 1846 1847 1848

  TSDBKEY startKey = {0};
  if (ASCENDING_TRAVERSE(pReader->order)) {
    startKey = (TSDBKEY){.ts = pReader->window.skey, .version = pReader->verRange.minVer};
  } else {
    startKey = (TSDBKEY){.ts = pReader->window.ekey, .version = pReader->verRange.maxVer};
  }

  int32_t backward = (!ASCENDING_TRAVERSE(pReader->order));
1849 1850 1851 1852

  STbData* d = NULL;
  if (pReader->pTsdb->mem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pTsdb->mem, pReader->suid, pBlockScanInfo->uid, &d);
1853
    if (d != NULL) {
1854
      code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter);
1855
      if (code == TSDB_CODE_SUCCESS) {
1856
        pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL);
1857

H
Haojun Liao 已提交
1858
        tsdbDebug("%p uid:%" PRId64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
1859 1860
                  "-%" PRId64 " %s",
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, d->minKey, d->maxKey, pReader->idStr);
1861
      } else {
1862 1863
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for imem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
1864
        return code;
1865 1866
      }
    }
H
Haojun Liao 已提交
1867
  } else {
1868
    tsdbDebug("%p uid:%" PRId64 ", no data in mem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
H
Haojun Liao 已提交
1869 1870
  }

1871 1872 1873
  STbData* di = NULL;
  if (pReader->pTsdb->imem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pTsdb->imem, pReader->suid, pBlockScanInfo->uid, &di);
1874
    if (di != NULL) {
1875
      code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter);
1876
      if (code == TSDB_CODE_SUCCESS) {
1877
        pBlockScanInfo->iiter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iiter.iter) != NULL);
1878

H
Haojun Liao 已提交
1879
        tsdbDebug("%p uid:%" PRId64 ", check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
1880
                  "-%" PRId64 " %s",
1881
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, di->minKey, di->maxKey, pReader->idStr);
1882
      } else {
1883 1884
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for mem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
1885
        return code;
1886 1887
      }
    }
H
Haojun Liao 已提交
1888 1889
  } else {
    tsdbDebug("%p uid:%" PRId64 ", no data in imem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
1890 1891
  }

1892 1893
  initDelSkylineIterator(pBlockScanInfo, pReader, d, di);

1894
  pBlockScanInfo->iterInit = true;
H
Haojun Liao 已提交
1895 1896 1897
  return TSDB_CODE_SUCCESS;
}

dengyihao's avatar
dengyihao 已提交
1898 1899
int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData,
                               STbData* piMemTbData) {
1900 1901 1902
  if (pBlockScanInfo->delSkyline != NULL) {
    return TSDB_CODE_SUCCESS;
  }
1903

1904 1905 1906
  int32_t code = 0;
  STsdb*  pTsdb = pReader->pTsdb;

1907 1908
  SArray* pDelData = taosArrayInit(4, sizeof(SDelData));

dengyihao's avatar
dengyihao 已提交
1909
  SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->fs->cState);
1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921
  if (pDelFile) {
    SDelFReader* pDelFReader = NULL;
    code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
    if (code) {
      goto _err;
    }

    SArray* aDelIdx = taosArrayInit(4, sizeof(SDelIdx));
    if (aDelIdx == NULL) {
      goto _err;
    }

1922 1923 1924 1925
    code = tsdbReadDelIdx(pDelFReader, aDelIdx, NULL);
    if (code) {
      goto _err;
    }
1926

1927 1928 1929 1930 1931 1932
    SDelIdx  idx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid};
    SDelIdx* pIdx = taosArraySearch(aDelIdx, &idx, tCmprDelIdx, TD_EQ);

    code = tsdbReadDelData(pDelFReader, pIdx, pDelData, NULL);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
1933
    }
1934
  }
1935

1936 1937 1938 1939 1940 1941 1942
  SDelData* p = NULL;
  if (pMemTbData != NULL) {
    p = pMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
1943 1944
  }

1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
  if (piMemTbData != NULL) {
    p = piMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
  }

  if (taosArrayGetSize(pDelData) > 0) {
    pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY));
    code = tsdbBuildDeleteSkyline(pDelData, 0, (int32_t)(taosArrayGetSize(pDelData) - 1), pBlockScanInfo->delSkyline);
  }

  taosArrayDestroy(pDelData);
dengyihao's avatar
dengyihao 已提交
1959 1960
  pBlockScanInfo->iter.index =
      ASCENDING_TRAVERSE(pReader->order) ? 0 : taosArrayGetSize(pBlockScanInfo->delSkyline) - 1;
1961 1962
  pBlockScanInfo->iiter.index = pBlockScanInfo->iter.index;
  pBlockScanInfo->fileDelIndex = pBlockScanInfo->iter.index;
1963 1964
  return code;

1965 1966 1967
_err:
  taosArrayDestroy(pDelData);
  return code;
1968 1969
}

1970 1971 1972
static TSDBKEY getCurrentKeyInBuf(SDataBlockIter* pBlockIter, STsdbReader* pReader) {
  TSDBKEY key = {.ts = TSKEY_INITIAL_VAL};

1973
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
1974 1975
  STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

1976 1977
  initMemDataIterator(pScanInfo, pReader);
  TSDBROW* pRow = getValidRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader);
1978
  if (pRow != NULL) {
1979 1980 1981
    key = TSDBROW_KEY(pRow);
  }

1982
  pRow = getValidRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader);
1983
  if (pRow != NULL) {
1984 1985 1986 1987 1988 1989 1990 1991 1992
    TSDBKEY k = TSDBROW_KEY(pRow);
    if (key.ts > k.ts) {
      key = k;
    }
  }

  return key;
}

H
Haojun Liao 已提交
1993 1994
static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) {
  SReaderStatus* pStatus = &pReader->status;
H
Haojun Liao 已提交
1995
  SArray* pIndexList = taosArrayInit(4, sizeof(SBlockIdx));
H
Haojun Liao 已提交
1996 1997

  while (1) {
1998
    bool hasNext = filesetIteratorNext(&pStatus->fileIter, pReader);
1999
    if (!hasNext) {  // no data files on disk
H
Haojun Liao 已提交
2000 2001 2002
      break;
    }

H
Haojun Liao 已提交
2003
    taosArrayClear(pIndexList);
H
Haojun Liao 已提交
2004 2005
    int32_t code = doLoadBlockIndex(pReader, pReader->pFileReader, pIndexList);
    if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2006
      taosArrayDestroy(pIndexList);
H
Haojun Liao 已提交
2007 2008 2009 2010 2011 2012 2013
      return code;
    }

    if (taosArrayGetSize(pIndexList) > 0) {
      uint32_t numOfValidTable = 0;
      code = doLoadFileBlock(pReader, pIndexList, &numOfValidTable, numOfBlocks);
      if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2014
        taosArrayDestroy(pIndexList);
H
Haojun Liao 已提交
2015 2016 2017 2018 2019 2020 2021 2022 2023 2024
        return code;
      }

      if (numOfValidTable > 0) {
        break;
      }
    }
    // no blocks in current file, try next files
  }

H
Haojun Liao 已提交
2025
  taosArrayDestroy(pIndexList);
H
Haojun Liao 已提交
2026 2027 2028
  return TSDB_CODE_SUCCESS;
}

2029 2030 2031
static int32_t doBuildDataBlock(STsdbReader* pReader) {
  int32_t code = TSDB_CODE_SUCCESS;

2032
  SReaderStatus*  pStatus = &pReader->status;
2033 2034
  SDataBlockIter* pBlockIter = &pStatus->blockIter;

2035 2036
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
  STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
2037 2038 2039 2040 2041

  SBlock* pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);

  TSDBKEY key = getCurrentKeyInBuf(pBlockIter, pReader);
  if (fileBlockShouldLoad(pReader, pFBlock, pBlock, pScanInfo, key)) {
2042 2043
    tBlockDataInit(&pStatus->fileBlockData);
    code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData);
2044 2045 2046 2047 2048
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // build composed data block
2049
    code = buildComposedDataBlock(pReader, pScanInfo);
2050 2051
  } else if (bufferDataInFileBlockGap(pReader->order, key, pBlock)) {
    // data in memory that are earlier than current file block
2052
    // todo rows in buffer should be less than the file block in asc, greater than file block in desc
2053
    int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? pBlock->minKey.ts : pBlock->maxKey.ts;
2054
    code = buildDataBlockFromBuf(pReader, pScanInfo, endKey);
2055
  } else {  // whole block is required, return it directly
2056
    SDataBlockInfo* pInfo = &pReader->pResBlock->info;
2057 2058 2059
    pInfo->rows = pBlock->nRow;
    pInfo->uid = pScanInfo->uid;
    pInfo->window = (STimeWindow){.skey = pBlock->minKey.ts, .ekey = pBlock->maxKey.ts};
2060
    setComposedBlockFlag(pReader, false);
2061
    setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock, pReader->order);
2062 2063 2064 2065 2066
  }

  return code;
}

H
Haojun Liao 已提交
2067
static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) {
2068 2069
  SReaderStatus* pStatus = &pReader->status;

2070
  while (1) {
2071 2072 2073
    if (pStatus->pTableIter == NULL) {
      pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);
      if (pStatus->pTableIter == NULL) {
H
Haojun Liao 已提交
2074
        return TSDB_CODE_SUCCESS;
2075 2076 2077 2078
      }
    }

    STableBlockScanInfo* pBlockScanInfo = pStatus->pTableIter;
2079
    initMemDataIterator(pBlockScanInfo, pReader);
2080

2081
    int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? INT64_MAX : INT64_MIN;
2082
    int32_t code = buildDataBlockFromBuf(pReader, pBlockScanInfo, endKey);
H
Haojun Liao 已提交
2083 2084 2085 2086
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

2087
    if (pReader->pResBlock->info.rows > 0) {
H
Haojun Liao 已提交
2088
      return TSDB_CODE_SUCCESS;
2089 2090 2091 2092 2093
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
    if (pStatus->pTableIter == NULL) {
H
Haojun Liao 已提交
2094
      return TSDB_CODE_SUCCESS;
2095 2096 2097 2098
    }
  }
}

2099
// set the correct start position in case of the first/last file block, according to the query time window
2100
static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
2101
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
2102
  STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
2103
  SBlock*              pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);
2104

2105 2106 2107
  SReaderStatus* pStatus = &pReader->status;

  SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo;
2108 2109 2110

  pDumpInfo->totalRows = pBlock->nRow;
  pDumpInfo->allDumped = false;
2111
  pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->order) ? 0 : pBlock->nRow - 1;
2112 2113
}

2114
static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128
  int32_t numOfBlocks = 0;
  int32_t code = moveToNextFile(pReader, &numOfBlocks);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // all data files are consumed, try data in buffer
  if (numOfBlocks == 0) {
    pReader->status.loadFromFile = false;
    return code;
  }

  // initialize the block iterator for a new fileset
  code = initBlockIterator(pReader, pBlockIter, numOfBlocks);
2129 2130

  // set the correct start position according to the query time window
2131
  initBlockDumpInfo(pReader, pBlockIter);
2132 2133 2134
  return code;
}

2135
static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) {
2136 2137
  return (!pDumpInfo->allDumped) &&
         ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc)));
2138 2139
}

2140
static int32_t buildBlockFromFiles(STsdbReader* pReader) {
H
Haojun Liao 已提交
2141
  int32_t code = TSDB_CODE_SUCCESS;
2142 2143
  bool    asc = ASCENDING_TRAVERSE(pReader->order);

2144 2145
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

2146
  while (1) {
2147
    SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
2148 2149
    STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

2150 2151
    SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

2152
    if (fileBlockPartiallyRead(pDumpInfo, asc)) {  // file data block is partially loaded
2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167
      code = buildComposedDataBlock(pReader, pScanInfo);
    } else {
      // current block are exhausted, try the next file block
      if (pDumpInfo->allDumped) {
        // try next data block in current file
        bool hasNext = blockIteratorNext(&pReader->status.blockIter);
        if (hasNext) {  // check for the next block in the block accessed order list
          initBlockDumpInfo(pReader, pBlockIter);
        } else {  // data blocks in current file are exhausted, let's try the next file now
          code = initForFirstBlockInFile(pReader, pBlockIter);

          // error happens or all the data files are completely checked
          if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) {
            return code;
          }
2168
        }
H
Haojun Liao 已提交
2169
      }
2170 2171 2172

      // current block is not loaded yet, or data in buffer may overlap with the file block.
      code = doBuildDataBlock(pReader);
2173 2174
    }

2175 2176 2177 2178 2179 2180 2181 2182
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pReader->pResBlock->info.rows > 0) {
      return TSDB_CODE_SUCCESS;
    }
  }
2183
}
H
refact  
Hongze Cheng 已提交
2184

2185 2186
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr,
                                  int8_t* pLevel) {
2187
  if (VND_IS_RSMA(pVnode)) {
2188
    int8_t  level = 0;
2189 2190
    int64_t now = taosGetTimestamp(pVnode->config.tsdbCfg.precision);

2191
    for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) {
2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204
      SRetention* pRetention = retentions + level;
      if (pRetention->keep <= 0) {
        if (level > 0) {
          --level;
        }
        break;
      }
      if ((now - pRetention->keep) <= winSKey) {
        break;
      }
      ++level;
    }

2205 2206
    int32_t     vgId = TD_VID(pVnode);
    const char* str = (idStr != NULL) ? idStr : "";
2207 2208

    if (level == TSDB_RETENTION_L0) {
2209
      *pLevel = TSDB_RETENTION_L0;
2210
      tsdbDebug("vgId:%d, rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L0, str);
2211 2212
      return VND_RSMA0(pVnode);
    } else if (level == TSDB_RETENTION_L1) {
2213
      *pLevel = TSDB_RETENTION_L1;
2214
      tsdbDebug("vgId:%d, rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L1, str);
2215 2216
      return VND_RSMA1(pVnode);
    } else {
2217
      *pLevel = TSDB_RETENTION_L2;
2218
      tsdbDebug("vgId:%d, rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L2, str);
2219 2220 2221 2222 2223 2224 2225
      return VND_RSMA2(pVnode);
    }
  }

  return VND_TSDB(pVnode);
}

H
Haojun Liao 已提交
2226
SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) {
L
Liu Jicong 已提交
2227
  int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion;
H
Haojun Liao 已提交
2228

2229
  if (VND_IS_RSMA(pVnode)) {
H
Haojun Liao 已提交
2230 2231 2232 2233
    return (SVersionRange){.minVer = startVer, .maxVer = tdRSmaGetMaxSubmitVer(pVnode->pSma, level)};
  }

  int64_t endVer = 0;
L
Liu Jicong 已提交
2234 2235
  if (pCond->endVersion ==
      -1) {  // user not specified end version, set current maximum version of vnode as the endVersion
H
Haojun Liao 已提交
2236 2237
    endVer = pVnode->state.applied;
  } else {
L
Liu Jicong 已提交
2238
    endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion;
2239 2240
  }

H
Haojun Liao 已提交
2241
  return (SVersionRange){.minVer = startVer, .maxVer = endVer};
2242 2243
}

H
Hongze Cheng 已提交
2244 2245 2246 2247
// // todo not unref yet, since it is not support multi-group interpolation query
// static UNUSED_FUNC void changeQueryHandleForInterpQuery(STsdbReader* pHandle) {
//   // filter the queried time stamp in the first place
//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;
H
refact  
Hongze Cheng 已提交
2248

H
Hongze Cheng 已提交
2249 2250
//   // starts from the buffer in case of descending timestamp order check data blocks
//   size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
refact  
Hongze Cheng 已提交
2251

H
Hongze Cheng 已提交
2252 2253
//   int32_t i = 0;
//   while (i < numOfTables) {
H
Haojun Liao 已提交
2254
//     STableBlockScanInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
refact  
Hongze Cheng 已提交
2255

H
Hongze Cheng 已提交
2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269
//     // the first qualified table for interpolation query
//     //    if ((pTsdbReadHandle->window.skey <= pCheckInfo->pTableObj->lastKey) &&
//     //        (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) {
//     //      break;
//     //    }

//     i++;
//   }

//   // there are no data in all the tables
//   if (i == numOfTables) {
//     return;
//   }

H
Haojun Liao 已提交
2270
//   STableBlockScanInfo info = *(STableBlockScanInfo*)taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Hongze Cheng 已提交
2271 2272 2273 2274 2275 2276
//   taosArrayClear(pTsdbReadHandle->pTableCheckInfo);

//   info.lastKey = pTsdbReadHandle->window.skey;
//   taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info);
// }

2277
bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order) {
2278 2279 2280 2281
  ASSERT(pKey != NULL);
  if (pDelList == NULL) {
    return false;
  }
L
Liu Jicong 已提交
2282 2283 2284
  size_t  num = taosArrayGetSize(pDelList);
  bool    asc = ASCENDING_TRAVERSE(order);
  int32_t step = asc ? 1 : -1;
2285

2286 2287 2288 2289 2290 2291
  if (asc) {
    if (*index >= num - 1) {
      TSDBKEY* last = taosArrayGetLast(pDelList);
      ASSERT(pKey->ts >= last->ts);

      if (pKey->ts > last->ts) {
2292
        return false;
2293 2294 2295
      } else if (pKey->ts == last->ts) {
        TSDBKEY* prev = taosArrayGet(pDelList, num - 2);
        return (prev->version >= pKey->version);
2296 2297
      }
    } else {
2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327
      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
      TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1);

      if (pKey->ts < pCurrent->ts) {
        return false;
      }

      if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
        return true;
      }

      while (pNext->ts <= pKey->ts && (*index) < num - 1) {
        (*index) += 1;

        if ((*index) < num - 1) {
          pCurrent = taosArrayGet(pDelList, *index);
          pNext = taosArrayGet(pDelList, (*index) + 1);

          // it is not a consecutive deletion range, ignore it
          if (pCurrent->version == 0 && pNext->version > 0) {
            continue;
          }

          if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
            return true;
          }
        }
      }

      return false;
2328 2329
    }
  } else {
2330 2331
    if (*index <= 0) {
      TSDBKEY* pFirst = taosArrayGet(pDelList, 0);
2332

2333 2334 2335 2336 2337 2338 2339
      if (pKey->ts < pFirst->ts) {
        return false;
      } else if (pKey->ts == pFirst->ts) {
        return pFirst->version >= pKey->version;
      } else {
        ASSERT(0);
      }
2340
    } else {
2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367
      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
      TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1);

      if (pKey->ts > pCurrent->ts) {
        return false;
      }

      if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
        return true;
      }

      while (pPrev->ts >= pKey->ts && (*index) > 1) {
        (*index) += step;

        if ((*index) >= 1) {
          pCurrent = taosArrayGet(pDelList, *index);
          pPrev = taosArrayGet(pDelList, (*index) - 1);

          // it is not a consecutive deletion range, ignore it
          if (pCurrent->version > 0 && pPrev->version == 0) {
            continue;
          }

          if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
            return true;
          }
        }
2368 2369 2370 2371 2372
      }

      return false;
    }
  }
2373 2374

  return false;
2375 2376 2377 2378
}

TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) {
  if (!pIter->hasVal) {
H
Haojun Liao 已提交
2379 2380
    return NULL;
  }
H
Hongze Cheng 已提交
2381

2382
  TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter);
2383
  TSDBKEY  key = TSDBROW_KEY(pRow);
2384
  if (outOfTimeWindow(key.ts, &pReader->window)) {
2385
    pIter->hasVal = false;
H
Haojun Liao 已提交
2386 2387
    return NULL;
  }
H
Hongze Cheng 已提交
2388

2389
  // it is a valid data version
dengyihao's avatar
dengyihao 已提交
2390
  if ((key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer) &&
2391
      (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) {
H
Haojun Liao 已提交
2392 2393
    return pRow;
  }
H
Hongze Cheng 已提交
2394

2395
  while (1) {
2396 2397
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
H
Haojun Liao 已提交
2398 2399
      return NULL;
    }
H
Hongze Cheng 已提交
2400

2401
    pRow = tsdbTbDataIterGet(pIter->iter);
H
Hongze Cheng 已提交
2402

H
Haojun Liao 已提交
2403
    key = TSDBROW_KEY(pRow);
2404
    if (outOfTimeWindow(key.ts, &pReader->window)) {
2405
      pIter->hasVal = false;
H
Haojun Liao 已提交
2406 2407
      return NULL;
    }
H
Hongze Cheng 已提交
2408

dengyihao's avatar
dengyihao 已提交
2409
    if (key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer &&
2410
        (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) {
H
Haojun Liao 已提交
2411 2412 2413 2414
      return pRow;
    }
  }
}
H
Hongze Cheng 已提交
2415

dengyihao's avatar
dengyihao 已提交
2416
int32_t doMergeRowsInBuf(SIterInfo* pIter, int64_t ts, SArray* pDelList, SRowMerger* pMerger, STsdbReader* pReader) {
H
Haojun Liao 已提交
2417
  while (1) {
2418 2419
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
H
Haojun Liao 已提交
2420 2421
      break;
    }
H
Hongze Cheng 已提交
2422

2423
    // data exists but not valid
2424
    TSDBROW* pRow = getValidRow(pIter, pDelList, pReader);
2425 2426 2427 2428 2429
    if (pRow == NULL) {
      break;
    }

    // ts is not identical, quit
H
Haojun Liao 已提交
2430
    TSDBKEY k = TSDBROW_KEY(pRow);
2431
    if (k.ts != ts) {
H
Haojun Liao 已提交
2432 2433 2434 2435 2436 2437 2438 2439 2440
      break;
    }

    tRowMerge(pMerger, pRow);
  }

  return TSDB_CODE_SUCCESS;
}

2441
static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger,
2442
                                          SVersionRange* pVerRange, int32_t step) {
2443 2444
  while (pBlockData->aTSKEY[rowIndex] == key && rowIndex < pBlockData->nRow && rowIndex >= 0) {
    if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) {
2445
      rowIndex += step;
2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462
      continue;
    }

    TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex);
    tRowMerge(pMerger, &fRow);
    rowIndex += step;
  }

  return rowIndex;
}

typedef enum {
  CHECK_FILEBLOCK_CONT = 0x1,
  CHECK_FILEBLOCK_QUIT = 0x2,
} CHECK_FILEBLOCK_STATE;

static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SBlock* pBlock,
2463 2464
                                         SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key,
                                         CHECK_FILEBLOCK_STATE* state) {
2465
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
2466
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
2467

2468
  *state = CHECK_FILEBLOCK_QUIT;
2469
  int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;
2470 2471 2472

  int32_t nextIndex = -1;
  SBlock* pNeighborBlock = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &nextIndex, pReader->order);
2473
  if (pNeighborBlock == NULL) {  // do nothing
2474 2475 2476 2477 2478
    return 0;
  }

  bool overlap = overlapWithNeighborBlock(pBlock, pNeighborBlock, pReader->order);
  if (overlap) {  // load next block
2479
    SReaderStatus*  pStatus = &pReader->status;
2480 2481
    SDataBlockIter* pBlockIter = &pStatus->blockIter;

2482
    // 1. find the next neighbor block in the scan block list
2483
    SFileDataBlockInfo fb = {.uid = pFBlock->uid, .tbBlockIdx = nextIndex};
2484
    int32_t            neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb);
2485

2486
    // 2. remove it from the scan block list
2487
    setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step);
2488

2489
    // 3. load the neighbor block, and set it to be the currently accessed file data block
2490 2491 2492 2493 2494
    int32_t code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

2495
    // 4. check the data values
2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508
    initBlockDumpInfo(pReader, pBlockIter);

    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);

    if (pDumpInfo->rowIndex >= pBlock->nRow) {
      *state = CHECK_FILEBLOCK_CONT;
    }
  }

  return TSDB_CODE_SUCCESS;
}

2509 2510
int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                SRowMerger* pMerger) {
2511 2512
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

2513
  bool    asc = ASCENDING_TRAVERSE(pReader->order);
2514
  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
2515
  int32_t step = asc ? 1 : -1;
2516

2517 2518 2519 2520 2521
  pDumpInfo->rowIndex += step;
  if (pDumpInfo->rowIndex <= pBlockData->nRow - 1) {
    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);
  }
2522

2523 2524 2525 2526
  // all rows are consumed, let's try next file block
  if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) {
    while (1) {
      CHECK_FILEBLOCK_STATE st;
2527

2528 2529 2530 2531 2532
      SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter);
      SBlock*             pCurrentBlock = taosArrayGet(pScanInfo->pBlockList, pFileBlockInfo->tbBlockIdx);
      checkForNeighborFileBlock(pReader, pScanInfo, pCurrentBlock, pFileBlockInfo, pMerger, key, &st);
      if (st == CHECK_FILEBLOCK_QUIT) {
        break;
2533
      }
2534
    }
H
Haojun Liao 已提交
2535
  }
2536

H
Haojun Liao 已提交
2537 2538 2539
  return TSDB_CODE_SUCCESS;
}

2540
void updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader) {
2541 2542 2543 2544 2545 2546 2547 2548 2549 2550
  int32_t sversion = TSDBROW_SVERSION(pRow);

  if (pReader->pSchema == NULL) {
    pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, uid, sversion);
  } else if (pReader->pSchema->version != sversion) {
    taosMemoryFreeClear(pReader->pSchema);
    pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, uid, sversion);
  }
}

dengyihao's avatar
dengyihao 已提交
2551 2552
void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow,
                      STsdbReader* pReader) {
2553 2554 2555
  SRowMerger merge = {0};

  TSDBKEY k = TSDBROW_KEY(pRow);
2556
  updateSchema(pRow, uid, pReader);
H
Haojun Liao 已提交
2557

2558
  tRowMergerInit(&merge, pRow, pReader->pSchema);
2559
  doMergeRowsInBuf(pIter, k.ts, pDelList, &merge, pReader);
2560
  tRowMergerGetRow(&merge, pTSRow);
2561
  tRowMergerClear(&merge);
2562 2563
}

2564 2565
void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                        STSRow** pTSRow) {
H
Haojun Liao 已提交
2566 2567
  SRowMerger merge = {0};

2568 2569 2570
  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);

2571 2572 2573 2574
  if (ASCENDING_TRAVERSE(pReader->order)) {  // ascending order imem --> mem
    updateSchema(piRow, pBlockScanInfo->uid, pReader);

    tRowMergerInit(&merge, piRow, pReader->pSchema);
2575
    doMergeRowsInBuf(&pBlockScanInfo->iiter, ik.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2576

2577
    tRowMerge(&merge, pRow);
2578
    doMergeRowsInBuf(&pBlockScanInfo->iter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2579 2580
  } else {
    updateSchema(pRow, pBlockScanInfo->uid, pReader);
2581

2582
    tRowMergerInit(&merge, pRow, pReader->pSchema);
2583
    doMergeRowsInBuf(&pBlockScanInfo->iter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2584 2585

    tRowMerge(&merge, piRow);
2586
    doMergeRowsInBuf(&pBlockScanInfo->iiter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2587
  }
2588 2589 2590 2591

  tRowMergerGetRow(&merge, pTSRow);
}

2592 2593
int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STSRow** pTSRow,
                            int64_t endKey) {
2594 2595
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader);
dengyihao's avatar
dengyihao 已提交
2596
  SArray*  pDelList = pBlockScanInfo->delSkyline;
H
Haojun Liao 已提交
2597

2598 2599
  // todo refactor
  bool asc = ASCENDING_TRAVERSE(pReader->order);
2600
  if (pBlockScanInfo->iter.hasVal) {
2601 2602 2603 2604 2605 2606
    TSDBKEY k = TSDBROW_KEY(pRow);
    if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) {
      pRow = NULL;
    }
  }

2607
  if (pBlockScanInfo->iiter.hasVal) {
2608 2609 2610 2611 2612 2613
    TSDBKEY k = TSDBROW_KEY(piRow);
    if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) {
      piRow = NULL;
    }
  }

2614
  if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal && pRow != NULL && piRow != NULL) {
2615
    TSDBKEY k = TSDBROW_KEY(pRow);
2616
    TSDBKEY ik = TSDBROW_KEY(piRow);
H
Haojun Liao 已提交
2617

2618
    if (ik.ts < k.ts) {  // ik.ts < k.ts
2619
      doMergeMultiRows(piRow, pBlockScanInfo->uid, &pBlockScanInfo->iiter, pDelList, pTSRow, pReader);
2620
    } else if (k.ts < ik.ts) {
2621
      doMergeMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader);
2622 2623
    } else {  // ik.ts == k.ts
      doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, pTSRow);
H
Haojun Liao 已提交
2624
    }
2625 2626

    return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
2627 2628
  }

2629 2630
  if (pBlockScanInfo->iter.hasVal && pRow != NULL) {
    doMergeMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader);
H
Haojun Liao 已提交
2631 2632 2633
    return TSDB_CODE_SUCCESS;
  }

2634 2635
  if (pBlockScanInfo->iiter.hasVal && piRow != NULL) {
    doMergeMultiRows(piRow, pBlockScanInfo->uid, &pBlockScanInfo->iiter, pDelList, pTSRow, pReader);
H
Haojun Liao 已提交
2636 2637 2638 2639 2640 2641
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_SUCCESS;
}

2642 2643 2644 2645
int32_t doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow) {
  int32_t numOfRows = pBlock->info.rows;
  int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);

2646
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
2647
  STSchema*           pSchema = pReader->pSchema;
2648

2649
  SColVal colVal = {0};
2650
  int32_t i = 0, j = 0;
H
Haojun Liao 已提交
2651

2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671
  SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
  if (pColInfoData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
    colDataAppend(pColInfoData, numOfRows, (const char*)&pTSRow->ts, false);
    i += 1;
  }

  while (i < numOfCols && j < pSchema->numOfCols) {
    pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
    col_id_t colId = pColInfoData->info.colId;

    if (colId == pSchema->columns[j].colId) {
      tTSRowGetVal(pTSRow, pReader->pSchema, j, &colVal);
      doCopyColVal(pColInfoData, numOfRows, i, &colVal, pSupInfo);
      i += 1;
      j += 1;
    } else if (colId < pSchema->columns[j].colId) {
      colDataAppendNULL(pColInfoData, numOfRows);
      i += 1;
    } else if (colId > pSchema->columns[j].colId) {
      j += 1;
2672
    }
2673 2674
  }

2675
  // set null value since current column does not exist in the "pSchema"
2676
  while (i < numOfCols) {
2677 2678 2679 2680 2681
    pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
    colDataAppendNULL(pColInfoData, numOfRows);
    i += 1;
  }

2682 2683 2684 2685
  pBlock->info.rows += 1;
  return TSDB_CODE_SUCCESS;
}

2686 2687
int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                  STsdbReader* pReader) {
H
Haojun Liao 已提交
2688 2689 2690 2691
  SSDataBlock* pBlock = pReader->pResBlock;

  do {
    STSRow* pTSRow = NULL;
2692
    tsdbGetNextRowInMem(pBlockScanInfo, pReader, &pTSRow, endKey);
2693 2694
    if (pTSRow == NULL) {
      break;
H
Haojun Liao 已提交
2695 2696
    }

2697
    doAppendOneRow(pBlock, pReader, pTSRow);
2698
    taosMemoryFree(pTSRow);
H
Haojun Liao 已提交
2699 2700

    // no data in buffer, return immediately
2701
    if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) {
H
Haojun Liao 已提交
2702 2703 2704
      break;
    }

2705
    if (pBlock->info.rows >= capacity) {
H
Haojun Liao 已提交
2706 2707 2708 2709
      break;
    }
  } while (1);

2710
  ASSERT(pBlock->info.rows <= capacity);
H
Haojun Liao 已提交
2711 2712
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
2713

2714
// todo refactor, use arraylist instead
H
Hongze Cheng 已提交
2715
int32_t tsdbSetTableId(STsdbReader* pReader, int64_t uid) {
2716 2717 2718 2719 2720
  ASSERT(pReader != NULL);
  taosHashClear(pReader->status.pTableMap);

  STableBlockScanInfo info = {.lastKey = 0, .uid = uid};
  taosHashPut(pReader->status.pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info));
H
Hongze Cheng 已提交
2721 2722 2723
  return TDB_CODE_SUCCESS;
}

dengyihao's avatar
dengyihao 已提交
2724 2725 2726 2727 2728 2729
void* tsdbGetIdx(SMeta* pMeta) {
  if (pMeta == NULL) {
    return NULL;
  }
  return metaGetIdx(pMeta);
}
dengyihao's avatar
dengyihao 已提交
2730

dengyihao's avatar
dengyihao 已提交
2731 2732 2733 2734 2735 2736
void* tsdbGetIvtIdx(SMeta* pMeta) {
  if (pMeta == NULL) {
    return NULL;
  }
  return metaGetIvtIdx(pMeta);
}
L
Liu Jicong 已提交
2737

C
Cary Xu 已提交
2738 2739 2740 2741 2742 2743 2744 2745 2746 2747
/**
 * @brief Get all suids since suid
 *
 * @param pMeta
 * @param suid return all suids in one vnode if suid is 0
 * @param list
 * @return int32_t
 */
int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) {
  SMStbCursor* pCur = metaOpenStbCursor(pMeta, suid);
L
Liu Jicong 已提交
2748
  if (!pCur) {
C
Cary Xu 已提交
2749 2750
    return TSDB_CODE_FAILED;
  }
C
Cary Xu 已提交
2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764

  while (1) {
    tb_uid_t id = metaStbCursorNext(pCur);
    if (id == 0) {
      break;
    }

    taosArrayPush(list, &id);
  }

  metaCloseStbCursor(pCur);
  return TSDB_CODE_SUCCESS;
}

H
refact  
Hongze Cheng 已提交
2765
// ====================================== EXPOSED APIs ======================================
2766 2767
int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader,
                       const char* idstr) {
H
Haojun Liao 已提交
2768
  int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, idstr);
H
Haojun Liao 已提交
2769 2770 2771
  if (code) {
    goto _err;
  }
H
Hongze Cheng 已提交
2772

2773 2774 2775 2776 2777 2778 2779 2780
  if (pCond->suid != 0) {
    (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, (*ppReader)->suid, -1);
    ASSERT((*ppReader)->pSchema);
  } else if (taosArrayGetSize(pTableList) > 0) {
    STableKeyInfo* pKey = taosArrayGet(pTableList, 0);
    (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, pKey->uid, -1);
  }

H
Haojun Liao 已提交
2781
  STsdbReader* pReader = *ppReader;
2782
  if (isEmptyQueryTimeWindow(&pReader->window)) {
H
Haojun Liao 已提交
2783 2784 2785
    tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr);
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
2786

2787 2788
  int32_t numOfTables = taosArrayGetSize(pTableList);
  pReader->status.pTableMap = createDataBlockScanInfo(pReader, pTableList->pData, numOfTables);
H
Haojun Liao 已提交
2789 2790 2791
  if (pReader->status.pTableMap == NULL) {
    tsdbReaderClose(pReader);
    *ppReader = NULL;
H
Haojun Liao 已提交
2792

H
Haojun Liao 已提交
2793 2794 2795
    code = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _err;
  }
H
Hongze Cheng 已提交
2796

2797 2798 2799
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

  STsdbFSState* pFState = pReader->pTsdb->fs->cState;
2800
  initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr);
2801 2802 2803 2804 2805 2806 2807
  resetDataBlockIterator(&pReader->status.blockIter, pReader->order);

  // no data in files, let's try buffer in memory
  if (pReader->status.fileIter.numOfFiles == 0) {
    pReader->status.loadFromFile = false;
  } else {
    code = initForFirstBlockInFile(pReader, pBlockIter);
2808
    if (code != TSDB_CODE_SUCCESS) {
2809 2810 2811 2812
      return code;
    }
  }

2813
  tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr);
H
Hongze Cheng 已提交
2814
  return code;
H
Hongze Cheng 已提交
2815 2816

_err:
2817
  tsdbError("failed to create data reader, code: %s %s", tstrerror(code), pReader->idStr);
H
Hongze Cheng 已提交
2818
  return code;
H
refact  
Hongze Cheng 已提交
2819 2820 2821
}

void tsdbReaderClose(STsdbReader* pReader) {
2822 2823
  if (pReader == NULL) {
    return;
2824
  }
H
refact  
Hongze Cheng 已提交
2825

2826 2827 2828 2829 2830 2831
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;

  taosMemoryFreeClear(pSupInfo->plist);
  taosMemoryFree(pSupInfo->colIds);

  taosArrayDestroy(pSupInfo->pColAgg);
L
Liu Jicong 已提交
2832
  for (int32_t i = 0; i < blockDataGetNumOfCols(pReader->pResBlock); ++i) {
2833 2834 2835 2836 2837 2838 2839 2840 2841
    if (pSupInfo->buildBuf[i] != NULL) {
      taosMemoryFreeClear(pSupInfo->buildBuf[i]);
    }
  }
  taosMemoryFree(pSupInfo->buildBuf);

  cleanupFilesetIterator(&pReader->status.fileIter);
  cleanupDataBlockIterator(&pReader->status.blockIter);
  destroyBlockScanInfo(pReader->status.pTableMap);
2842
  blockDataDestroy(pReader->pResBlock);
2843

H
Haojun Liao 已提交
2844 2845 2846
  if (pReader->pFileReader != NULL) {
    tsdbDataFReaderClose(&pReader->pFileReader);
  }
H
refact  
Hongze Cheng 已提交
2847

H
Haojun Liao 已提交
2848 2849 2850 2851
#if 0
//   if (pReader->status.pTableScanInfo != NULL) {
//     pReader->status.pTableScanInfo = destroyTableCheckInfo(pReader->status.pTableScanInfo);
//   }
H
refact  
Hongze Cheng 已提交
2852

H
Haojun Liao 已提交
2853
//   tsdbDestroyReadH(&pReader->rhelper);
H
refact  
Hongze Cheng 已提交
2854

H
Haojun Liao 已提交
2855 2856 2857 2858 2859 2860
//   tdFreeDataCols(pReader->pDataCols);
//   pReader->pDataCols = NULL;
//
//   pReader->prev = doFreeColumnInfoData(pReader->prev);
//   pReader->next = doFreeColumnInfoData(pReader->next);
#endif
H
refact  
Hongze Cheng 已提交
2861

2862
  SIOCostSummary* pCost = &pReader->cost;
H
refact  
Hongze Cheng 已提交
2863

2864 2865
  tsdbDebug("%p :io-cost summary: head-file read cnt:%" PRIu64 ", head-file time:%" PRIu64 " us, statis-info:%" PRId64
            " us, datablock:%" PRId64 " us, check data:%" PRId64 " us, %s",
2866
            pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaLoadTime, pCost->blockLoadTime,
2867
            pCost->checkForNextTime, pReader->idStr);
H
refact  
Hongze Cheng 已提交
2868

2869 2870 2871
  taosMemoryFree(pReader->idStr);
  taosMemoryFree(pReader->pSchema);
  taosMemoryFreeClear(pReader);
H
refact  
Hongze Cheng 已提交
2872 2873 2874
}

bool tsdbNextDataBlock(STsdbReader* pReader) {
2875
  if (isEmptyQueryTimeWindow(&pReader->window)) {
H
Haojun Liao 已提交
2876 2877
    return false;
  }
H
Hongze Cheng 已提交
2878

H
Haojun Liao 已提交
2879
  // cleanup the data that belongs to the previous data block
2880 2881
  SSDataBlock* pBlock = pReader->pResBlock;
  blockDataCleanup(pBlock);
H
Hongze Cheng 已提交
2882

2883 2884
  int64_t        stime = taosGetTimestampUs();
  int64_t        elapsedTime = stime;
2885
  SReaderStatus* pStatus = &pReader->status;
H
Haojun Liao 已提交
2886 2887

  if (pReader->type == BLOCK_LOAD_OFFSET_ORDER) {
2888
    if (pStatus->loadFromFile) {
2889
      int32_t code = buildBlockFromFiles(pReader);
2890 2891 2892 2893
      if (code != TSDB_CODE_SUCCESS) {
        return false;
      }

2894
      if (pBlock->info.rows > 0) {
2895
        return true;
2896
      } else {
H
Haojun Liao 已提交
2897
        buildBlockFromBufferSequentially(pReader);
2898
        return pBlock->info.rows > 0;
2899
      }
2900
    } else {  // no data in files, let's try the buffer
H
Haojun Liao 已提交
2901
      buildBlockFromBufferSequentially(pReader);
2902
      return pBlock->info.rows > 0;
H
Haojun Liao 已提交
2903 2904 2905
    }
  } else if (pReader->type == BLOCK_LOAD_TABLESEQ_ORDER) {
  } else if (pReader->type == BLOCK_LOAD_EXTERN_ORDER) {
2906 2907
  } else {
    ASSERT(0);
H
Haojun Liao 已提交
2908
  }
2909
  return false;
H
refact  
Hongze Cheng 已提交
2910 2911 2912
}

void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) {
2913 2914 2915 2916
  ASSERT(pDataBlockInfo != NULL && pReader != NULL);
  pDataBlockInfo->rows = pReader->pResBlock->info.rows;
  pDataBlockInfo->uid = pReader->pResBlock->info.uid;
  pDataBlockInfo->window = pReader->pResBlock->info.window;
H
Hongze Cheng 已提交
2917 2918
}

2919
int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) {
H
Hongze Cheng 已提交
2920
  int32_t code = 0;
2921
  *allHave = false;
H
Hongze Cheng 已提交
2922

2923
  // there is no statistics data for composed block
2924 2925 2926 2927
  if (pReader->status.composedDataBlock) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
2928

2929 2930 2931
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
  STableBlockScanInfo* pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
  SBlock*              pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
H
Hongze Cheng 已提交
2932

2933
  int64_t stime = taosGetTimestampUs();
H
Hongze Cheng 已提交
2934

2935 2936
  SBlockLoadSuppInfo* pSup = &pReader->suppInfo;

2937
  if (tBlockHasSma(pBlock)) {
2938
    code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg, NULL);
2939
    if (code != TSDB_CODE_SUCCESS) {
2940 2941
      tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code),
                pReader->idStr);
2942 2943
      return code;
    }
2944 2945 2946
  } else {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
2947
  }
H
Hongze Cheng 已提交
2948

2949
  *allHave = true;
H
Hongze Cheng 已提交
2950

2951 2952
  // always load the first primary timestamp column data
  SColumnDataAgg* pTsAgg = &pSup->tsColAgg;
2953

2954 2955
  pTsAgg->numOfNull = 0;
  pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID;
2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971
  pTsAgg->min = pReader->pResBlock->info.window.skey;
  pTsAgg->max = pReader->pResBlock->info.window.ekey;
  pSup->plist[0] = pTsAgg;

  // update the number of NULL data rows
  size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock);

  int32_t i = 0, j = 0;
  while (j < numOfCols && i < taosArrayGetSize(pSup->pColAgg)) {
    SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i);
    if (pAgg->colId == pSup->colIds[j]) {
      if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) {
        pSup->plist[j] = pAgg;
      } else {
        *allHave = false;
      }
2972 2973
      i += 1;
      j += 1;
2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986
    } else if (pAgg->colId < pSup->colIds[j]) {
      i += 1;
    } else if (pSup->colIds[j] < pAgg->colId) {
      j += 1;
    }
  }

  int64_t elapsed = taosGetTimestampUs() - stime;
  pReader->cost.smaLoadTime += elapsed;

  *pBlockStatis = pSup->plist;

  tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", elapsed time:%" PRId64 "us, %s", 0, pFBlock->uid,
2987 2988
            elapsed, pReader->idStr);

H
Hongze Cheng 已提交
2989
  return code;
H
Hongze Cheng 已提交
2990 2991
}

H
Hongze Cheng 已提交
2992
SArray* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) {
H
Haojun Liao 已提交
2993 2994 2995
  SReaderStatus* pStatus = &pReader->status;

  if (pStatus->composedDataBlock) {
2996
    return pReader->pResBlock->pDataBlock;
2997
  }
2998

2999
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pStatus->blockIter);
3000
  STableBlockScanInfo* pBlockScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
3001

3002 3003 3004 3005 3006
  int32_t code = tBlockDataInit(&pStatus->fileBlockData);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    return NULL;
  }
3007

3008 3009 3010 3011
  code = doLoadFileBlockData(pReader, &pStatus->blockIter, pBlockScanInfo, &pStatus->fileBlockData);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    return NULL;
3012
  }
3013 3014 3015

  copyBlockDataToSDataBlock(pReader, pBlockScanInfo);
  return pReader->pResBlock->pDataBlock;
H
Hongze Cheng 已提交
3016 3017
}

H
Haojun Liao 已提交
3018
int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) {
3019 3020 3021
  if (isEmptyQueryTimeWindow(&pReader->window)) {
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
3022

L
Liu Jicong 已提交
3023 3024
  pReader->order = pCond->order;
  pReader->type = BLOCK_LOAD_OFFSET_ORDER;
3025
  pReader->status.loadFromFile = true;
dengyihao's avatar
dengyihao 已提交
3026
  pReader->status.pTableIter = NULL;
H
Hongze Cheng 已提交
3027

H
Haojun Liao 已提交
3028
  pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);
H
Hongze Cheng 已提交
3029

3030
  // allocate buffer in order to load data blocks from file
3031
  memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg));
3032 3033
  memset(pReader->suppInfo.plist, 0, POINTER_BYTES);

3034
  pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID;
3035
  tsdbDataFReaderClose(&pReader->pFileReader);
3036

3037 3038 3039 3040
  // todo set the correct numOfTables
  int32_t         numOfTables = 1;
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

L
Liu Jicong 已提交
3041 3042
  tsdbDataFReaderClose(&pReader->pFileReader);

3043 3044 3045
  STsdbFSState* pFState = pReader->pTsdb->fs->cState;
  initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr);
  resetDataBlockIterator(&pReader->status.blockIter, pReader->order);
3046
  resetDataBlockScanInfo(pReader->status.pTableMap);
3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057

  int32_t code = 0;
  // no data in files, let's try buffer in memory
  if (pReader->status.fileIter.numOfFiles == 0) {
    pReader->status.loadFromFile = false;
  } else {
    code = initForFirstBlockInFile(pReader, pBlockIter);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }
H
Hongze Cheng 已提交
3058

dengyihao's avatar
dengyihao 已提交
3059 3060
  tsdbDebug("%p reset reader, suid:%" PRIu64 ", numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s",
            pReader, pReader->suid, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr);
3061
  return code;
H
Hongze Cheng 已提交
3062
}
H
Hongze Cheng 已提交
3063

3064 3065 3066
static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows) {
  return (numOfRows - startRow) / bucketRange;
}
H
Hongze Cheng 已提交
3067

3068 3069 3070 3071
int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) {
  int32_t code = TSDB_CODE_SUCCESS;
  pTableBlockInfo->totalSize = 0;
  pTableBlockInfo->totalRows = 0;
H
Hongze Cheng 已提交
3072

3073 3074
  // find the start data block in file
  SReaderStatus* pStatus = &pReader->status;
H
Hongze Cheng 已提交
3075

3076 3077 3078
  STsdbCfg* pc = &pReader->pTsdb->pVnode->config.tsdbCfg;
  pTableBlockInfo->defMinRows = pc->minRows;
  pTableBlockInfo->defMaxRows = pc->maxRows;
H
Hongze Cheng 已提交
3079

3080
  int32_t bucketRange = ceil((pc->maxRows - pc->minRows) / 20.0);
H
Hongze Cheng 已提交
3081

3082
  pTableBlockInfo->numOfFiles += 1;
H
Hongze Cheng 已提交
3083

3084 3085
  int32_t numOfTables = (int32_t)taosHashGetSize(pStatus->pTableMap);
  int     defaultRows = 4096;
H
Hongze Cheng 已提交
3086

3087 3088
  SDataBlockIter* pBlockIter = &pStatus->blockIter;
  pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles;
H
Haojun Liao 已提交
3089 3090 3091 3092

  if (pBlockIter->numOfBlocks > 0) {
    pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
  }
H
Hongze Cheng 已提交
3093

3094
  pTableBlockInfo->numOfTables = numOfTables;
H
Haojun Liao 已提交
3095
  bool hasNext = (pBlockIter->numOfBlocks > 0);
H
Hongze Cheng 已提交
3096

3097 3098 3099 3100 3101
  while (true) {
    if (hasNext) {
      SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
      STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
      SBlock*              pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);
H
Hongze Cheng 已提交
3102

3103 3104
      int32_t numOfRows = pBlock->nRow;
      pTableBlockInfo->totalRows += numOfRows;
H
Hongze Cheng 已提交
3105

3106 3107 3108
      if (numOfRows > pTableBlockInfo->maxRows) {
        pTableBlockInfo->maxRows = numOfRows;
      }
H
refact  
Hongze Cheng 已提交
3109

3110 3111 3112
      if (numOfRows < pTableBlockInfo->minRows) {
        pTableBlockInfo->minRows = numOfRows;
      }
H
refact  
Hongze Cheng 已提交
3113

3114 3115 3116
      if (numOfRows < defaultRows) {
        pTableBlockInfo->numOfSmallBlocks += 1;
      }
H
refact  
Hongze Cheng 已提交
3117

3118 3119
      int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows);
      pTableBlockInfo->blockRowsHisto[bucketIndex]++;
3120 3121 3122

      hasNext = blockIteratorNext(&pStatus->blockIter);

3123 3124 3125 3126 3127
    } else {
      code = initForFirstBlockInFile(pReader, pBlockIter);
      if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) {
        break;
      }
H
refact  
Hongze Cheng 已提交
3128

3129
      pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
3130
      hasNext = (pBlockIter->numOfBlocks > 0);
3131
    }
H
refact  
Hongze Cheng 已提交
3132

3133 3134
//    tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables,
//              pReader->pFileGroup->fid, pReader->idStr);
3135
  }
H
Hongze Cheng 已提交
3136

H
refact  
Hongze Cheng 已提交
3137 3138
  return code;
}
H
Hongze Cheng 已提交
3139

H
refact  
Hongze Cheng 已提交
3140
int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) {
3141
  int64_t rows = 0;
H
Hongze Cheng 已提交
3142

3143 3144
  SReaderStatus* pStatus = &pReader->status;
  pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);
H
Hongze Cheng 已提交
3145

3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167
  while (pStatus->pTableIter != NULL) {
    STableBlockScanInfo* pBlockScanInfo = pStatus->pTableIter;

    STbData* d = NULL;
    if (pReader->pTsdb->mem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pTsdb->mem, pReader->suid, pBlockScanInfo->uid, &d);
      if (d != NULL) {
        rows += tsdbGetNRowsInTbData(d);
      }
    }

    STbData* di = NULL;
    if (pReader->pTsdb->imem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pTsdb->imem, pReader->suid, pBlockScanInfo->uid, &di);
      if (di != NULL) {
        rows += tsdbGetNRowsInTbData(di);
      }
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
  }
H
Hongze Cheng 已提交
3168

H
refact  
Hongze Cheng 已提交
3169
  return rows;
H
Hongze Cheng 已提交
3170
}
D
dapan1121 已提交
3171

L
Liu Jicong 已提交
3172
int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) {
D
dapan1121 已提交
3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184
  int32_t sversion = 1;

  SMetaReader mr = {0};
  metaReaderInit(&mr, pVnode->pMeta, 0);
  int32_t code = metaGetTableEntryByUid(&mr, uid);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
    metaReaderClear(&mr);
    return terrno;
  }

  *suid = 0;
L
Liu Jicong 已提交
3185

D
dapan1121 已提交
3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201
  if (mr.me.type == TSDB_CHILD_TABLE) {
    *suid = mr.me.ctbEntry.suid;
    code = metaGetTableEntryByUid(&mr, *suid);
    if (code != TSDB_CODE_SUCCESS) {
      terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
      metaReaderClear(&mr);
      return terrno;
    }
    sversion = mr.me.stbEntry.schemaRow.version;
  } else {
    ASSERT(mr.me.type == TSDB_NORMAL_TABLE);
    sversion = mr.me.ntbEntry.schemaRow.version;
  }

  metaReaderClear(&mr);
  *pSchema = metaGetTbTSchema(pVnode->pMeta, uid, sversion);
L
Liu Jicong 已提交
3202

D
dapan1121 已提交
3203 3204 3205
  return TSDB_CODE_SUCCESS;
}