tsdbRead.c 110.6 KB
Newer Older
H
hjxilinx 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
Hongze Cheng 已提交
16
#include "tsdb.h"
17
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
H
Hongze Cheng 已提交
18

19
typedef struct {
dengyihao's avatar
dengyihao 已提交
20
  STbDataIter* iter;
21 22 23 24
  int32_t      index;
  bool         hasVal;
} SIterInfo;

H
Haojun Liao 已提交
25
typedef struct STableBlockScanInfo {
dengyihao's avatar
dengyihao 已提交
26 27 28 29 30 31 32 33 34
  uint64_t  uid;
  TSKEY     lastKey;
  SBlockIdx blockIdx;
  SArray*   pBlockList;  // block data index list
  SIterInfo iter;        // mem buffer skip list iterator
  SIterInfo iiter;       // imem buffer skip list iterator
  SArray*   delSkyline;  // delete info for this table
  int32_t   fileDelIndex;
  bool      iterInit;  // whether to initialize the in-memory skip list iterator or not
H
Haojun Liao 已提交
35 36 37
} STableBlockScanInfo;

typedef struct SBlockOrderWrapper {
dengyihao's avatar
dengyihao 已提交
38 39
  int64_t uid;
  SBlock* pBlock;
H
Haojun Liao 已提交
40
} SBlockOrderWrapper;
H
Hongze Cheng 已提交
41 42

typedef struct SBlockOrderSupporter {
43 44 45 46
  SBlockOrderWrapper** pDataBlockInfo;
  int32_t*             indexPerTable;
  int32_t*             numOfBlocksPerTable;
  int32_t              numOfTables;
H
Hongze Cheng 已提交
47 48 49
} SBlockOrderSupporter;

typedef struct SIOCostSummary {
H
Haojun Liao 已提交
50
  int64_t blockLoadTime;
51
  int64_t smaLoadTime;
H
Haojun Liao 已提交
52
  int64_t checkForNextTime;
53 54
  int64_t headFileLoad;
  int64_t headFileLoadTime;
H
Hongze Cheng 已提交
55 56 57
} SIOCostSummary;

typedef struct SBlockLoadSuppInfo {
58
  SArray*          pColAgg;
59
  SColumnDataAgg   tsColAgg;
C
Cary Xu 已提交
60
  SColumnDataAgg** plist;
61 62
  int16_t*         colIds;    // column ids for loading file block data
  char**           buildBuf;  // build string tmp buffer, todo remove it later after all string format being updated.
H
Hongze Cheng 已提交
63 64
} SBlockLoadSuppInfo;

65
typedef struct SFilesetIter {
H
Hongze Cheng 已提交
66 67 68 69
  int32_t numOfFiles;  // number of total files
  int32_t index;       // current accessed index in the list
  SArray* pFileList;   // data file list
  int32_t order;
70
} SFilesetIter;
H
Haojun Liao 已提交
71 72

typedef struct SFileDataBlockInfo {
dengyihao's avatar
dengyihao 已提交
73
  int32_t
L
Liu Jicong 已提交
74
      tbBlockIdx;  // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it
dengyihao's avatar
dengyihao 已提交
75
  uint64_t uid;
H
Haojun Liao 已提交
76 77 78
} SFileDataBlockInfo;

typedef struct SDataBlockIter {
dengyihao's avatar
dengyihao 已提交
79 80 81 82
  int32_t numOfBlocks;
  int32_t index;
  SArray* blockList;  // SArray<SFileDataBlockInfo>
  int32_t order;
H
Haojun Liao 已提交
83 84 85
} SDataBlockIter;

typedef struct SFileBlockDumpInfo {
dengyihao's avatar
dengyihao 已提交
86 87 88 89
  int32_t totalRows;
  int32_t rowIndex;
  int64_t lastKey;
  bool    allDumped;
H
Haojun Liao 已提交
90 91
} SFileBlockDumpInfo;

H
Haojun Liao 已提交
92
typedef struct SVersionRange {
dengyihao's avatar
dengyihao 已提交
93 94
  uint64_t minVer;
  uint64_t maxVer;
H
Haojun Liao 已提交
95 96
} SVersionRange;

H
Haojun Liao 已提交
97
typedef struct SReaderStatus {
dengyihao's avatar
dengyihao 已提交
98 99
  bool                 loadFromFile;  // check file stage
  SHashObj*            pTableMap;     // SHash<STableBlockScanInfo>
100
  STableBlockScanInfo* pTableIter;    // table iterator used in building in-memory buffer data blocks.
101
  SFileBlockDumpInfo   fBlockDumpInfo;
102

dengyihao's avatar
dengyihao 已提交
103 104 105 106 107
  SDFileSet*     pCurrentFileset;  // current opened file set
  SBlockData     fileBlockData;
  SFilesetIter   fileIter;
  SDataBlockIter blockIter;
  bool           composedDataBlock;  // the returned data block is a composed block or not
H
Haojun Liao 已提交
108 109
} SReaderStatus;

H
Hongze Cheng 已提交
110
struct STsdbReader {
H
Haojun Liao 已提交
111 112 113 114 115 116 117
  STsdb*             pTsdb;
  uint64_t           suid;
  int16_t            order;
  STimeWindow        window;  // the primary query time window that applies to all queries
  SSDataBlock*       pResBlock;
  int32_t            capacity;
  SReaderStatus      status;
118 119
  char*              idStr;  // query info handle, for debug purpose
  int32_t            type;   // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows
H
Hongze Cheng 已提交
120
  SBlockLoadSuppInfo suppInfo;
H
Hongze Cheng 已提交
121
  STsdbReadSnap*     pReadSnap;
122

L
Liu Jicong 已提交
123 124 125 126
  SIOCostSummary cost;
  STSchema*      pSchema;
  SDataFReader*  pFileReader;
  SVersionRange  verRange;
H
Hongze Cheng 已提交
127
};
H
Hongze Cheng 已提交
128

H
Haojun Liao 已提交
129
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter);
130 131
static int      buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                          STsdbReader* pReader);
132
static TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader);
133 134
static int32_t  doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                        SRowMerger* pMerger);
dengyihao's avatar
dengyihao 已提交
135 136
static int32_t  doMergeRowsInBuf(SIterInfo* pIter, int64_t ts, SArray* pDelList, SRowMerger* pMerger,
                                 STsdbReader* pReader);
137 138 139
static int32_t  doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow);
static void     setComposedBlockFlag(STsdbReader* pReader, bool composed);
static void     updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader);
140
static bool     hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order);
141

dengyihao's avatar
dengyihao 已提交
142 143
static void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow,
                             STsdbReader* pReader);
144 145
static void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                               STSRow** pTSRow);
dengyihao's avatar
dengyihao 已提交
146 147 148 149
static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData,
                                      STbData* piMemTbData);
static STsdb*  getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr,
                                   int8_t* pLevel);
150
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level);
H
Haojun Liao 已提交
151

152 153 154
static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) {
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;

155
  size_t numOfCols = blockDataGetNumOfCols(pBlock);
156

157
  pSupInfo->colIds = taosMemoryMalloc(numOfCols * sizeof(int16_t));
158
  pSupInfo->buildBuf = taosMemoryCalloc(numOfCols, POINTER_BYTES);
159 160 161
  if (pSupInfo->buildBuf == NULL || pSupInfo->colIds == NULL) {
    taosMemoryFree(pSupInfo->colIds);
    taosMemoryFree(pSupInfo->buildBuf);
H
Haojun Liao 已提交
162 163
    return TSDB_CODE_OUT_OF_MEMORY;
  }
H
Hongze Cheng 已提交
164

H
Haojun Liao 已提交
165 166
  for (int32_t i = 0; i < numOfCols; ++i) {
    SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i);
167
    pSupInfo->colIds[i] = pCol->info.colId;
168 169 170 171

    if (IS_VAR_DATA_TYPE(pCol->info.type)) {
      pSupInfo->buildBuf[i] = taosMemoryMalloc(pCol->info.bytes);
    }
H
Haojun Liao 已提交
172
  }
H
Hongze Cheng 已提交
173

H
Haojun Liao 已提交
174 175
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
176

177
static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableKeyInfo* idList, int32_t numOfTables) {
H
Haojun Liao 已提交
178
  // allocate buffer in order to load data blocks from file
179 180 181 182
  // todo use simple hash instead
  SHashObj* pTableMap =
      taosHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
  if (pTableMap == NULL) {
H
Haojun Liao 已提交
183 184 185
    return NULL;
  }

186 187 188 189 190
  for (int32_t j = 0; j < numOfTables; ++j) {
    STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid};
    if (ASCENDING_TRAVERSE(pTsdbReader->order)) {
      if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReader->window.skey) {
        info.lastKey = pTsdbReader->window.skey;
H
Haojun Liao 已提交
191 192
      }

193
      ASSERT(info.lastKey >= pTsdbReader->window.skey && info.lastKey <= pTsdbReader->window.ekey);
wmmhello's avatar
wmmhello 已提交
194
    } else {
195
      info.lastKey = pTsdbReader->window.skey;
H
Haojun Liao 已提交
196
    }
wmmhello's avatar
wmmhello 已提交
197

198 199 200
    taosHashPut(pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info));
    tsdbDebug("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, info.uid, info.lastKey,
              pTsdbReader->idStr);
H
Haojun Liao 已提交
201 202
  }

203
  return pTableMap;
H
Hongze Cheng 已提交
204
}
H
Hongze Cheng 已提交
205

206 207 208
static void resetDataBlockScanInfo(SHashObj* pTableMap) {
  STableBlockScanInfo* p = NULL;

dengyihao's avatar
dengyihao 已提交
209
  while ((p = taosHashIterate(pTableMap, p)) != NULL) {
210 211
    p->iterInit = false;
    p->iiter.hasVal = false;
dengyihao's avatar
dengyihao 已提交
212
    if (p->iter.iter != NULL) {
213
      p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter);
214 215
    }

216
    p->delSkyline = taosArrayDestroy(p->delSkyline);
217 218 219
  }
}

220 221 222 223 224 225 226 227
static void destroyBlockScanInfo(SHashObj* pTableMap) {
  STableBlockScanInfo* p = NULL;

  while ((p = taosHashIterate(pTableMap, p)) != NULL) {
    p->iterInit = false;
    p->iiter.hasVal = false;

    if (p->iter.iter != NULL) {
228
      p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter);
229 230 231
    }

    if (p->iiter.iter != NULL) {
232
      p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter);
233 234
    }

235 236
    p->delSkyline = taosArrayDestroy(p->delSkyline);
    p->pBlockList = taosArrayDestroy(p->pBlockList);
237 238 239 240 241
  }

  taosHashCleanup(pTableMap);
}

242
static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) {
243 244
  ASSERT(pWindow != NULL);
  return pWindow->skey > pWindow->ekey;
H
Haojun Liao 已提交
245
}
H
Hongze Cheng 已提交
246

247 248 249
// Update the query time window according to the data time to live(TTL) information, in order to avoid to return
// the expired data to client, even it is queried already.
static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) {
dengyihao's avatar
dengyihao 已提交
250
  STsdbKeepCfg* pCfg = &pTsdb->keepCfg;
H
Hongze Cheng 已提交
251

252
  int64_t now = taosGetTimestamp(pCfg->precision);
dengyihao's avatar
dengyihao 已提交
253
  int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1;  // needs to add one tick
254

dengyihao's avatar
dengyihao 已提交
255
  STimeWindow win = *pWindow;
256 257 258 259 260 261
  if (win.skey < earilyTs) {
    win.skey = earilyTs;
  }

  return win;
}
H
Hongze Cheng 已提交
262

H
Haojun Liao 已提交
263
static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* capacity) {
H
Haojun Liao 已提交
264 265 266 267 268 269
  int32_t rowLen = 0;
  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    rowLen += pCond->colList[i].bytes;
  }

  // make sure the output SSDataBlock size be less than 2MB.
H
Haojun Liao 已提交
270 271 272
  const int32_t TWOMB = 2 * 1024 * 1024;
  if ((*capacity) * rowLen > TWOMB) {
    (*capacity) = TWOMB / rowLen;
H
Haojun Liao 已提交
273 274 275 276
  }
}

// init file iterator
H
Hongze Cheng 已提交
277 278
static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) {
  size_t numOfFileset = taosArrayGetSize(aDFileSet);
279

280 281
  pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset;
  pIter->order = order;
H
Hongze Cheng 已提交
282
  pIter->pFileList = aDFileSet;
283
  pIter->numOfFiles = numOfFileset;
H
Haojun Liao 已提交
284

H
Haojun Liao 已提交
285
  tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr);
H
Haojun Liao 已提交
286 287 288
  return TSDB_CODE_SUCCESS;
}

289
static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) {
290 291
  bool    asc = ASCENDING_TRAVERSE(pIter->order);
  int32_t step = asc ? 1 : -1;
292 293 294
  pIter->index += step;

  if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) {
H
Haojun Liao 已提交
295 296 297 298 299
    return false;
  }

  // check file the time range of coverage
  STimeWindow win = {0};
H
Hongze Cheng 已提交
300

301
  while (1) {
H
Haojun Liao 已提交
302 303 304
    if (pReader->pFileReader != NULL) {
      tsdbDataFReaderClose(&pReader->pFileReader);
    }
305

306
    pReader->status.pCurrentFileset = (SDFileSet*)taosArrayGet(pIter->pFileList, pIter->index);
H
Haojun Liao 已提交
307

308 309 310 311
    int32_t code = tsdbDataFReaderOpen(&pReader->pFileReader, pReader->pTsdb, pReader->status.pCurrentFileset);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
    }
H
Haojun Liao 已提交
312

313 314 315 316 317 318 319 320 321 322 323 324
    int32_t fid = pReader->status.pCurrentFileset->fid;
    tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey);

    // current file are no longer overlapped with query time window, ignore remain files
    if ((asc && win.skey > pReader->window.ekey) || (!asc && win.ekey < pReader->window.skey)) {
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader,
                pReader->window.skey, pReader->window.ekey, pReader->idStr);
      return false;
    }

    if ((asc && (win.ekey < pReader->window.skey)) || ((!asc) && (win.skey > pReader->window.ekey))) {
      pIter->index += step;
325 326 327
      if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) {
        return false;
      }
328 329
      continue;
    }
C
Cary Xu 已提交
330

331 332
    tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, fid, pReader->window.skey,
              pReader->window.ekey, pReader->idStr);
333 334
    return true;
  }
335

336
_err:
H
Haojun Liao 已提交
337 338 339
  return false;
}

340
static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order) {
341 342
  pIter->order = order;
  pIter->index = -1;
H
Haojun Liao 已提交
343
  pIter->numOfBlocks = -1;
344 345 346 347 348 349 350
  if (pIter->blockList == NULL) {
    pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo));
  } else {
    taosArrayClear(pIter->blockList);
  }
}

L
Liu Jicong 已提交
351
static void cleanupDataBlockIterator(SDataBlockIter* pIter) { taosArrayDestroy(pIter->blockList); }
H
Haojun Liao 已提交
352

H
Haojun Liao 已提交
353
static void initReaderStatus(SReaderStatus* pStatus) {
dengyihao's avatar
dengyihao 已提交
354 355
  pStatus->pTableIter = NULL;
  pStatus->loadFromFile = true;
H
Haojun Liao 已提交
356 357
}

358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) {
  SSDataBlock* pResBlock = createDataBlock();
  if (pResBlock == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    SColumnInfoData colInfo = {{0}, 0};
    colInfo.info = pCond->colList[i];
    blockDataAppendColInfo(pResBlock, &colInfo);
  }

  int32_t code = blockDataEnsureCapacity(pResBlock, capacity);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    taosMemoryFree(pResBlock);
    return NULL;
  }

  return pResBlock;
}

H
Haojun Liao 已提交
381 382
static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, const char* idstr) {
  int32_t      code = 0;
383
  int8_t       level = 0;
H
Haojun Liao 已提交
384
  STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader));
H
Hongze Cheng 已提交
385 386
  if (pReader == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Haojun Liao 已提交
387
    goto _end;
H
Hongze Cheng 已提交
388 389
  }

H
Haojun Liao 已提交
390
  initReaderStatus(&pReader->status);
391

L
Liu Jicong 已提交
392
  pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level);
dengyihao's avatar
dengyihao 已提交
393 394 395 396 397
  pReader->suid = pCond->suid;
  pReader->order = pCond->order;
  pReader->capacity = 4096;
  pReader->idStr = (idstr != NULL) ? strdup(idstr) : NULL;
  pReader->verRange = getQueryVerRange(pVnode, pCond, level);
398
  pReader->type = pCond->type;
399
  pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);
400

401
  ASSERT(pCond->numOfCols > 0);
H
Hongze Cheng 已提交
402

403
  limitOutputBufferSize(pCond, &pReader->capacity);
404

405 406
  // allocate buffer in order to load data blocks from file
  SBlockLoadSuppInfo* pSup = &pReader->suppInfo;
407
  pSup->pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg));
408
  pSup->plist = taosMemoryCalloc(pCond->numOfCols, POINTER_BYTES);
409
  if (pSup->pColAgg == NULL || pSup->plist == NULL) {
410 411 412
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _end;
  }
H
Haojun Liao 已提交
413

414 415
  pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID;

416 417 418 419
  pReader->pResBlock = createResBlock(pCond, pReader->capacity);
  if (pReader->pResBlock == NULL) {
    code = terrno;
    goto _end;
H
Hongze Cheng 已提交
420
  }
H
Hongze Cheng 已提交
421

422 423
  setColumnIdSlotList(pReader, pReader->pResBlock);

H
Hongze Cheng 已提交
424 425
  *ppReader = pReader;
  return code;
H
Hongze Cheng 已提交
426

H
Haojun Liao 已提交
427 428
_end:
  tsdbReaderClose(pReader);
H
Hongze Cheng 已提交
429 430 431
  *ppReader = NULL;
  return code;
}
H
Hongze Cheng 已提交
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464

// void tsdbResetQueryHandleForNewTable(STsdbReader* queryHandle, SQueryTableDataCond* pCond, STableListInfo* tableList,
//                                      int32_t tWinIdx) {
//   STsdbReader* pTsdbReadHandle = queryHandle;

//   pTsdbReadHandle->order = pCond->order;
//   pTsdbReadHandle->window = pCond->twindows[tWinIdx];
//   pTsdbReadHandle->type = TSDB_QUERY_TYPE_ALL;
//   pTsdbReadHandle->cur.fid = -1;
//   pTsdbReadHandle->cur.win = TSWINDOW_INITIALIZER;
//   pTsdbReadHandle->checkFiles = true;
//   pTsdbReadHandle->activeIndex = 0;  // current active table index
//   pTsdbReadHandle->locateStart = false;
//   pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;

//   if (ASCENDING_TRAVERSE(pCond->order)) {
//     assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
//   } else {
//     assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
//   }

//   // allocate buffer in order to load data blocks from file
//   memset(pTsdbReadHandle->suppInfo.pstatis, 0, sizeof(SColumnDataAgg));
//   memset(pTsdbReadHandle->suppInfo.plist, 0, POINTER_BYTES);

//   tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
//   tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);

//   SArray* pTable = NULL;
//   //  STsdbMeta* pMeta = tsdbGetMeta(pTsdbReadHandle->pTsdb);

//   //  pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);

H
Haojun Liao 已提交
465
//   pTsdbReadHandle->pTableCheckInfo = NULL;  // createDataBlockScanInfo(pTsdbReadHandle, groupList, pMeta,
H
Hongze Cheng 已提交
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
//                                             // &pTable);
//   if (pTsdbReadHandle->pTableCheckInfo == NULL) {
//     //    tsdbReaderClose(pTsdbReadHandle);
//     terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
//   }

//   //  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//   //  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
// }

// SArray* tsdbGetQueriedTableList(STsdbReader** pHandle) {
//   assert(pHandle != NULL);

//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;

//   size_t  size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//   SArray* res = taosArrayInit(size, POINTER_BYTES);
//   return res;
// }

486 487
// static TSKEY extractFirstTraverseKey(STableBlockScanInfo* pCheckInfo, int32_t order, int32_t update, TDRowVerT
// maxVer) {
H
Hongze Cheng 已提交
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
//   TSDBROW row = {0};
//   STSRow *rmem = NULL, *rimem = NULL;

//   if (pCheckInfo->iter) {
//     if (tsdbTbDataIterGet(pCheckInfo->iter, &row)) {
//       rmem = row.pTSRow;
//     }
//   }

//   if (pCheckInfo->iiter) {
//     if (tsdbTbDataIterGet(pCheckInfo->iiter, &row)) {
//       rimem = row.pTSRow;
//     }
//   }

//   if (rmem == NULL && rimem == NULL) {
//     return TSKEY_INITIAL_VAL;
//   }

//   if (rmem != NULL && rimem == NULL) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
//     return TD_ROW_KEY(rmem);
//   }

//   if (rmem == NULL && rimem != NULL) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//     return TD_ROW_KEY(rimem);
//   }

//   TSKEY r1 = TD_ROW_KEY(rmem);
//   TSKEY r2 = TD_ROW_KEY(rimem);

//   if (r1 == r2) {
//     if (TD_SUPPORT_UPDATE(update)) {
//       pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
//     } else {
//       pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//       tsdbTbDataIterNext(pCheckInfo->iter);
//     }
//     return r1;
//   } else if (r1 < r2 && ASCENDING_TRAVERSE(order)) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
//     return r1;
//   } else {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//     return r2;
//   }
// }

H
Haojun Liao 已提交
537
// static bool moveToNextRowInMem(STableBlockScanInfo* pCheckInfo) {
H
Hongze Cheng 已提交
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
//   bool hasNext = false;
//   if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) {
//     if (pCheckInfo->iter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iter);
//     }

//     if (hasNext) {
//       return hasNext;
//     }

//     if (pCheckInfo->iiter != NULL) {
//       return tsdbTbDataIterGet(pCheckInfo->iiter, NULL);
//     }
//   } else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM) {
//     if (pCheckInfo->iiter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iiter);
//     }

//     if (hasNext) {
//       return hasNext;
//     }

//     if (pCheckInfo->iter != NULL) {
//       return tsdbTbDataIterGet(pCheckInfo->iter, NULL);
//     }
//   } else {
//     if (pCheckInfo->iter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iter);
//     }
//     if (pCheckInfo->iiter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iiter) || hasNext;
//     }
//   }
H
Hongze Cheng 已提交
571

H
Hongze Cheng 已提交
572 573
//   return hasNext;
// }
H
Hongze Cheng 已提交
574

H
Hongze Cheng 已提交
575 576 577
// static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
//   int32_t firstSlot = 0;
//   int32_t lastSlot = numOfBlocks - 1;
H
Hongze Cheng 已提交
578

H
Hongze Cheng 已提交
579
//   int32_t midSlot = firstSlot;
H
Hongze Cheng 已提交
580

H
Hongze Cheng 已提交
581 582 583
//   while (1) {
//     numOfBlocks = lastSlot - firstSlot + 1;
//     midSlot = (firstSlot + (numOfBlocks >> 1));
H
Hongze Cheng 已提交
584

H
Hongze Cheng 已提交
585
//     if (numOfBlocks == 1) break;
H
Hongze Cheng 已提交
586

H
Hongze Cheng 已提交
587 588 589 590 591 592 593 594 595 596 597
//     if (skey > pBlock[midSlot].maxKey.ts) {
//       if (numOfBlocks == 2) break;
//       if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].minKey.ts)) break;
//       firstSlot = midSlot + 1;
//     } else if (skey < pBlock[midSlot].minKey.ts) {
//       if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].maxKey.ts)) break;
//       lastSlot = midSlot - 1;
//     } else {
//       break;  // got the slot
//     }
//   }
H
Hongze Cheng 已提交
598

H
Hongze Cheng 已提交
599 600
//   return midSlot;
// }
H
Hongze Cheng 已提交
601

H
Haojun Liao 已提交
602
static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* pIndexList) {
603
  SArray* aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
H
Hongze Cheng 已提交
604

605
  int32_t code = tsdbReadBlockIdx(pFileReader, aBlockIdx, NULL);
H
Haojun Liao 已提交
606
  if (code != TSDB_CODE_SUCCESS) {
607
    goto _end;
H
Haojun Liao 已提交
608
  }
H
Hongze Cheng 已提交
609

H
Hongze Cheng 已提交
610 611
  if (taosArrayGetSize(aBlockIdx) == 0) {
    taosArrayClear(aBlockIdx);
H
Haojun Liao 已提交
612 613
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
614

615
  SBlockIdx* pBlockIdx;
H
Hongze Cheng 已提交
616
  for (int32_t i = 0; i < taosArrayGetSize(aBlockIdx); ++i) {
617
    pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i);
H
Haojun Liao 已提交
618

619
    // uid check
H
Hongze Cheng 已提交
620
    if (pBlockIdx->suid != pReader->suid) {
H
Haojun Liao 已提交
621 622 623 624
      continue;
    }

    // this block belongs to a table that is not queried.
H
Hongze Cheng 已提交
625
    void* p = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(uint64_t));
H
Haojun Liao 已提交
626 627 628 629
    if (p == NULL) {
      continue;
    }

630 631
    // todo: not valid info in bockIndex
    // time range check
632 633 634
    //    if (pBlockIdx->minKey > pReader->window.ekey || pBlockIdx->maxKey < pReader->window.skey) {
    //      continue;
    //    }
635 636

    // version check
637 638 639
    //    if (pBlockIdx->minVersion > pReader->verRange.maxVer || pBlockIdx->maxVersion < pReader->verRange.minVer) {
    //      continue;
    //    }
H
Haojun Liao 已提交
640 641 642 643 644 645

    STableBlockScanInfo* pScanInfo = p;
    if (pScanInfo->pBlockList == NULL) {
      pScanInfo->pBlockList = taosArrayInit(16, sizeof(SBlock));
    }

H
Hongze Cheng 已提交
646 647
    pScanInfo->blockIdx = *pBlockIdx;
    taosArrayPush(pIndexList, pBlockIdx);
H
Haojun Liao 已提交
648
  }
H
Hongze Cheng 已提交
649

650
_end:
H
Hongze Cheng 已提交
651
  taosArrayDestroy(aBlockIdx);
H
Haojun Liao 已提交
652 653
  return code;
}
H
Hongze Cheng 已提交
654

655 656
static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_t* numOfValidTables,
                               int32_t* numOfBlocks) {
H
Haojun Liao 已提交
657
  size_t numOfTables = taosArrayGetSize(pIndexList);
H
Hongze Cheng 已提交
658

H
Haojun Liao 已提交
659
  *numOfValidTables = 0;
H
Hongze Cheng 已提交
660

661
  STableBlockScanInfo* px = NULL;
dengyihao's avatar
dengyihao 已提交
662
  while (1) {
663 664 665 666 667 668 669 670
    px = taosHashIterate(pReader->status.pTableMap, px);
    if (px == NULL) {
      break;
    }

    taosArrayClear(px->pBlockList);
  }

dengyihao's avatar
dengyihao 已提交
671
  for (int32_t i = 0; i < numOfTables; ++i) {
H
Haojun Liao 已提交
672
    SBlockIdx* pBlockIdx = taosArrayGet(pIndexList, i);
H
Hongze Cheng 已提交
673

H
Hongze Cheng 已提交
674
    SMapData mapData = {0};
H
Haojun Liao 已提交
675 676
    tMapDataReset(&mapData);
    tsdbReadBlock(pReader->pFileReader, pBlockIdx, &mapData, NULL);
H
Hongze Cheng 已提交
677

H
Haojun Liao 已提交
678 679 680
    STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(int64_t));
    for (int32_t j = 0; j < mapData.nItem; ++j) {
      SBlock block = {0};
H
Hongze Cheng 已提交
681

H
Hongze Cheng 已提交
682
      tMapDataGetItemByIdx(&mapData, j, &block, tGetBlock);
H
Hongze Cheng 已提交
683

684
      // 1. time range check
685
      if (block.minKey.ts > pReader->window.ekey || block.maxKey.ts < pReader->window.skey) {
H
Haojun Liao 已提交
686 687
        continue;
      }
H
Hongze Cheng 已提交
688

689
      // 2. version range check
690 691 692
      if (block.minVersion > pReader->verRange.maxVer || block.maxVersion < pReader->verRange.minVer) {
        continue;
      }
693

H
Haojun Liao 已提交
694 695
      void* p = taosArrayPush(pScanInfo->pBlockList, &block);
      if (p == NULL) {
H
Haojun Liao 已提交
696
        tMapDataClear(&mapData);
H
Haojun Liao 已提交
697 698
        return TSDB_CODE_OUT_OF_MEMORY;
      }
699 700

      (*numOfBlocks) += 1;
H
Haojun Liao 已提交
701
    }
H
Hongze Cheng 已提交
702

H
Haojun Liao 已提交
703
    tMapDataClear(&mapData);
H
Haojun Liao 已提交
704 705 706 707
    if (pScanInfo->pBlockList != NULL && taosArrayGetSize(pScanInfo->pBlockList) > 0) {
      (*numOfValidTables) += 1;
    }
  }
H
Hongze Cheng 已提交
708

H
Haojun Liao 已提交
709 710
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
711

712 713
// todo remove pblock parameter
static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, SBlock* pBlock, int32_t order) {
714
  int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1;
H
Haojun Liao 已提交
715

716
  pDumpInfo->allDumped = true;
717
  pDumpInfo->lastKey = pBlock->maxKey.ts + step;
H
Haojun Liao 已提交
718 719
}

720 721
static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal,
                         SBlockLoadSuppInfo* pSup) {
H
Haojun Liao 已提交
722
  if (IS_VAR_DATA_TYPE(pColVal->type)) {
723
    if (pColVal->isNull || pColVal->isNone) {
H
Haojun Liao 已提交
724 725 726 727 728 729 730
      colDataAppendNULL(pColInfoData, rowIndex);
    } else {
      varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData);
      memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData);
      colDataAppend(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false);
    }
  } else {
731
    colDataAppend(pColInfoData, rowIndex, (const char*)&pColVal->value, pColVal->isNull || pColVal->isNone);
H
Haojun Liao 已提交
732
  }
H
Haojun Liao 已提交
733 734
}

735
static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
736
  SReaderStatus*  pStatus = &pReader->status;
737
  SDataBlockIter* pBlockIter = &pStatus->blockIter;
H
Hongze Cheng 已提交
738

739
  SBlockData*         pBlockData = &pStatus->fileBlockData;
H
Haojun Liao 已提交
740
  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter);
H
Haojun Liao 已提交
741 742
  SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
  SSDataBlock*        pResBlock = pReader->pResBlock;
743
  int32_t             numOfCols = blockDataGetNumOfCols(pResBlock);
H
Haojun Liao 已提交
744

H
Haojun Liao 已提交
745
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
746
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
H
Haojun Liao 已提交
747

748
  int64_t st = taosGetTimestampUs();
H
Haojun Liao 已提交
749

H
Haojun Liao 已提交
750
  SColVal cv = {0};
751 752
  int32_t colIndex = 0;

753 754
  bool    asc = ASCENDING_TRAVERSE(pReader->order);
  int32_t step = asc ? 1 : -1;
755

756
  int32_t rowIndex = 0;
757 758
  int32_t remain = asc ? (pBlockData->nRow - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex + 1);

759 760 761 762 763 764 765 766
  int32_t endIndex = 0;
  if (remain <= pReader->capacity) {
    endIndex = pBlockData->nRow;
  } else {
    endIndex = pDumpInfo->rowIndex + step * pReader->capacity;
    remain = pReader->capacity;
  }

767
  int32_t          i = 0;
768 769
  SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, i);
  if (pColData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
770
    for (int32_t j = pDumpInfo->rowIndex; j < endIndex && j >= 0; j += step) {
771 772 773 774 775
      colDataAppend(pColData, rowIndex++, (const char*)&pBlockData->aTSKEY[j], false);
    }
    i += 1;
  }

H
Hongze Cheng 已提交
776
  while (i < numOfCols && colIndex < taosArrayGetSize(pBlockData->aIdx)) {
777 778 779
    rowIndex = 0;
    pColData = taosArrayGet(pResBlock->pDataBlock, i);

H
Hongze Cheng 已提交
780
    SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex);
781 782

    if (pData->cid == pColData->info.colId) {
783
      for (int32_t j = pDumpInfo->rowIndex; j < endIndex && j >= 0; j += step) {
784 785
        tColDataGetValue(pData, j, &cv);
        doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo);
H
Haojun Liao 已提交
786
      }
787 788 789
      colIndex += 1;
    } else {  // the specified column does not exist in file block, fill with null data
      colDataAppendNNULL(pColData, 0, remain);
H
Haojun Liao 已提交
790
    }
791 792 793 794 795

    ASSERT(rowIndex == remain);
    i += 1;
  }

796
  while (i < numOfCols) {
797 798 799
    pColData = taosArrayGet(pResBlock->pDataBlock, i);
    colDataAppendNNULL(pColData, 0, remain);
    i += 1;
H
Haojun Liao 已提交
800
  }
H
Haojun Liao 已提交
801

802
  pResBlock->info.rows = remain;
803
  pDumpInfo->rowIndex += step * remain;
804 805

  setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
H
Haojun Liao 已提交
806

H
Haojun Liao 已提交
807 808
  int64_t elapsedTime = (taosGetTimestampUs() - st);
  pReader->cost.blockLoadTime += elapsedTime;
H
Haojun Liao 已提交
809

810
  int32_t unDumpedRows = asc ? pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1;
H
Haojun Liao 已提交
811
  tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
812
            ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%" PRId64 " us, %s",
813 814 815 816 817 818 819
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, remain, unDumpedRows,
            pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr);

  return TSDB_CODE_SUCCESS;
}

// todo consider the output buffer size
820 821
static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter,
                                   STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) {
822 823 824 825 826 827 828 829 830 831
  int64_t st = taosGetTimestampUs();

  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter);
  SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
  SSDataBlock*        pResBlock = pReader->pResBlock;
  int32_t             numOfCols = blockDataGetNumOfCols(pResBlock);

  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

H
Hongze Cheng 已提交
832 833
  int32_t code = tsdbReadColData(pReader->pFileReader, &pBlockScanInfo->blockIdx, pBlock, pSupInfo->colIds, numOfCols,
                                 pBlockData, NULL, NULL);
834 835 836 837 838 839 840 841 842
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  int64_t elapsedTime = (taosGetTimestampUs() - st);
  pReader->cost.blockLoadTime += elapsedTime;

  pDumpInfo->allDumped = false;
  tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
843
            ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%" PRId64 " us, %s",
844
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow,
H
Haojun Liao 已提交
845 846
            pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr);
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
847 848

_error:
H
Haojun Liao 已提交
849 850 851 852 853
  tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
            ", rows:%d, %s",
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow,
            pReader->idStr);
  return code;
H
Haojun Liao 已提交
854
}
H
Hongze Cheng 已提交
855

H
Hongze Cheng 已提交
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913
// static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
//   int    firstPos, lastPos, midPos = -1;
//   int    numOfRows;
//   TSKEY* keyList;

//   assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

//   if (num <= 0) return -1;

//   keyList = (TSKEY*)pValue;
//   firstPos = 0;
//   lastPos = num - 1;

//   if (order == TSDB_ORDER_DESC) {
//     // find the first position which is smaller than the key
//     while (1) {
//       if (key >= keyList[lastPos]) return lastPos;
//       if (key == keyList[firstPos]) return firstPos;
//       if (key < keyList[firstPos]) return firstPos - 1;

//       numOfRows = lastPos - firstPos + 1;
//       midPos = (numOfRows >> 1) + firstPos;

//       if (key < keyList[midPos]) {
//         lastPos = midPos - 1;
//       } else if (key > keyList[midPos]) {
//         firstPos = midPos + 1;
//       } else {
//         break;
//       }
//     }

//   } else {
//     // find the first position which is bigger than the key
//     while (1) {
//       if (key <= keyList[firstPos]) return firstPos;
//       if (key == keyList[lastPos]) return lastPos;

//       if (key > keyList[lastPos]) {
//         lastPos = lastPos + 1;
//         if (lastPos >= num)
//           return -1;
//         else
//           return lastPos;
//       }

//       numOfRows = lastPos - firstPos + 1;
//       midPos = (numOfRows >> 1) + firstPos;

//       if (key < keyList[midPos]) {
//         lastPos = midPos - 1;
//       } else if (key > keyList[midPos]) {
//         firstPos = midPos + 1;
//       } else {
//         break;
//       }
//     }
//   }
H
Hongze Cheng 已提交
914

H
Hongze Cheng 已提交
915 916
//   return midPos;
// }
H
Hongze Cheng 已提交
917

H
Hongze Cheng 已提交
918 919
// static void doCheckGeneratedBlockRange(STsdbReader* pTsdbReadHandle) {
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Hongze Cheng 已提交
920

H
Hongze Cheng 已提交
921 922 923 924 925 926
//   if (cur->rows > 0) {
//     if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
//       assert(cur->win.skey >= pTsdbReadHandle->window.skey && cur->win.ekey <= pTsdbReadHandle->window.ekey);
//     } else {
//       assert(cur->win.skey >= pTsdbReadHandle->window.ekey && cur->win.ekey <= pTsdbReadHandle->window.skey);
//     }
H
Hongze Cheng 已提交
927

H
Hongze Cheng 已提交
928 929 930 931 932
//     SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, 0);
//     assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] &&
//            cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows - 1]);
//   } else {
//     cur->win = pTsdbReadHandle->window;
H
Hongze Cheng 已提交
933

H
Hongze Cheng 已提交
934 935 936 937
//     int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1;
//     cur->lastKey = pTsdbReadHandle->window.ekey + step;
//   }
// }
H
Hongze Cheng 已提交
938

H
Haojun Liao 已提交
939
// static void copyAllRemainRowsFromFileBlock(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo,
H
Hongze Cheng 已提交
940 941
//                                            SDataBlockInfo* pBlockInfo, int32_t endPos) {
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Hongze Cheng 已提交
942

H
Hongze Cheng 已提交
943 944
//   SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
//   TSKEY*     tsArray = pCols->cols[0].pData;
H
Hongze Cheng 已提交
945

H
Hongze Cheng 已提交
946
//   bool ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
H
Hongze Cheng 已提交
947

H
Hongze Cheng 已提交
948
//   int32_t step = ascScan ? 1 : -1;
H
Hongze Cheng 已提交
949

H
Hongze Cheng 已提交
950 951
//   int32_t start = cur->pos;
//   int32_t end = endPos;
H
Hongze Cheng 已提交
952

H
Hongze Cheng 已提交
953 954 955
//   if (!ascScan) {
//     TSWAP(start, end);
//   }
H
Hongze Cheng 已提交
956

H
Hongze Cheng 已提交
957 958
//   assert(pTsdbReadHandle->outputCapacity >= (end - start + 1));
//   int32_t numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, start, end);
H
Hongze Cheng 已提交
959

H
Hongze Cheng 已提交
960 961 962 963 964
//   // the time window should always be ascending order: skey <= ekey
//   cur->win = (STimeWindow){.skey = tsArray[start], .ekey = tsArray[end]};
//   cur->mixBlock = (numOfRows != pBlockInfo->rows);
//   cur->lastKey = tsArray[endPos] + step;
//   cur->blockCompleted = (ascScan ? (endPos == pBlockInfo->rows - 1) : (endPos == 0));
H
Hongze Cheng 已提交
965

H
Hongze Cheng 已提交
966 967 968 969
//   // The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases.
//   int32_t pos = endPos + step;
//   updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
//   doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Hongze Cheng 已提交
970

H
Hongze Cheng 已提交
971 972 973 974
//   tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows,
//             pTsdbReadHandle->idStr);
// }
H
Hongze Cheng 已提交
975

H
Hongze Cheng 已提交
976 977
// // only return the qualified data to client in terms of query time window, data rows in the same block but do not
// // be included in the query time window will be discarded
H
Haojun Liao 已提交
978
// static void doMergeTwoLevelData(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo, SBlock* pBlock) {
H
Hongze Cheng 已提交
979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
//   SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
//   STsdbCfg*      pCfg = REPO_CFG(pTsdbReadHandle->pTsdb);

//   initTableMemIterator(pTsdbReadHandle, pCheckInfo);

//   SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
//   assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_ID &&
//          cur->pos >= 0 && cur->pos < pBlock->numOfRows);
//   // Even Multi-Version supported, the records with duplicated TSKEY would be merged inside of tsdbLoadData
//   interface. TSKEY* tsArray = pCols->cols[0].pData; assert(pCols->numOfRows == pBlock->numOfRows && tsArray[0] ==
//   pBlock->minKey.ts &&
//          tsArray[pBlock->numOfRows - 1] == pBlock->maxKey.ts);

//   bool    ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
//   int32_t step = ascScan ? 1 : -1;

//   // for search the endPos, so the order needs to reverse
//   int32_t order = ascScan ? TSDB_ORDER_DESC : TSDB_ORDER_ASC;

//   int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
//   int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &blockInfo);

//   STimeWindow* pWin = &blockInfo.window;
//   tsdbDebug("%p uid:%" PRIu64 " start merge data block, file block range:%" PRIu64 "-%" PRIu64
//             " rows:%d, start:%d, end:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, pWin->skey, pWin->ekey, blockInfo.rows, cur->pos, endPos,
//             pTsdbReadHandle->idStr);

//   // compared with the data from in-memory buffer, to generate the correct timestamp array list
//   int32_t numOfRows = 0;
//   int32_t curRow = 0;

//   int16_t   rv1 = -1;
//   int16_t   rv2 = -1;
//   STSchema* pSchema1 = NULL;
//   STSchema* pSchema2 = NULL;

//   int32_t pos = cur->pos;
//   cur->win = TSWINDOW_INITIALIZER;
//   bool adjustPos = false;

//   // no data in buffer, load data from file directly
//   if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
//     copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &blockInfo, endPos);
//     return;
//   } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
//     SSkipListNode* node = NULL;
//     TSKEY          lastKeyAppend = TSKEY_INITIAL_VAL;

//     do {
//       STSRow* row2 = NULL;
//       STSRow* row1 = getSRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, &row2, TD_VER_MAX);
//       if (row1 == NULL) {
//         break;
//       }

//       TSKEY key = TD_ROW_KEY(row1);
//       if ((key > pTsdbReadHandle->window.ekey && ascScan) || (key < pTsdbReadHandle->window.ekey && !ascScan)) {
//         break;
//       }

//       if (adjustPos) {
//         if (key == lastKeyAppend) {
//           pos -= step;
//         }
//         adjustPos = false;
//       }

//       if (((pos > endPos || tsArray[pos] > pTsdbReadHandle->window.ekey) && ascScan) ||
//           ((pos < endPos || tsArray[pos] < pTsdbReadHandle->window.ekey) && !ascScan)) {
//         break;
//       }

//       if ((key < tsArray[pos] && ascScan) || (key > tsArray[pos] && !ascScan)) {
//         if (rv1 != TD_ROW_SVER(row1)) {
//           //          pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1));
//           rv1 = TD_ROW_SVER(row1);
//         }
//         if (row2 && rv2 != TD_ROW_SVER(row2)) {
//           //          pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2));
//           rv2 = TD_ROW_SVER(row2);
//         }

//         numOfRows +=
//             mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols,
//                                pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend);
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = key;
//         }

//         cur->win.ekey = key;
//         cur->lastKey = key + step;
//         cur->mixBlock = true;
//         moveToNextRowInMem(pCheckInfo);
//       } else if (key == tsArray[pos]) {  // data in buffer has the same timestamp of data in file block, ignore it
//         if (TD_SUPPORT_UPDATE(pCfg->update)) {
//           if (lastKeyAppend != key) {
//             if (lastKeyAppend != TSKEY_INITIAL_VAL) {
//               ++curRow;
//             }
//             lastKeyAppend = key;
//           }
//           // load data from file firstly
//           numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, pos, pos);

//           if (rv1 != TD_ROW_SVER(row1)) {
//             rv1 = TD_ROW_SVER(row1);
//           }
//           if (row2 && rv2 != TD_ROW_SVER(row2)) {
//             rv2 = TD_ROW_SVER(row2);
//           }

//           // still assign data into current row
//           numOfRows +=
//               mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols,
//                                  pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend);

//           if (cur->win.skey == TSKEY_INITIAL_VAL) {
//             cur->win.skey = key;
//           }

//           cur->win.ekey = key;
//           cur->lastKey = key + step;
//           cur->mixBlock = true;

//           moveToNextRowInMem(pCheckInfo);

//           pos += step;
//           adjustPos = true;
//         } else {
//           // discard the memory record
//           moveToNextRowInMem(pCheckInfo);
//         }
//       } else if ((key > tsArray[pos] && ascScan) || (key < tsArray[pos] && !ascScan)) {
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = tsArray[pos];
//         }

//         int32_t end = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, key, order);
//         assert(end != -1);

//         if (tsArray[end] == key) {  // the value of key in cache equals to the end timestamp value, ignore it
// #if 0
//           if (pCfg->update == TD_ROW_DISCARD_UPDATE) {
//             moveToNextRowInMem(pCheckInfo);
//           } else {
//             end -= step;
//           }
// #endif
//           if (!TD_SUPPORT_UPDATE(pCfg->update)) {
//             moveToNextRowInMem(pCheckInfo);
//           } else {
//             end -= step;
//           }
//         }

//         int32_t qstart = 0, qend = 0;
//         getQualifiedRowsPos(pTsdbReadHandle, pos, end, numOfRows, &qstart, &qend);

//         if ((lastKeyAppend != TSKEY_INITIAL_VAL) && (lastKeyAppend != (ascScan ? tsArray[qstart] : tsArray[qend]))) {
//           ++curRow;
//         }

//         numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, qstart, qend);
//         pos += (qend - qstart + 1) * step;
//         if (numOfRows > 0) {
//           curRow = numOfRows - 1;
//         }

//         cur->win.ekey = ascScan ? tsArray[qend] : tsArray[qstart];
//         cur->lastKey = cur->win.ekey + step;
//         lastKeyAppend = cur->win.ekey;
//       }
//     } while (numOfRows < pTsdbReadHandle->outputCapacity);

//     if (numOfRows < pTsdbReadHandle->outputCapacity) {
//       /**
//        * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
//        * copy them all to result buffer, since it may be overlapped with file data block.
//        */
//       if (node == NULL || ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) > pTsdbReadHandle->window.ekey) && ascScan)
//       ||
//           ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) < pTsdbReadHandle->window.ekey) && !ascScan)) {
//         // no data in cache or data in cache is greater than the ekey of time window, load data from file block
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = tsArray[pos];
//         }

//         int32_t start = -1, end = -1;
//         getQualifiedRowsPos(pTsdbReadHandle, pos, endPos, numOfRows, &start, &end);

//         numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, start, end);
//         pos += (end - start + 1) * step;

//         cur->win.ekey = ascScan ? tsArray[end] : tsArray[start];
//         cur->lastKey = cur->win.ekey + step;
//         cur->mixBlock = true;
//       }
//     }
//   }
H
Hongze Cheng 已提交
1180

H
Hongze Cheng 已提交
1181 1182
//   cur->blockCompleted = (((pos > endPos || cur->lastKey > pTsdbReadHandle->window.ekey) && ascScan) ||
//                          ((pos < endPos || cur->lastKey < pTsdbReadHandle->window.ekey) && !ascScan));
H
Hongze Cheng 已提交
1183

H
Hongze Cheng 已提交
1184 1185 1186
//   if (!ascScan) {
//     TSWAP(cur->win.skey, cur->win.ekey);
//   }
H
Hongze Cheng 已提交
1187

H
Hongze Cheng 已提交
1188 1189
//   updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
//   doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Hongze Cheng 已提交
1190

H
Hongze Cheng 已提交
1191 1192 1193 1194
//   tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows,
//             pTsdbReadHandle->idStr);
// }
H
Hongze Cheng 已提交
1195

H
Haojun Liao 已提交
1196 1197 1198
static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) {
  taosMemoryFreeClear(pSup->numOfBlocksPerTable);
  taosMemoryFreeClear(pSup->indexPerTable);
H
Hongze Cheng 已提交
1199

H
Haojun Liao 已提交
1200 1201 1202 1203
  for (int32_t i = 0; i < pSup->numOfTables; ++i) {
    SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i];
    taosMemoryFreeClear(pBlockInfo);
  }
H
Hongze Cheng 已提交
1204

H
Haojun Liao 已提交
1205 1206
  taosMemoryFreeClear(pSup->pDataBlockInfo);
}
H
Hongze Cheng 已提交
1207

H
Haojun Liao 已提交
1208 1209
static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) {
  ASSERT(numOfTables >= 1);
H
Hongze Cheng 已提交
1210

H
Haojun Liao 已提交
1211
  pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
1212 1213
  pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
  pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables);
H
Hongze Cheng 已提交
1214

H
Haojun Liao 已提交
1215 1216 1217 1218
  if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) {
    cleanupBlockOrderSupporter(pSup);
    return TSDB_CODE_OUT_OF_MEMORY;
  }
H
Hongze Cheng 已提交
1219

H
Haojun Liao 已提交
1220 1221
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
1222

H
Haojun Liao 已提交
1223
static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
1224
  int32_t leftIndex = *(int32_t*)pLeft;
H
Haojun Liao 已提交
1225
  int32_t rightIndex = *(int32_t*)pRight;
H
Hongze Cheng 已提交
1226

H
Haojun Liao 已提交
1227
  SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;
H
Hongze Cheng 已提交
1228

H
Haojun Liao 已提交
1229 1230
  int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex];
  int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex];
H
Hongze Cheng 已提交
1231

H
Haojun Liao 已提交
1232 1233 1234 1235 1236 1237 1238
  if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftIndex]) {
    /* left block is empty */
    return 1;
  } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) {
    /* right block is empty */
    return -1;
  }
H
Hongze Cheng 已提交
1239

1240
  SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex];
H
Haojun Liao 已提交
1241
  SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex];
H
Hongze Cheng 已提交
1242

H
Haojun Liao 已提交
1243 1244
  return pLeftBlock->pBlock->aSubBlock[0].offset > pRightBlock->pBlock->aSubBlock[0].offset ? 1 : -1;
}
H
Hongze Cheng 已提交
1245

H
Haojun Liao 已提交
1246
static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks) {
1247
  bool asc = ASCENDING_TRAVERSE(pReader->order);
H
Haojun Liao 已提交
1248

1249
  pBlockIter->numOfBlocks = numOfBlocks;
1250 1251
  taosArrayClear(pBlockIter->blockList);

1252 1253
  // access data blocks according to the offset of each block in asc/desc order.
  int32_t numOfTables = (int32_t)taosHashGetSize(pReader->status.pTableMap);
H
Haojun Liao 已提交
1254

1255
  SBlockOrderSupporter sup = {0};
H
Haojun Liao 已提交
1256

1257 1258 1259 1260
  int32_t code = initBlockOrderSupporter(&sup, numOfTables);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }
H
Haojun Liao 已提交
1261

1262 1263 1264 1265 1266 1267 1268
  int32_t cnt = 0;
  void*   ptr = NULL;
  while (1) {
    ptr = taosHashIterate(pReader->status.pTableMap, ptr);
    if (ptr == NULL) {
      break;
    }
H
Haojun Liao 已提交
1269

1270 1271 1272 1273
    STableBlockScanInfo* pTableScanInfo = (STableBlockScanInfo*)ptr;
    if (pTableScanInfo->pBlockList == NULL || taosArrayGetSize(pTableScanInfo->pBlockList) == 0) {
      continue;
    }
H
Haojun Liao 已提交
1274

1275 1276
    size_t num = taosArrayGetSize(pTableScanInfo->pBlockList);
    sup.numOfBlocksPerTable[sup.numOfTables] = num;
H
Haojun Liao 已提交
1277

1278 1279 1280 1281 1282
    char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num);
    if (buf == NULL) {
      cleanupBlockOrderSupporter(&sup);
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1283

1284 1285 1286 1287 1288
    sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf;
    for (int32_t k = 0; k < num; ++k) {
      SBlockOrderWrapper wrapper = {0};
      wrapper.pBlock = (SBlock*)taosArrayGet(pTableScanInfo->pBlockList, k);
      wrapper.uid = pTableScanInfo->uid;
H
Haojun Liao 已提交
1289

1290 1291 1292 1293 1294 1295
      sup.pDataBlockInfo[sup.numOfTables][k] = wrapper;
      cnt++;
    }

    sup.numOfTables += 1;
  }
H
Haojun Liao 已提交
1296

1297
  ASSERT(numOfBlocks == cnt);
H
Haojun Liao 已提交
1298

1299 1300 1301 1302 1303
  // since there is only one table qualified, blocks are not sorted
  if (sup.numOfTables == 1) {
    for (int32_t i = 0; i < numOfBlocks; ++i) {
      SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i};
      taosArrayPush(pBlockIter->blockList, &blockInfo);
1304
    }
1305 1306
    tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted %s", pReader, cnt,
              pReader->idStr);
1307

1308
    pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
H
Haojun Liao 已提交
1309 1310

    cleanupBlockOrderSupporter(&sup);
1311
    return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1312
  }
H
Haojun Liao 已提交
1313

1314 1315
  tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables,
            pReader->idStr);
1316

1317
  assert(cnt <= numOfBlocks && sup.numOfTables <= numOfTables);
H
Haojun Liao 已提交
1318

1319 1320 1321 1322 1323
  SMultiwayMergeTreeInfo* pTree = NULL;
  uint8_t                 ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar);
  if (ret != TSDB_CODE_SUCCESS) {
    cleanupBlockOrderSupporter(&sup);
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
H
Haojun Liao 已提交
1324
  }
H
Haojun Liao 已提交
1325

1326 1327 1328 1329
  int32_t numOfTotal = 0;
  while (numOfTotal < cnt) {
    int32_t pos = tMergeTreeGetChosenIndex(pTree);
    int32_t index = sup.indexPerTable[pos]++;
H
Haojun Liao 已提交
1330

1331 1332
    SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index};
    taosArrayPush(pBlockIter->blockList, &blockInfo);
H
Haojun Liao 已提交
1333

1334 1335 1336 1337
    // set data block index overflow, in order to disable the offset comparator
    if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) {
      sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1;
    }
H
Haojun Liao 已提交
1338

1339 1340
    numOfTotal += 1;
    tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree));
H
Haojun Liao 已提交
1341
  }
H
Haojun Liao 已提交
1342

1343 1344 1345
  tsdbDebug("%p %d data blocks sort completed, %s", pReader, cnt, pReader->idStr);
  cleanupBlockOrderSupporter(&sup);
  taosMemoryFree(pTree);
H
Haojun Liao 已提交
1346

1347 1348
  pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1349
}
H
Hongze Cheng 已提交
1350

H
Haojun Liao 已提交
1351
static bool blockIteratorNext(SDataBlockIter* pBlockIter) {
1352 1353
  bool asc = ASCENDING_TRAVERSE(pBlockIter->order);

1354
  int32_t step = asc ? 1 : -1;
1355
  if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) {
1356 1357 1358
    return false;
  }

1359
  pBlockIter->index += step;
1360 1361 1362
  return true;
}

1363 1364 1365
/**
 * This is an two rectangles overlap cases.
 */
1366
static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SBlock* pBlock) {
1367 1368 1369 1370
  return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) ||
         (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) ||
         (pVerRange->minVer > pBlock->minVersion && pVerRange->minVer <= pBlock->maxVersion) ||
         (pVerRange->maxVer < pBlock->maxVersion && pVerRange->maxVer >= pBlock->minVersion);
H
Haojun Liao 已提交
1371
}
H
Hongze Cheng 已提交
1372

H
Haojun Liao 已提交
1373 1374 1375 1376
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) {
  SFileDataBlockInfo* pFBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index);
  return pFBlockInfo;
}
H
Hongze Cheng 已提交
1377

1378 1379
static SBlock* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STableBlockScanInfo* pTableBlockScanInfo,
                                           int32_t* nextIndex, int32_t order) {
1380 1381 1382
  bool asc = ASCENDING_TRAVERSE(order);
  if (asc && pFBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) {
    return NULL;
1383 1384
  }

1385
  if (!asc && pFBlockInfo->tbBlockIdx == 0) {
1386 1387 1388
    return NULL;
  }

1389
  int32_t step = asc ? 1 : -1;
1390 1391 1392 1393 1394 1395 1396 1397 1398

  *nextIndex = pFBlockInfo->tbBlockIdx + step;
  SBlock* pNext = taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex);
  return pNext;
}

static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) {
  ASSERT(pBlockIter != NULL && pFBlockInfo != NULL);

1399
  int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 1 : -1;
1400 1401
  int32_t index = pBlockIter->index;

1402
  while (index < pBlockIter->numOfBlocks && index >= 0) {
1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
    SFileDataBlockInfo* pFBlock = taosArrayGet(pBlockIter->blockList, index);
    if (pFBlock->uid == pFBlockInfo->uid && pFBlock->tbBlockIdx == pFBlockInfo->tbBlockIdx) {
      return index;
    }

    index += step;
  }

  ASSERT(0);
  return -1;
}

1415
static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) {
1416 1417 1418 1419 1420
  if (index < 0 || index >= pBlockIter->numOfBlocks) {
    return -1;
  }

  SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index);
1421 1422 1423 1424 1425
  pBlockIter->index += step;

  if (index != pBlockIter->index) {
    taosArrayRemove(pBlockIter->blockList, index);
    taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock);
1426

1427 1428 1429
    SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index);
    ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx);
  }
1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440

  return TSDB_CODE_SUCCESS;
}

static bool overlapWithNeighborBlock(SBlock* pBlock, SBlock* pNeighbor, int32_t order) {
  // it is the last block in current file, no chance to overlap with neighbor blocks.
  if (ASCENDING_TRAVERSE(order)) {
    return pBlock->maxKey.ts == pNeighbor->minKey.ts;
  } else {
    return pBlock->minKey.ts == pNeighbor->maxKey.ts;
  }
H
Haojun Liao 已提交
1441
}
H
Hongze Cheng 已提交
1442

1443
static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SBlock* pBlock) {
H
Haojun Liao 已提交
1444
  bool ascScan = ASCENDING_TRAVERSE(order);
H
Hongze Cheng 已提交
1445

1446
  return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->minKey.ts)) ||
1447
         (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->maxKey.ts));
H
Haojun Liao 已提交
1448
}
H
Hongze Cheng 已提交
1449

H
Haojun Liao 已提交
1450
static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVerRange) {
1451 1452
  return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVersion >= pVerRange->minVer) &&
         (pBlock->minVersion <= pVerRange->maxVer);
H
Haojun Liao 已提交
1453 1454
}

1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488
static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock) {
  size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline);

  for (int32_t i = pBlockScanInfo->fileDelIndex; i < num; i += 1) {
    TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i);
    if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) {
      if (p->version >= pBlock->minVersion) {
        return true;
      }
    } else if (p->ts < pBlock->minKey.ts) {  // p->ts < pBlock->minKey.ts
      if (p->version >= pBlock->minVersion) {
        if (i < num - 1) {
          TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1);
          if (i + 1 == num - 1) {  // pnext is the last point
            if (pnext->ts >= pBlock->minKey.ts) {
              return true;
            }
          } else {
            if (pnext->ts >= pBlock->minKey.ts && pnext->version >= pBlock->minVersion) {
              return true;
            }
          }
        } else {  // it must be the last point
          ASSERT(p->version == 0);
        }
      }
    } else {  // (p->ts > pBlock->maxKey.ts) {
      return false;
    }
  }

  return false;
}

1489
static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock, int32_t order) {
1490 1491 1492 1493
  if (pBlockScanInfo->delSkyline == NULL) {
    return false;
  }

1494
  // ts is not overlap
1495
  TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0);
L
Liu Jicong 已提交
1496
  TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline);
1497 1498 1499 1500 1501
  if (pBlock->minKey.ts > pLast->ts || pBlock->maxKey.ts < pFirst->ts) {
    return false;
  }

  // version is not overlap
1502 1503 1504 1505
  if (ASCENDING_TRAVERSE(order)) {
    return doCheckforDatablockOverlap(pBlockScanInfo, pBlock);
  } else {
    int32_t index = pBlockScanInfo->fileDelIndex;
1506
    while (1) {
1507 1508 1509 1510 1511
      TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index);
      if (p->ts > pBlock->minKey.ts && index > 0) {
        index -= 1;
      } else {  // find the first point that is smaller than the minKey.ts of dataBlock.
        break;
1512 1513 1514
      }
    }

1515 1516
    return doCheckforDatablockOverlap(pBlockScanInfo, pBlock);
  }
1517 1518
}

1519 1520 1521 1522
// 1. the version of all rows should be less than the endVersion
// 2. current block should not overlap with next neighbor block
// 3. current timestamp should not be overlap with each other
// 4. output buffer should be large enough to hold all rows in current block
1523
// 5. delete info should not overlap with current block data
1524 1525
static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock,
                                STableBlockScanInfo* pScanInfo, TSDBKEY key) {
1526 1527 1528
  int32_t neighborIndex = 0;
  SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order);

1529
  // overlap with neighbor
1530 1531 1532 1533 1534
  bool overlapWithNeighbor = false;
  if (pNeighbor) {
    overlapWithNeighbor = overlapWithNeighborBlock(pBlock, pNeighbor, pReader->order);
  }

1535
  // has duplicated ts of different version in this block
L
Liu Jicong 已提交
1536 1537
  bool hasDup = (pBlock->nSubBlock == 1) ? pBlock->hasDup : true;
  bool overlapWithDel = overlapWithDelSkyline(pScanInfo, pBlock, pReader->order);
1538

1539
  return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) ||
1540
          keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || overlapWithDel);
H
Haojun Liao 已提交
1541 1542
}

1543
static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) {
1544
  if (!(pBlockScanInfo->iiter.hasVal || pBlockScanInfo->iter.hasVal)) {
1545 1546
    return TSDB_CODE_SUCCESS;
  }
H
Haojun Liao 已提交
1547

1548 1549 1550
  SSDataBlock* pBlock = pReader->pResBlock;

  int64_t st = taosGetTimestampUs();
1551
  int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->capacity, pReader);
H
Haojun Liao 已提交
1552

1553
  blockDataUpdateTsWindow(pBlock, 0);
1554
  pBlock->info.uid = pBlockScanInfo->uid;
1555

1556
  setComposedBlockFlag(pReader, true);
1557 1558 1559 1560 1561 1562

  int64_t elapsedTime = taosGetTimestampUs() - st;
  tsdbDebug("%p build data block from cache completed, elapsed time:%" PRId64
            " us, numOfRows:%d, numOfCols:%d, brange: %" PRId64 " - %" PRId64 " %s",
            pReader, elapsedTime, pBlock->info.rows, (int32_t)blockDataGetNumOfCols(pBlock), pBlock->info.window.skey,
            pBlock->info.window.ekey, pReader->idStr);
H
Haojun Liao 已提交
1563 1564 1565
  return code;
}

1566
static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow,
1567
                                     STSRow* pTSRow, SIterInfo* pIter, int64_t key) {
1568 1569 1570 1571 1572
  SRowMerger          merge = {0};
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  TSDBKEY k = TSDBROW_KEY(pRow);
1573
  TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
1574
  SArray* pDelList = pBlockScanInfo->delSkyline;
1575

1576 1577 1578 1579 1580 1581 1582 1583
  // ascending order traverse
  if (ASCENDING_TRAVERSE(pReader->order)) {
    if (key < k.ts) {
      tRowMergerInit(&merge, &fRow, pReader->pSchema);

      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      tRowMergerGetRow(&merge, &pTSRow);
    } else if (k.ts < key) {  // k.ts < key
1584
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader);
1585 1586 1587
    } else {  // k.ts == key, ascending order: file block ----> imem rows -----> mem rows
      tRowMergerInit(&merge, &fRow, pReader->pSchema);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
1588 1589

      tRowMerge(&merge, pRow);
1590
      doMergeRowsInBuf(pIter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1591 1592

      tRowMergerGetRow(&merge, &pTSRow);
1593
    }
1594 1595
  } else {  // descending order scan
    if (key < k.ts) {
1596
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader);
1597 1598
    } else if (k.ts < key) {
      tRowMergerInit(&merge, &fRow, pReader->pSchema);
1599

1600 1601 1602 1603 1604 1605
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      tRowMergerGetRow(&merge, &pTSRow);
    } else {  // descending order: mem rows -----> imem rows ------> file block
      updateSchema(pRow, pBlockScanInfo->uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
1606
      doMergeRowsInBuf(pIter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1607 1608 1609 1610 1611 1612

      tRowMerge(&merge, &fRow);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);

      tRowMergerGetRow(&merge, &pTSRow);
    }
1613 1614
  }

1615
  tRowMergerClear(&merge);
1616 1617 1618 1619
  doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
  return TSDB_CODE_SUCCESS;
}

1620 1621 1622 1623
static int32_t doMergeThreeLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SRowMerger merge = {0};
  STSRow*    pTSRow = NULL;

1624 1625
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
dengyihao's avatar
dengyihao 已提交
1626
  SArray*             pDelList = pBlockScanInfo->delSkyline;
1627

1628 1629
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pDelList, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pDelList, pReader);
1630
  ASSERT(pRow != NULL && piRow != NULL);
H
Haojun Liao 已提交
1631

1632
  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
H
Haojun Liao 已提交
1633

1634
  uint64_t uid = pBlockScanInfo->uid;
H
Haojun Liao 已提交
1635

1636 1637 1638 1639
  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);

  if (ASCENDING_TRAVERSE(pReader->order)) {
1640 1641
    // [1&2] key <= [k.ts && ik.ts]
    if (key <= k.ts && key <= ik.ts) {
1642 1643 1644
      TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
      tRowMergerInit(&merge, &fRow, pReader->pSchema);

1645
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
H
Haojun Liao 已提交
1646

1647 1648
      if (ik.ts == key) {
        tRowMerge(&merge, piRow);
1649
        doMergeRowsInBuf(&pBlockScanInfo->iiter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1650 1651
      }

1652 1653
      if (k.ts == key) {
        tRowMerge(&merge, pRow);
1654
        doMergeRowsInBuf(&pBlockScanInfo->iter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1655 1656 1657 1658
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
1659
      return TSDB_CODE_SUCCESS;
1660
    } else {  // key > ik.ts || key > k.ts
1661 1662
      ASSERT(key != ik.ts);

1663
      // [3] ik.ts < key <= k.ts
1664
      // [4] ik.ts < k.ts <= key
1665
      if (ik.ts < k.ts) {
1666
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1667 1668 1669 1670
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

1671 1672
      // [5] k.ts < key   <= ik.ts
      // [6] k.ts < ik.ts <= key
1673
      if (k.ts < ik.ts) {
1674
        doMergeMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, &pTSRow, pReader);
1675 1676 1677 1678
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

1679
      // [7] k.ts == ik.ts < key
1680
      if (k.ts == ik.ts) {
1681 1682
        ASSERT(key > ik.ts && key > k.ts);

1683
        doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pTSRow);
1684 1685 1686 1687
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }
    }
1688 1689 1690 1691 1692 1693
  } else {  // descending order scan
    // [1/2] k.ts >= ik.ts && k.ts >= key
    if (k.ts >= ik.ts && k.ts >= key) {
      updateSchema(pRow, uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
1694
      doMergeRowsInBuf(&pBlockScanInfo->iter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1695 1696 1697

      if (ik.ts == k.ts) {
        tRowMerge(&merge, piRow);
1698
        doMergeRowsInBuf(&pBlockScanInfo->iiter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710
      }

      if (k.ts == key) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
      return TSDB_CODE_SUCCESS;
    } else {
1711
      ASSERT(ik.ts != k.ts);  // this case has been included in the previous if branch
1712 1713 1714 1715

      // [3] ik.ts > k.ts >= Key
      // [4] ik.ts > key >= k.ts
      if (ik.ts > key) {
1716
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

      // [5] key > ik.ts > k.ts
      // [6] key > k.ts > ik.ts
      if (key > ik.ts) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMergerInit(&merge, &fRow, pReader->pSchema);

        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

      //[7] key = ik.ts > k.ts
      if (key == ik.ts) {
1735
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747

        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  ASSERT(0);
S
Shengliang Guan 已提交
1748
  return -1;
1749 1750
}

dengyihao's avatar
dengyihao 已提交
1751 1752
static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo,
                                STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763
  // check for version and time range
  int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex];
  if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) {
    return false;
  }

  int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex];
  if (ts > pReader->window.ekey || ts < pReader->window.skey) {
    return false;
  }

1764
  TSDBKEY k = {.ts = ts, .version = ver};
1765
  if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k, pReader->order)) {
1766 1767 1768
    return false;
  }

1769 1770 1771
  return true;
}

1772
static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); }
1773

1774 1775 1776 1777 1778 1779 1780 1781
static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;

  SRowMerger merge = {0};
  STSRow*    pTSRow = NULL;

  int64_t  key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
1782 1783
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader);
1784

1785
  if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal) {
1786
    return doMergeThreeLevelRows(pReader, pBlockScanInfo);
1787
  } else {
1788
    // imem + file
1789 1790
    if (pBlockScanInfo->iiter.hasVal) {
      return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, pTSRow, &pBlockScanInfo->iiter, key);
1791 1792
    }

1793
    // mem + file
1794
    if (pBlockScanInfo->iter.hasVal) {
dengyihao's avatar
dengyihao 已提交
1795
      return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, pTSRow, &pBlockScanInfo->iter, key);
H
Haojun Liao 已提交
1796
    }
1797

1798
    // imem & mem are all empty, only file exist
1799
    TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
1800 1801 1802 1803
    tRowMergerInit(&merge, &fRow, pReader->pSchema);
    doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
    tRowMergerGetRow(&merge, &pTSRow);
    doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
1804

1805
    return TSDB_CODE_SUCCESS;
1806 1807 1808
  }
}

1809
static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
1810 1811
  SSDataBlock* pResBlock = pReader->pResBlock;

1812
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
1813 1814
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  int32_t             step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;
1815

1816
  while (1) {
1817 1818
    // todo check the validate of row in file block
    {
1819
      if (!isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) {
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833
        pDumpInfo->rowIndex += step;

        SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
        SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);

        if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
          setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
          break;
        }

        continue;
      }
    }

1834
    buildComposedDataBlockImpl(pReader, pBlockScanInfo);
1835

1836 1837
    SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
    SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
1838

1839 1840 1841 1842 1843 1844 1845 1846
    // currently loaded file data block is consumed
    if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
      setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
      break;
    }

    if (pResBlock->info.rows >= pReader->capacity) {
      break;
1847 1848 1849 1850
    }
  }

  pResBlock->info.uid = pBlockScanInfo->uid;
1851 1852
  blockDataUpdateTsWindow(pResBlock, 0);

1853
  setComposedBlockFlag(pReader, true);
1854

1855 1856
  tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s", pReader,
            pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows,
1857
            pReader->idStr);
1858

1859 1860 1861 1862 1863
  return TSDB_CODE_SUCCESS;
}

void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; }

1864
static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
1865 1866 1867 1868
  if (pBlockScanInfo->iterInit) {
    return TSDB_CODE_SUCCESS;
  }

1869
  int32_t code = TSDB_CODE_SUCCESS;
1870 1871 1872 1873 1874 1875 1876 1877 1878

  TSDBKEY startKey = {0};
  if (ASCENDING_TRAVERSE(pReader->order)) {
    startKey = (TSDBKEY){.ts = pReader->window.skey, .version = pReader->verRange.minVer};
  } else {
    startKey = (TSDBKEY){.ts = pReader->window.ekey, .version = pReader->verRange.maxVer};
  }

  int32_t backward = (!ASCENDING_TRAVERSE(pReader->order));
1879 1880

  STbData* d = NULL;
H
Hongze Cheng 已提交
1881 1882
  if (pReader->pReadSnap->pMem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d);
1883
    if (d != NULL) {
1884
      code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter);
1885
      if (code == TSDB_CODE_SUCCESS) {
1886
        pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL);
1887

H
Haojun Liao 已提交
1888
        tsdbDebug("%p uid:%" PRId64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
1889 1890
                  "-%" PRId64 " %s",
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, d->minKey, d->maxKey, pReader->idStr);
1891
      } else {
1892 1893
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for imem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
1894
        return code;
1895 1896
      }
    }
H
Haojun Liao 已提交
1897
  } else {
1898
    tsdbDebug("%p uid:%" PRId64 ", no data in mem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
H
Haojun Liao 已提交
1899 1900
  }

1901
  STbData* di = NULL;
H
Hongze Cheng 已提交
1902 1903
  if (pReader->pReadSnap->pIMem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di);
1904
    if (di != NULL) {
1905
      code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter);
1906
      if (code == TSDB_CODE_SUCCESS) {
1907
        pBlockScanInfo->iiter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iiter.iter) != NULL);
1908

H
Haojun Liao 已提交
1909
        tsdbDebug("%p uid:%" PRId64 ", check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
1910
                  "-%" PRId64 " %s",
1911
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, di->minKey, di->maxKey, pReader->idStr);
1912
      } else {
1913 1914
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for mem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
1915
        return code;
1916 1917
      }
    }
H
Haojun Liao 已提交
1918 1919
  } else {
    tsdbDebug("%p uid:%" PRId64 ", no data in imem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
1920 1921
  }

1922 1923
  initDelSkylineIterator(pBlockScanInfo, pReader, d, di);

1924
  pBlockScanInfo->iterInit = true;
H
Haojun Liao 已提交
1925 1926 1927
  return TSDB_CODE_SUCCESS;
}

dengyihao's avatar
dengyihao 已提交
1928 1929
int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData,
                               STbData* piMemTbData) {
1930 1931 1932
  if (pBlockScanInfo->delSkyline != NULL) {
    return TSDB_CODE_SUCCESS;
  }
1933

1934 1935 1936
  int32_t code = 0;
  STsdb*  pTsdb = pReader->pTsdb;

1937 1938
  SArray* pDelData = taosArrayInit(4, sizeof(SDelData));

H
Hongze Cheng 已提交
1939
  SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile;
1940 1941 1942
  if (pDelFile) {
    SDelFReader* pDelFReader = NULL;
    code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
1943
    if (code != TSDB_CODE_SUCCESS) {
1944 1945 1946 1947 1948
      goto _err;
    }

    SArray* aDelIdx = taosArrayInit(4, sizeof(SDelIdx));
    if (aDelIdx == NULL) {
1949
      tsdbDelFReaderClose(&pDelFReader);
1950 1951 1952
      goto _err;
    }

1953
    code = tsdbReadDelIdx(pDelFReader, aDelIdx, NULL);
1954 1955 1956
    if (code != TSDB_CODE_SUCCESS) {
      taosArrayDestroy(aDelIdx);
      tsdbDelFReaderClose(&pDelFReader);
1957 1958
      goto _err;
    }
1959

1960 1961 1962
    SDelIdx  idx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid};
    SDelIdx* pIdx = taosArraySearch(aDelIdx, &idx, tCmprDelIdx, TD_EQ);

H
Haojun Liao 已提交
1963 1964
    if (pIdx != NULL) {
      code = tsdbReadDelData(pDelFReader, pIdx, pDelData, NULL);
1965 1966 1967 1968 1969 1970 1971
    }

    taosArrayDestroy(aDelIdx);
    tsdbDelFReaderClose(&pDelFReader);

    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
1972
    }
1973
  }
1974

1975 1976 1977 1978 1979 1980 1981
  SDelData* p = NULL;
  if (pMemTbData != NULL) {
    p = pMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
1982 1983
  }

1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997
  if (piMemTbData != NULL) {
    p = piMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
  }

  if (taosArrayGetSize(pDelData) > 0) {
    pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY));
    code = tsdbBuildDeleteSkyline(pDelData, 0, (int32_t)(taosArrayGetSize(pDelData) - 1), pBlockScanInfo->delSkyline);
  }

  taosArrayDestroy(pDelData);
dengyihao's avatar
dengyihao 已提交
1998 1999
  pBlockScanInfo->iter.index =
      ASCENDING_TRAVERSE(pReader->order) ? 0 : taosArrayGetSize(pBlockScanInfo->delSkyline) - 1;
2000 2001
  pBlockScanInfo->iiter.index = pBlockScanInfo->iter.index;
  pBlockScanInfo->fileDelIndex = pBlockScanInfo->iter.index;
2002 2003
  return code;

2004 2005 2006
_err:
  taosArrayDestroy(pDelData);
  return code;
2007 2008
}

2009 2010 2011
static TSDBKEY getCurrentKeyInBuf(SDataBlockIter* pBlockIter, STsdbReader* pReader) {
  TSDBKEY key = {.ts = TSKEY_INITIAL_VAL};

2012
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
2013 2014
  STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

2015 2016
  initMemDataIterator(pScanInfo, pReader);
  TSDBROW* pRow = getValidRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader);
2017
  if (pRow != NULL) {
2018 2019 2020
    key = TSDBROW_KEY(pRow);
  }

2021
  pRow = getValidRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader);
2022
  if (pRow != NULL) {
2023 2024 2025 2026 2027 2028 2029 2030 2031
    TSDBKEY k = TSDBROW_KEY(pRow);
    if (key.ts > k.ts) {
      key = k;
    }
  }

  return key;
}

H
Haojun Liao 已提交
2032 2033
static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) {
  SReaderStatus* pStatus = &pReader->status;
H
Hongze Cheng 已提交
2034
  SArray*        pIndexList = taosArrayInit(4, sizeof(SBlockIdx));
H
Haojun Liao 已提交
2035 2036

  while (1) {
2037
    bool hasNext = filesetIteratorNext(&pStatus->fileIter, pReader);
2038
    if (!hasNext) {  // no data files on disk
H
Haojun Liao 已提交
2039 2040 2041
      break;
    }

H
Haojun Liao 已提交
2042
    taosArrayClear(pIndexList);
H
Haojun Liao 已提交
2043 2044
    int32_t code = doLoadBlockIndex(pReader, pReader->pFileReader, pIndexList);
    if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2045
      taosArrayDestroy(pIndexList);
H
Haojun Liao 已提交
2046 2047 2048 2049 2050 2051 2052
      return code;
    }

    if (taosArrayGetSize(pIndexList) > 0) {
      uint32_t numOfValidTable = 0;
      code = doLoadFileBlock(pReader, pIndexList, &numOfValidTable, numOfBlocks);
      if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2053
        taosArrayDestroy(pIndexList);
H
Haojun Liao 已提交
2054 2055 2056 2057 2058 2059 2060 2061 2062 2063
        return code;
      }

      if (numOfValidTable > 0) {
        break;
      }
    }
    // no blocks in current file, try next files
  }

H
Haojun Liao 已提交
2064
  taosArrayDestroy(pIndexList);
H
Haojun Liao 已提交
2065 2066 2067
  return TSDB_CODE_SUCCESS;
}

2068 2069 2070
static int32_t doBuildDataBlock(STsdbReader* pReader) {
  int32_t code = TSDB_CODE_SUCCESS;

2071
  SReaderStatus*  pStatus = &pReader->status;
2072 2073
  SDataBlockIter* pBlockIter = &pStatus->blockIter;

2074 2075
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
  STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
2076 2077 2078 2079 2080

  SBlock* pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);

  TSDBKEY key = getCurrentKeyInBuf(pBlockIter, pReader);
  if (fileBlockShouldLoad(pReader, pFBlock, pBlock, pScanInfo, key)) {
2081 2082
    tBlockDataInit(&pStatus->fileBlockData);
    code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData);
2083 2084 2085 2086 2087
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // build composed data block
2088
    code = buildComposedDataBlock(pReader, pScanInfo);
2089 2090
  } else if (bufferDataInFileBlockGap(pReader->order, key, pBlock)) {
    // data in memory that are earlier than current file block
2091
    // todo rows in buffer should be less than the file block in asc, greater than file block in desc
2092
    int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? pBlock->minKey.ts : pBlock->maxKey.ts;
2093
    code = buildDataBlockFromBuf(pReader, pScanInfo, endKey);
2094
  } else {  // whole block is required, return it directly
2095
    SDataBlockInfo* pInfo = &pReader->pResBlock->info;
2096 2097 2098
    pInfo->rows = pBlock->nRow;
    pInfo->uid = pScanInfo->uid;
    pInfo->window = (STimeWindow){.skey = pBlock->minKey.ts, .ekey = pBlock->maxKey.ts};
2099
    setComposedBlockFlag(pReader, false);
2100
    setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock, pReader->order);
2101 2102 2103 2104 2105
  }

  return code;
}

H
Haojun Liao 已提交
2106
static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) {
2107 2108
  SReaderStatus* pStatus = &pReader->status;

2109
  while (1) {
2110 2111 2112
    if (pStatus->pTableIter == NULL) {
      pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);
      if (pStatus->pTableIter == NULL) {
H
Haojun Liao 已提交
2113
        return TSDB_CODE_SUCCESS;
2114 2115 2116 2117
      }
    }

    STableBlockScanInfo* pBlockScanInfo = pStatus->pTableIter;
2118
    initMemDataIterator(pBlockScanInfo, pReader);
2119

2120
    int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? INT64_MAX : INT64_MIN;
2121
    int32_t code = buildDataBlockFromBuf(pReader, pBlockScanInfo, endKey);
H
Haojun Liao 已提交
2122 2123 2124 2125
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

2126
    if (pReader->pResBlock->info.rows > 0) {
H
Haojun Liao 已提交
2127
      return TSDB_CODE_SUCCESS;
2128 2129 2130 2131 2132
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
    if (pStatus->pTableIter == NULL) {
H
Haojun Liao 已提交
2133
      return TSDB_CODE_SUCCESS;
2134 2135 2136 2137
    }
  }
}

2138
// set the correct start position in case of the first/last file block, according to the query time window
2139
static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
2140
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
2141
  STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
2142
  SBlock*              pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);
2143

2144 2145 2146
  SReaderStatus* pStatus = &pReader->status;

  SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo;
2147 2148 2149

  pDumpInfo->totalRows = pBlock->nRow;
  pDumpInfo->allDumped = false;
2150
  pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->order) ? 0 : pBlock->nRow - 1;
2151 2152
}

2153
static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167
  int32_t numOfBlocks = 0;
  int32_t code = moveToNextFile(pReader, &numOfBlocks);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // all data files are consumed, try data in buffer
  if (numOfBlocks == 0) {
    pReader->status.loadFromFile = false;
    return code;
  }

  // initialize the block iterator for a new fileset
  code = initBlockIterator(pReader, pBlockIter, numOfBlocks);
2168 2169

  // set the correct start position according to the query time window
2170
  initBlockDumpInfo(pReader, pBlockIter);
2171 2172 2173
  return code;
}

2174
static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) {
2175 2176
  return (!pDumpInfo->allDumped) &&
         ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc)));
2177 2178
}

2179
static int32_t buildBlockFromFiles(STsdbReader* pReader) {
H
Haojun Liao 已提交
2180
  int32_t code = TSDB_CODE_SUCCESS;
2181 2182
  bool    asc = ASCENDING_TRAVERSE(pReader->order);

2183 2184
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

2185
  while (1) {
2186
    SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
2187 2188
    STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

2189 2190
    SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

2191
    if (fileBlockPartiallyRead(pDumpInfo, asc)) {  // file data block is partially loaded
2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206
      code = buildComposedDataBlock(pReader, pScanInfo);
    } else {
      // current block are exhausted, try the next file block
      if (pDumpInfo->allDumped) {
        // try next data block in current file
        bool hasNext = blockIteratorNext(&pReader->status.blockIter);
        if (hasNext) {  // check for the next block in the block accessed order list
          initBlockDumpInfo(pReader, pBlockIter);
        } else {  // data blocks in current file are exhausted, let's try the next file now
          code = initForFirstBlockInFile(pReader, pBlockIter);

          // error happens or all the data files are completely checked
          if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) {
            return code;
          }
2207
        }
H
Haojun Liao 已提交
2208
      }
2209 2210 2211

      // current block is not loaded yet, or data in buffer may overlap with the file block.
      code = doBuildDataBlock(pReader);
2212 2213
    }

2214 2215 2216 2217 2218 2219 2220 2221
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pReader->pResBlock->info.rows > 0) {
      return TSDB_CODE_SUCCESS;
    }
  }
2222
}
H
refact  
Hongze Cheng 已提交
2223

2224 2225
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr,
                                  int8_t* pLevel) {
2226
  if (VND_IS_RSMA(pVnode)) {
2227
    int8_t  level = 0;
2228 2229
    int64_t now = taosGetTimestamp(pVnode->config.tsdbCfg.precision);

2230
    for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) {
2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243
      SRetention* pRetention = retentions + level;
      if (pRetention->keep <= 0) {
        if (level > 0) {
          --level;
        }
        break;
      }
      if ((now - pRetention->keep) <= winSKey) {
        break;
      }
      ++level;
    }

2244 2245
    int32_t     vgId = TD_VID(pVnode);
    const char* str = (idStr != NULL) ? idStr : "";
2246 2247

    if (level == TSDB_RETENTION_L0) {
2248
      *pLevel = TSDB_RETENTION_L0;
2249
      tsdbDebug("vgId:%d, rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L0, str);
2250 2251
      return VND_RSMA0(pVnode);
    } else if (level == TSDB_RETENTION_L1) {
2252
      *pLevel = TSDB_RETENTION_L1;
2253
      tsdbDebug("vgId:%d, rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L1, str);
2254 2255
      return VND_RSMA1(pVnode);
    } else {
2256
      *pLevel = TSDB_RETENTION_L2;
2257
      tsdbDebug("vgId:%d, rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L2, str);
2258 2259 2260 2261 2262 2263 2264
      return VND_RSMA2(pVnode);
    }
  }

  return VND_TSDB(pVnode);
}

H
Haojun Liao 已提交
2265
SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) {
L
Liu Jicong 已提交
2266
  int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion;
H
Haojun Liao 已提交
2267

2268
  if (VND_IS_RSMA(pVnode)) {
H
Haojun Liao 已提交
2269 2270 2271 2272
    return (SVersionRange){.minVer = startVer, .maxVer = tdRSmaGetMaxSubmitVer(pVnode->pSma, level)};
  }

  int64_t endVer = 0;
L
Liu Jicong 已提交
2273 2274
  if (pCond->endVersion ==
      -1) {  // user not specified end version, set current maximum version of vnode as the endVersion
H
Haojun Liao 已提交
2275 2276
    endVer = pVnode->state.applied;
  } else {
L
Liu Jicong 已提交
2277
    endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion;
2278 2279
  }

H
Haojun Liao 已提交
2280
  return (SVersionRange){.minVer = startVer, .maxVer = endVer};
2281 2282
}

H
Hongze Cheng 已提交
2283 2284 2285 2286
// // todo not unref yet, since it is not support multi-group interpolation query
// static UNUSED_FUNC void changeQueryHandleForInterpQuery(STsdbReader* pHandle) {
//   // filter the queried time stamp in the first place
//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;
H
refact  
Hongze Cheng 已提交
2287

H
Hongze Cheng 已提交
2288 2289
//   // starts from the buffer in case of descending timestamp order check data blocks
//   size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
refact  
Hongze Cheng 已提交
2290

H
Hongze Cheng 已提交
2291 2292
//   int32_t i = 0;
//   while (i < numOfTables) {
H
Haojun Liao 已提交
2293
//     STableBlockScanInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
refact  
Hongze Cheng 已提交
2294

H
Hongze Cheng 已提交
2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308
//     // the first qualified table for interpolation query
//     //    if ((pTsdbReadHandle->window.skey <= pCheckInfo->pTableObj->lastKey) &&
//     //        (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) {
//     //      break;
//     //    }

//     i++;
//   }

//   // there are no data in all the tables
//   if (i == numOfTables) {
//     return;
//   }

H
Haojun Liao 已提交
2309
//   STableBlockScanInfo info = *(STableBlockScanInfo*)taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Hongze Cheng 已提交
2310 2311 2312 2313 2314 2315
//   taosArrayClear(pTsdbReadHandle->pTableCheckInfo);

//   info.lastKey = pTsdbReadHandle->window.skey;
//   taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info);
// }

2316
bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order) {
2317 2318 2319 2320
  ASSERT(pKey != NULL);
  if (pDelList == NULL) {
    return false;
  }
L
Liu Jicong 已提交
2321 2322 2323
  size_t  num = taosArrayGetSize(pDelList);
  bool    asc = ASCENDING_TRAVERSE(order);
  int32_t step = asc ? 1 : -1;
2324

2325 2326 2327 2328 2329 2330
  if (asc) {
    if (*index >= num - 1) {
      TSDBKEY* last = taosArrayGetLast(pDelList);
      ASSERT(pKey->ts >= last->ts);

      if (pKey->ts > last->ts) {
2331
        return false;
2332 2333 2334
      } else if (pKey->ts == last->ts) {
        TSDBKEY* prev = taosArrayGet(pDelList, num - 2);
        return (prev->version >= pKey->version);
2335 2336
      }
    } else {
2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366
      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
      TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1);

      if (pKey->ts < pCurrent->ts) {
        return false;
      }

      if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
        return true;
      }

      while (pNext->ts <= pKey->ts && (*index) < num - 1) {
        (*index) += 1;

        if ((*index) < num - 1) {
          pCurrent = taosArrayGet(pDelList, *index);
          pNext = taosArrayGet(pDelList, (*index) + 1);

          // it is not a consecutive deletion range, ignore it
          if (pCurrent->version == 0 && pNext->version > 0) {
            continue;
          }

          if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
            return true;
          }
        }
      }

      return false;
2367 2368
    }
  } else {
2369 2370
    if (*index <= 0) {
      TSDBKEY* pFirst = taosArrayGet(pDelList, 0);
2371

2372 2373 2374 2375 2376 2377 2378
      if (pKey->ts < pFirst->ts) {
        return false;
      } else if (pKey->ts == pFirst->ts) {
        return pFirst->version >= pKey->version;
      } else {
        ASSERT(0);
      }
2379
    } else {
2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406
      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
      TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1);

      if (pKey->ts > pCurrent->ts) {
        return false;
      }

      if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
        return true;
      }

      while (pPrev->ts >= pKey->ts && (*index) > 1) {
        (*index) += step;

        if ((*index) >= 1) {
          pCurrent = taosArrayGet(pDelList, *index);
          pPrev = taosArrayGet(pDelList, (*index) - 1);

          // it is not a consecutive deletion range, ignore it
          if (pCurrent->version > 0 && pPrev->version == 0) {
            continue;
          }

          if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
            return true;
          }
        }
2407 2408 2409 2410 2411
      }

      return false;
    }
  }
2412 2413

  return false;
2414 2415 2416 2417
}

TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) {
  if (!pIter->hasVal) {
H
Haojun Liao 已提交
2418 2419
    return NULL;
  }
H
Hongze Cheng 已提交
2420

2421
  TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter);
2422
  TSDBKEY  key = TSDBROW_KEY(pRow);
2423
  if (outOfTimeWindow(key.ts, &pReader->window)) {
2424
    pIter->hasVal = false;
H
Haojun Liao 已提交
2425 2426
    return NULL;
  }
H
Hongze Cheng 已提交
2427

2428
  // it is a valid data version
dengyihao's avatar
dengyihao 已提交
2429
  if ((key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer) &&
2430
      (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) {
H
Haojun Liao 已提交
2431 2432
    return pRow;
  }
H
Hongze Cheng 已提交
2433

2434
  while (1) {
2435 2436
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
H
Haojun Liao 已提交
2437 2438
      return NULL;
    }
H
Hongze Cheng 已提交
2439

2440
    pRow = tsdbTbDataIterGet(pIter->iter);
H
Hongze Cheng 已提交
2441

H
Haojun Liao 已提交
2442
    key = TSDBROW_KEY(pRow);
2443
    if (outOfTimeWindow(key.ts, &pReader->window)) {
2444
      pIter->hasVal = false;
H
Haojun Liao 已提交
2445 2446
      return NULL;
    }
H
Hongze Cheng 已提交
2447

dengyihao's avatar
dengyihao 已提交
2448
    if (key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer &&
2449
        (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) {
H
Haojun Liao 已提交
2450 2451 2452 2453
      return pRow;
    }
  }
}
H
Hongze Cheng 已提交
2454

dengyihao's avatar
dengyihao 已提交
2455
int32_t doMergeRowsInBuf(SIterInfo* pIter, int64_t ts, SArray* pDelList, SRowMerger* pMerger, STsdbReader* pReader) {
H
Haojun Liao 已提交
2456
  while (1) {
2457 2458
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
H
Haojun Liao 已提交
2459 2460
      break;
    }
H
Hongze Cheng 已提交
2461

2462
    // data exists but not valid
2463
    TSDBROW* pRow = getValidRow(pIter, pDelList, pReader);
2464 2465 2466 2467 2468
    if (pRow == NULL) {
      break;
    }

    // ts is not identical, quit
H
Haojun Liao 已提交
2469
    TSDBKEY k = TSDBROW_KEY(pRow);
2470
    if (k.ts != ts) {
H
Haojun Liao 已提交
2471 2472 2473 2474 2475 2476 2477 2478 2479
      break;
    }

    tRowMerge(pMerger, pRow);
  }

  return TSDB_CODE_SUCCESS;
}

2480
static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger,
2481
                                          SVersionRange* pVerRange, int32_t step) {
2482 2483
  while (pBlockData->aTSKEY[rowIndex] == key && rowIndex < pBlockData->nRow && rowIndex >= 0) {
    if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) {
2484
      rowIndex += step;
2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501
      continue;
    }

    TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex);
    tRowMerge(pMerger, &fRow);
    rowIndex += step;
  }

  return rowIndex;
}

typedef enum {
  CHECK_FILEBLOCK_CONT = 0x1,
  CHECK_FILEBLOCK_QUIT = 0x2,
} CHECK_FILEBLOCK_STATE;

static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SBlock* pBlock,
2502 2503
                                         SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key,
                                         CHECK_FILEBLOCK_STATE* state) {
2504
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
2505
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
2506

2507
  *state = CHECK_FILEBLOCK_QUIT;
2508
  int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;
2509 2510 2511

  int32_t nextIndex = -1;
  SBlock* pNeighborBlock = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &nextIndex, pReader->order);
2512
  if (pNeighborBlock == NULL) {  // do nothing
2513 2514 2515 2516 2517
    return 0;
  }

  bool overlap = overlapWithNeighborBlock(pBlock, pNeighborBlock, pReader->order);
  if (overlap) {  // load next block
2518
    SReaderStatus*  pStatus = &pReader->status;
2519 2520
    SDataBlockIter* pBlockIter = &pStatus->blockIter;

2521
    // 1. find the next neighbor block in the scan block list
2522
    SFileDataBlockInfo fb = {.uid = pFBlock->uid, .tbBlockIdx = nextIndex};
2523
    int32_t            neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb);
2524

2525
    // 2. remove it from the scan block list
2526
    setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step);
2527

2528
    // 3. load the neighbor block, and set it to be the currently accessed file data block
2529 2530 2531 2532 2533
    int32_t code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

2534
    // 4. check the data values
2535 2536 2537 2538
    initBlockDumpInfo(pReader, pBlockIter);

    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);
H
Haojun Liao 已提交
2539
    if (pDumpInfo->rowIndex >= pDumpInfo->totalRows) {
2540 2541 2542 2543 2544 2545 2546
      *state = CHECK_FILEBLOCK_CONT;
    }
  }

  return TSDB_CODE_SUCCESS;
}

2547 2548
int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                SRowMerger* pMerger) {
2549 2550
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

2551
  bool    asc = ASCENDING_TRAVERSE(pReader->order);
2552
  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
2553
  int32_t step = asc ? 1 : -1;
2554

2555 2556 2557 2558 2559
  pDumpInfo->rowIndex += step;
  if (pDumpInfo->rowIndex <= pBlockData->nRow - 1) {
    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);
  }
2560

2561 2562 2563 2564
  // all rows are consumed, let's try next file block
  if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) {
    while (1) {
      CHECK_FILEBLOCK_STATE st;
2565

2566 2567 2568 2569 2570
      SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter);
      SBlock*             pCurrentBlock = taosArrayGet(pScanInfo->pBlockList, pFileBlockInfo->tbBlockIdx);
      checkForNeighborFileBlock(pReader, pScanInfo, pCurrentBlock, pFileBlockInfo, pMerger, key, &st);
      if (st == CHECK_FILEBLOCK_QUIT) {
        break;
2571
      }
2572
    }
H
Haojun Liao 已提交
2573
  }
2574

H
Haojun Liao 已提交
2575 2576 2577
  return TSDB_CODE_SUCCESS;
}

2578
void updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader) {
2579 2580 2581
  int32_t sversion = TSDBROW_SVERSION(pRow);

  if (pReader->pSchema == NULL) {
M
Minglei Jin 已提交
2582
    metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &pReader->pSchema);
2583 2584
  } else if (pReader->pSchema->version != sversion) {
    taosMemoryFreeClear(pReader->pSchema);
M
Minglei Jin 已提交
2585
    metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &pReader->pSchema);
2586 2587 2588
  }
}

dengyihao's avatar
dengyihao 已提交
2589 2590
void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow,
                      STsdbReader* pReader) {
2591 2592 2593
  SRowMerger merge = {0};

  TSDBKEY k = TSDBROW_KEY(pRow);
2594
  updateSchema(pRow, uid, pReader);
H
Haojun Liao 已提交
2595

2596
  tRowMergerInit(&merge, pRow, pReader->pSchema);
2597
  doMergeRowsInBuf(pIter, k.ts, pDelList, &merge, pReader);
2598
  tRowMergerGetRow(&merge, pTSRow);
2599
  tRowMergerClear(&merge);
2600 2601
}

2602 2603
void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                        STSRow** pTSRow) {
H
Haojun Liao 已提交
2604 2605
  SRowMerger merge = {0};

2606 2607 2608
  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);

2609 2610 2611 2612
  if (ASCENDING_TRAVERSE(pReader->order)) {  // ascending order imem --> mem
    updateSchema(piRow, pBlockScanInfo->uid, pReader);

    tRowMergerInit(&merge, piRow, pReader->pSchema);
2613
    doMergeRowsInBuf(&pBlockScanInfo->iiter, ik.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2614

2615
    tRowMerge(&merge, pRow);
2616
    doMergeRowsInBuf(&pBlockScanInfo->iter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2617 2618
  } else {
    updateSchema(pRow, pBlockScanInfo->uid, pReader);
2619

2620
    tRowMergerInit(&merge, pRow, pReader->pSchema);
2621
    doMergeRowsInBuf(&pBlockScanInfo->iter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2622 2623

    tRowMerge(&merge, piRow);
2624
    doMergeRowsInBuf(&pBlockScanInfo->iiter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2625
  }
2626 2627 2628 2629

  tRowMergerGetRow(&merge, pTSRow);
}

2630 2631
int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STSRow** pTSRow,
                            int64_t endKey) {
2632 2633
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader);
dengyihao's avatar
dengyihao 已提交
2634
  SArray*  pDelList = pBlockScanInfo->delSkyline;
H
Haojun Liao 已提交
2635

2636 2637
  // todo refactor
  bool asc = ASCENDING_TRAVERSE(pReader->order);
2638
  if (pBlockScanInfo->iter.hasVal) {
2639 2640 2641 2642 2643 2644
    TSDBKEY k = TSDBROW_KEY(pRow);
    if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) {
      pRow = NULL;
    }
  }

2645
  if (pBlockScanInfo->iiter.hasVal) {
2646 2647 2648 2649 2650 2651
    TSDBKEY k = TSDBROW_KEY(piRow);
    if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) {
      piRow = NULL;
    }
  }

2652
  if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal && pRow != NULL && piRow != NULL) {
2653
    TSDBKEY k = TSDBROW_KEY(pRow);
2654
    TSDBKEY ik = TSDBROW_KEY(piRow);
H
Haojun Liao 已提交
2655

2656
    if (ik.ts < k.ts) {  // ik.ts < k.ts
2657
      doMergeMultiRows(piRow, pBlockScanInfo->uid, &pBlockScanInfo->iiter, pDelList, pTSRow, pReader);
2658
    } else if (k.ts < ik.ts) {
2659
      doMergeMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader);
2660 2661
    } else {  // ik.ts == k.ts
      doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, pTSRow);
H
Haojun Liao 已提交
2662
    }
2663 2664

    return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
2665 2666
  }

2667 2668
  if (pBlockScanInfo->iter.hasVal && pRow != NULL) {
    doMergeMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader);
H
Haojun Liao 已提交
2669 2670 2671
    return TSDB_CODE_SUCCESS;
  }

2672 2673
  if (pBlockScanInfo->iiter.hasVal && piRow != NULL) {
    doMergeMultiRows(piRow, pBlockScanInfo->uid, &pBlockScanInfo->iiter, pDelList, pTSRow, pReader);
H
Haojun Liao 已提交
2674 2675 2676 2677 2678 2679
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_SUCCESS;
}

2680 2681 2682 2683
int32_t doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow) {
  int32_t numOfRows = pBlock->info.rows;
  int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);

2684
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
2685
  STSchema*           pSchema = pReader->pSchema;
2686

2687
  SColVal colVal = {0};
2688
  int32_t i = 0, j = 0;
H
Haojun Liao 已提交
2689

2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709
  SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
  if (pColInfoData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
    colDataAppend(pColInfoData, numOfRows, (const char*)&pTSRow->ts, false);
    i += 1;
  }

  while (i < numOfCols && j < pSchema->numOfCols) {
    pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
    col_id_t colId = pColInfoData->info.colId;

    if (colId == pSchema->columns[j].colId) {
      tTSRowGetVal(pTSRow, pReader->pSchema, j, &colVal);
      doCopyColVal(pColInfoData, numOfRows, i, &colVal, pSupInfo);
      i += 1;
      j += 1;
    } else if (colId < pSchema->columns[j].colId) {
      colDataAppendNULL(pColInfoData, numOfRows);
      i += 1;
    } else if (colId > pSchema->columns[j].colId) {
      j += 1;
2710
    }
2711 2712
  }

2713
  // set null value since current column does not exist in the "pSchema"
2714
  while (i < numOfCols) {
2715 2716 2717 2718 2719
    pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
    colDataAppendNULL(pColInfoData, numOfRows);
    i += 1;
  }

2720 2721 2722 2723
  pBlock->info.rows += 1;
  return TSDB_CODE_SUCCESS;
}

2724 2725
int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                  STsdbReader* pReader) {
H
Haojun Liao 已提交
2726 2727 2728 2729
  SSDataBlock* pBlock = pReader->pResBlock;

  do {
    STSRow* pTSRow = NULL;
2730
    tsdbGetNextRowInMem(pBlockScanInfo, pReader, &pTSRow, endKey);
2731 2732
    if (pTSRow == NULL) {
      break;
H
Haojun Liao 已提交
2733 2734
    }

2735
    doAppendOneRow(pBlock, pReader, pTSRow);
2736
    taosMemoryFree(pTSRow);
H
Haojun Liao 已提交
2737 2738

    // no data in buffer, return immediately
2739
    if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) {
H
Haojun Liao 已提交
2740 2741 2742
      break;
    }

2743
    if (pBlock->info.rows >= capacity) {
H
Haojun Liao 已提交
2744 2745 2746 2747
      break;
    }
  } while (1);

2748
  ASSERT(pBlock->info.rows <= capacity);
H
Haojun Liao 已提交
2749 2750
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
2751

2752
// todo refactor, use arraylist instead
H
Hongze Cheng 已提交
2753
int32_t tsdbSetTableId(STsdbReader* pReader, int64_t uid) {
2754 2755 2756 2757 2758
  ASSERT(pReader != NULL);
  taosHashClear(pReader->status.pTableMap);

  STableBlockScanInfo info = {.lastKey = 0, .uid = uid};
  taosHashPut(pReader->status.pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info));
H
Hongze Cheng 已提交
2759 2760 2761
  return TDB_CODE_SUCCESS;
}

dengyihao's avatar
dengyihao 已提交
2762 2763 2764 2765 2766 2767
void* tsdbGetIdx(SMeta* pMeta) {
  if (pMeta == NULL) {
    return NULL;
  }
  return metaGetIdx(pMeta);
}
dengyihao's avatar
dengyihao 已提交
2768

dengyihao's avatar
dengyihao 已提交
2769 2770 2771 2772 2773 2774
void* tsdbGetIvtIdx(SMeta* pMeta) {
  if (pMeta == NULL) {
    return NULL;
  }
  return metaGetIvtIdx(pMeta);
}
L
Liu Jicong 已提交
2775

C
Cary Xu 已提交
2776 2777 2778 2779 2780 2781 2782 2783 2784 2785
/**
 * @brief Get all suids since suid
 *
 * @param pMeta
 * @param suid return all suids in one vnode if suid is 0
 * @param list
 * @return int32_t
 */
int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) {
  SMStbCursor* pCur = metaOpenStbCursor(pMeta, suid);
L
Liu Jicong 已提交
2786
  if (!pCur) {
C
Cary Xu 已提交
2787 2788
    return TSDB_CODE_FAILED;
  }
C
Cary Xu 已提交
2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802

  while (1) {
    tb_uid_t id = metaStbCursorNext(pCur);
    if (id == 0) {
      break;
    }

    taosArrayPush(list, &id);
  }

  metaCloseStbCursor(pCur);
  return TSDB_CODE_SUCCESS;
}

H
refact  
Hongze Cheng 已提交
2803
// ====================================== EXPOSED APIs ======================================
2804 2805
int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader,
                       const char* idstr) {
H
Haojun Liao 已提交
2806
  int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, idstr);
H
Haojun Liao 已提交
2807 2808 2809
  if (code) {
    goto _err;
  }
H
Hongze Cheng 已提交
2810

2811 2812 2813 2814 2815 2816 2817
  if (pCond->suid != 0) {
    (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, (*ppReader)->suid, -1);
  } else if (taosArrayGetSize(pTableList) > 0) {
    STableKeyInfo* pKey = taosArrayGet(pTableList, 0);
    (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, pKey->uid, -1);
  }

H
Haojun Liao 已提交
2818
  STsdbReader* pReader = *ppReader;
2819
  if (isEmptyQueryTimeWindow(&pReader->window)) {
H
Haojun Liao 已提交
2820 2821 2822
    tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr);
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
2823

2824 2825
  int32_t numOfTables = taosArrayGetSize(pTableList);
  pReader->status.pTableMap = createDataBlockScanInfo(pReader, pTableList->pData, numOfTables);
H
Haojun Liao 已提交
2826 2827 2828
  if (pReader->status.pTableMap == NULL) {
    tsdbReaderClose(pReader);
    *ppReader = NULL;
H
Haojun Liao 已提交
2829

H
Haojun Liao 已提交
2830 2831 2832
    code = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _err;
  }
H
Hongze Cheng 已提交
2833

2834 2835
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

H
Hongze Cheng 已提交
2836 2837 2838
  code = tsdbTakeReadSnap(pVnode->pTsdb, &pReader->pReadSnap);
  if (code) goto _err;

H
Hongze Cheng 已提交
2839
  initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr);
2840 2841 2842 2843 2844 2845 2846
  resetDataBlockIterator(&pReader->status.blockIter, pReader->order);

  // no data in files, let's try buffer in memory
  if (pReader->status.fileIter.numOfFiles == 0) {
    pReader->status.loadFromFile = false;
  } else {
    code = initForFirstBlockInFile(pReader, pBlockIter);
2847
    if (code != TSDB_CODE_SUCCESS) {
2848 2849 2850 2851
      return code;
    }
  }

2852
  tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr);
H
Hongze Cheng 已提交
2853
  return code;
H
Hongze Cheng 已提交
2854 2855

_err:
2856
  tsdbError("failed to create data reader, code: %s %s", tstrerror(code), pReader->idStr);
H
Hongze Cheng 已提交
2857
  return code;
H
refact  
Hongze Cheng 已提交
2858 2859 2860
}

void tsdbReaderClose(STsdbReader* pReader) {
2861 2862
  if (pReader == NULL) {
    return;
2863
  }
H
refact  
Hongze Cheng 已提交
2864

2865 2866
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;

H
Hongze Cheng 已提交
2867
  tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap);
H
Hongze Cheng 已提交
2868

2869 2870 2871 2872
  taosMemoryFreeClear(pSupInfo->plist);
  taosMemoryFree(pSupInfo->colIds);

  taosArrayDestroy(pSupInfo->pColAgg);
L
Liu Jicong 已提交
2873
  for (int32_t i = 0; i < blockDataGetNumOfCols(pReader->pResBlock); ++i) {
2874 2875 2876 2877 2878 2879 2880 2881
    if (pSupInfo->buildBuf[i] != NULL) {
      taosMemoryFreeClear(pSupInfo->buildBuf[i]);
    }
  }
  taosMemoryFree(pSupInfo->buildBuf);

  cleanupDataBlockIterator(&pReader->status.blockIter);
  destroyBlockScanInfo(pReader->status.pTableMap);
2882
  blockDataDestroy(pReader->pResBlock);
2883

H
Haojun Liao 已提交
2884 2885 2886
  if (pReader->pFileReader != NULL) {
    tsdbDataFReaderClose(&pReader->pFileReader);
  }
H
refact  
Hongze Cheng 已提交
2887

H
Haojun Liao 已提交
2888 2889 2890 2891
#if 0
//   if (pReader->status.pTableScanInfo != NULL) {
//     pReader->status.pTableScanInfo = destroyTableCheckInfo(pReader->status.pTableScanInfo);
//   }
H
refact  
Hongze Cheng 已提交
2892

H
Haojun Liao 已提交
2893
//   tsdbDestroyReadH(&pReader->rhelper);
H
refact  
Hongze Cheng 已提交
2894

H
Haojun Liao 已提交
2895 2896 2897 2898 2899 2900
//   tdFreeDataCols(pReader->pDataCols);
//   pReader->pDataCols = NULL;
//
//   pReader->prev = doFreeColumnInfoData(pReader->prev);
//   pReader->next = doFreeColumnInfoData(pReader->next);
#endif
H
refact  
Hongze Cheng 已提交
2901

2902
  SIOCostSummary* pCost = &pReader->cost;
H
refact  
Hongze Cheng 已提交
2903

2904 2905
  tsdbDebug("%p :io-cost summary: head-file read cnt:%" PRIu64 ", head-file time:%" PRIu64 " us, statis-info:%" PRId64
            " us, datablock:%" PRId64 " us, check data:%" PRId64 " us, %s",
2906
            pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaLoadTime, pCost->blockLoadTime,
2907
            pCost->checkForNextTime, pReader->idStr);
H
refact  
Hongze Cheng 已提交
2908

2909 2910 2911
  taosMemoryFree(pReader->idStr);
  taosMemoryFree(pReader->pSchema);
  taosMemoryFreeClear(pReader);
H
refact  
Hongze Cheng 已提交
2912 2913 2914
}

bool tsdbNextDataBlock(STsdbReader* pReader) {
2915
  if (isEmptyQueryTimeWindow(&pReader->window)) {
H
Haojun Liao 已提交
2916 2917
    return false;
  }
H
Hongze Cheng 已提交
2918

H
Haojun Liao 已提交
2919
  // cleanup the data that belongs to the previous data block
2920 2921
  SSDataBlock* pBlock = pReader->pResBlock;
  blockDataCleanup(pBlock);
H
Hongze Cheng 已提交
2922

2923 2924
  int64_t        stime = taosGetTimestampUs();
  int64_t        elapsedTime = stime;
2925
  SReaderStatus* pStatus = &pReader->status;
H
Haojun Liao 已提交
2926 2927

  if (pReader->type == BLOCK_LOAD_OFFSET_ORDER) {
2928
    if (pStatus->loadFromFile) {
2929
      int32_t code = buildBlockFromFiles(pReader);
2930 2931 2932 2933
      if (code != TSDB_CODE_SUCCESS) {
        return false;
      }

2934
      if (pBlock->info.rows > 0) {
2935
        return true;
2936
      } else {
H
Haojun Liao 已提交
2937
        buildBlockFromBufferSequentially(pReader);
2938
        return pBlock->info.rows > 0;
2939
      }
2940
    } else {  // no data in files, let's try the buffer
H
Haojun Liao 已提交
2941
      buildBlockFromBufferSequentially(pReader);
2942
      return pBlock->info.rows > 0;
H
Haojun Liao 已提交
2943 2944 2945
    }
  } else if (pReader->type == BLOCK_LOAD_TABLESEQ_ORDER) {
  } else if (pReader->type == BLOCK_LOAD_EXTERN_ORDER) {
2946 2947
  } else {
    ASSERT(0);
H
Haojun Liao 已提交
2948
  }
2949
  return false;
H
refact  
Hongze Cheng 已提交
2950 2951 2952
}

void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) {
2953 2954 2955 2956
  ASSERT(pDataBlockInfo != NULL && pReader != NULL);
  pDataBlockInfo->rows = pReader->pResBlock->info.rows;
  pDataBlockInfo->uid = pReader->pResBlock->info.uid;
  pDataBlockInfo->window = pReader->pResBlock->info.window;
H
Hongze Cheng 已提交
2957 2958
}

2959
int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) {
H
Hongze Cheng 已提交
2960
  int32_t code = 0;
2961
  *allHave = false;
H
Hongze Cheng 已提交
2962

2963
  // there is no statistics data for composed block
2964 2965 2966 2967
  if (pReader->status.composedDataBlock) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
2968

2969 2970 2971
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
  STableBlockScanInfo* pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
  SBlock*              pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
H
Hongze Cheng 已提交
2972

2973
  int64_t stime = taosGetTimestampUs();
H
Hongze Cheng 已提交
2974

2975 2976
  SBlockLoadSuppInfo* pSup = &pReader->suppInfo;

2977
  if (tBlockHasSma(pBlock)) {
2978
    code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg, NULL);
2979
    if (code != TSDB_CODE_SUCCESS) {
2980 2981
      tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code),
                pReader->idStr);
2982 2983
      return code;
    }
2984 2985 2986
  } else {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
2987
  }
H
Hongze Cheng 已提交
2988

2989
  *allHave = true;
H
Hongze Cheng 已提交
2990

2991 2992
  // always load the first primary timestamp column data
  SColumnDataAgg* pTsAgg = &pSup->tsColAgg;
2993

2994 2995
  pTsAgg->numOfNull = 0;
  pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID;
2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011
  pTsAgg->min = pReader->pResBlock->info.window.skey;
  pTsAgg->max = pReader->pResBlock->info.window.ekey;
  pSup->plist[0] = pTsAgg;

  // update the number of NULL data rows
  size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock);

  int32_t i = 0, j = 0;
  while (j < numOfCols && i < taosArrayGetSize(pSup->pColAgg)) {
    SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i);
    if (pAgg->colId == pSup->colIds[j]) {
      if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) {
        pSup->plist[j] = pAgg;
      } else {
        *allHave = false;
      }
3012 3013
      i += 1;
      j += 1;
3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026
    } else if (pAgg->colId < pSup->colIds[j]) {
      i += 1;
    } else if (pSup->colIds[j] < pAgg->colId) {
      j += 1;
    }
  }

  int64_t elapsed = taosGetTimestampUs() - stime;
  pReader->cost.smaLoadTime += elapsed;

  *pBlockStatis = pSup->plist;

  tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", elapsed time:%" PRId64 "us, %s", 0, pFBlock->uid,
3027 3028
            elapsed, pReader->idStr);

H
Hongze Cheng 已提交
3029
  return code;
H
Hongze Cheng 已提交
3030 3031
}

H
Hongze Cheng 已提交
3032
SArray* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) {
H
Haojun Liao 已提交
3033 3034 3035
  SReaderStatus* pStatus = &pReader->status;

  if (pStatus->composedDataBlock) {
3036
    return pReader->pResBlock->pDataBlock;
3037
  }
3038

3039
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pStatus->blockIter);
3040
  STableBlockScanInfo* pBlockScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
3041

3042 3043 3044 3045 3046
  int32_t code = tBlockDataInit(&pStatus->fileBlockData);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    return NULL;
  }
3047

3048 3049
  code = doLoadFileBlockData(pReader, &pStatus->blockIter, pBlockScanInfo, &pStatus->fileBlockData);
  if (code != TSDB_CODE_SUCCESS) {
H
Hongze Cheng 已提交
3050
    tBlockDataClear(&pStatus->fileBlockData, 1);
H
Haojun Liao 已提交
3051

3052 3053
    terrno = code;
    return NULL;
3054
  }
3055 3056

  copyBlockDataToSDataBlock(pReader, pBlockScanInfo);
H
Hongze Cheng 已提交
3057
  tBlockDataClear(&pStatus->fileBlockData, 1);
3058
  return pReader->pResBlock->pDataBlock;
H
Hongze Cheng 已提交
3059 3060
}

H
Haojun Liao 已提交
3061
int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) {
3062 3063 3064
  if (isEmptyQueryTimeWindow(&pReader->window)) {
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
3065

L
Liu Jicong 已提交
3066 3067
  pReader->order = pCond->order;
  pReader->type = BLOCK_LOAD_OFFSET_ORDER;
3068
  pReader->status.loadFromFile = true;
dengyihao's avatar
dengyihao 已提交
3069
  pReader->status.pTableIter = NULL;
H
Hongze Cheng 已提交
3070

H
Haojun Liao 已提交
3071
  pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);
H
Hongze Cheng 已提交
3072

3073
  // allocate buffer in order to load data blocks from file
3074
  memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg));
3075 3076
  memset(pReader->suppInfo.plist, 0, POINTER_BYTES);

3077
  pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID;
3078
  tsdbDataFReaderClose(&pReader->pFileReader);
3079

3080 3081 3082 3083
  // todo set the correct numOfTables
  int32_t         numOfTables = 1;
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

L
Liu Jicong 已提交
3084 3085
  tsdbDataFReaderClose(&pReader->pFileReader);

H
Hongze Cheng 已提交
3086
  initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr);
3087
  resetDataBlockIterator(&pReader->status.blockIter, pReader->order);
3088
  resetDataBlockScanInfo(pReader->status.pTableMap);
3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099

  int32_t code = 0;
  // no data in files, let's try buffer in memory
  if (pReader->status.fileIter.numOfFiles == 0) {
    pReader->status.loadFromFile = false;
  } else {
    code = initForFirstBlockInFile(pReader, pBlockIter);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }
H
Hongze Cheng 已提交
3100

dengyihao's avatar
dengyihao 已提交
3101 3102
  tsdbDebug("%p reset reader, suid:%" PRIu64 ", numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s",
            pReader, pReader->suid, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr);
3103
  return code;
H
Hongze Cheng 已提交
3104
}
H
Hongze Cheng 已提交
3105

3106 3107 3108
static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows) {
  return (numOfRows - startRow) / bucketRange;
}
H
Hongze Cheng 已提交
3109

3110 3111 3112 3113
int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) {
  int32_t code = TSDB_CODE_SUCCESS;
  pTableBlockInfo->totalSize = 0;
  pTableBlockInfo->totalRows = 0;
H
Hongze Cheng 已提交
3114

3115 3116
  // find the start data block in file
  SReaderStatus* pStatus = &pReader->status;
H
Hongze Cheng 已提交
3117

3118 3119 3120
  STsdbCfg* pc = &pReader->pTsdb->pVnode->config.tsdbCfg;
  pTableBlockInfo->defMinRows = pc->minRows;
  pTableBlockInfo->defMaxRows = pc->maxRows;
H
Hongze Cheng 已提交
3121

3122
  int32_t bucketRange = ceil((pc->maxRows - pc->minRows) / 20.0);
H
Hongze Cheng 已提交
3123

3124
  pTableBlockInfo->numOfFiles += 1;
H
Hongze Cheng 已提交
3125

3126 3127
  int32_t numOfTables = (int32_t)taosHashGetSize(pStatus->pTableMap);
  int     defaultRows = 4096;
H
Hongze Cheng 已提交
3128

3129 3130
  SDataBlockIter* pBlockIter = &pStatus->blockIter;
  pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles;
H
Haojun Liao 已提交
3131 3132 3133 3134

  if (pBlockIter->numOfBlocks > 0) {
    pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
  }
H
Hongze Cheng 已提交
3135

3136
  pTableBlockInfo->numOfTables = numOfTables;
H
Haojun Liao 已提交
3137
  bool hasNext = (pBlockIter->numOfBlocks > 0);
H
Hongze Cheng 已提交
3138

3139 3140 3141 3142 3143
  while (true) {
    if (hasNext) {
      SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
      STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
      SBlock*              pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);
H
Hongze Cheng 已提交
3144

3145 3146
      int32_t numOfRows = pBlock->nRow;
      pTableBlockInfo->totalRows += numOfRows;
H
Hongze Cheng 已提交
3147

3148 3149 3150
      if (numOfRows > pTableBlockInfo->maxRows) {
        pTableBlockInfo->maxRows = numOfRows;
      }
H
refact  
Hongze Cheng 已提交
3151

3152 3153 3154
      if (numOfRows < pTableBlockInfo->minRows) {
        pTableBlockInfo->minRows = numOfRows;
      }
H
refact  
Hongze Cheng 已提交
3155

3156 3157 3158
      if (numOfRows < defaultRows) {
        pTableBlockInfo->numOfSmallBlocks += 1;
      }
H
refact  
Hongze Cheng 已提交
3159

3160 3161
      int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows);
      pTableBlockInfo->blockRowsHisto[bucketIndex]++;
3162 3163 3164

      hasNext = blockIteratorNext(&pStatus->blockIter);

3165 3166 3167 3168 3169
    } else {
      code = initForFirstBlockInFile(pReader, pBlockIter);
      if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) {
        break;
      }
H
refact  
Hongze Cheng 已提交
3170

3171
      pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
3172
      hasNext = (pBlockIter->numOfBlocks > 0);
3173
    }
H
refact  
Hongze Cheng 已提交
3174

H
Hongze Cheng 已提交
3175 3176
    //    tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables,
    //              pReader->pFileGroup->fid, pReader->idStr);
3177
  }
H
Hongze Cheng 已提交
3178

H
refact  
Hongze Cheng 已提交
3179 3180
  return code;
}
H
Hongze Cheng 已提交
3181

H
refact  
Hongze Cheng 已提交
3182
int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) {
3183
  int64_t rows = 0;
H
Hongze Cheng 已提交
3184

3185 3186
  SReaderStatus* pStatus = &pReader->status;
  pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);
H
Hongze Cheng 已提交
3187

3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209
  while (pStatus->pTableIter != NULL) {
    STableBlockScanInfo* pBlockScanInfo = pStatus->pTableIter;

    STbData* d = NULL;
    if (pReader->pTsdb->mem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pTsdb->mem, pReader->suid, pBlockScanInfo->uid, &d);
      if (d != NULL) {
        rows += tsdbGetNRowsInTbData(d);
      }
    }

    STbData* di = NULL;
    if (pReader->pTsdb->imem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pTsdb->imem, pReader->suid, pBlockScanInfo->uid, &di);
      if (di != NULL) {
        rows += tsdbGetNRowsInTbData(di);
      }
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
  }
H
Hongze Cheng 已提交
3210

H
refact  
Hongze Cheng 已提交
3211
  return rows;
H
Hongze Cheng 已提交
3212
}
D
dapan1121 已提交
3213

L
Liu Jicong 已提交
3214
int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) {
D
dapan1121 已提交
3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226
  int32_t sversion = 1;

  SMetaReader mr = {0};
  metaReaderInit(&mr, pVnode->pMeta, 0);
  int32_t code = metaGetTableEntryByUid(&mr, uid);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
    metaReaderClear(&mr);
    return terrno;
  }

  *suid = 0;
L
Liu Jicong 已提交
3227

D
dapan1121 已提交
3228
  if (mr.me.type == TSDB_CHILD_TABLE) {
D
dapan1121 已提交
3229
    tDecoderClear(&mr.coder);
D
dapan1121 已提交
3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244
    *suid = mr.me.ctbEntry.suid;
    code = metaGetTableEntryByUid(&mr, *suid);
    if (code != TSDB_CODE_SUCCESS) {
      terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
      metaReaderClear(&mr);
      return terrno;
    }
    sversion = mr.me.stbEntry.schemaRow.version;
  } else {
    ASSERT(mr.me.type == TSDB_NORMAL_TABLE);
    sversion = mr.me.ntbEntry.schemaRow.version;
  }

  metaReaderClear(&mr);
  *pSchema = metaGetTbTSchema(pVnode->pMeta, uid, sversion);
L
Liu Jicong 已提交
3245

D
dapan1121 已提交
3246 3247
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278

int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) {
  int32_t code = 0;

  // alloc
  *ppSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap));
  if (*ppSnap == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _exit;
  }

  // lock
  code = taosThreadRwlockRdlock(&pTsdb->rwLock);
  if (code) {
    code = TAOS_SYSTEM_ERROR(code);
    goto _exit;
  }

  // take snapshot
  (*ppSnap)->pMem = pTsdb->mem;
  (*ppSnap)->pIMem = pTsdb->imem;

  if ((*ppSnap)->pMem) {
    tsdbRefMemTable((*ppSnap)->pMem);
  }

  if ((*ppSnap)->pIMem) {
    tsdbRefMemTable((*ppSnap)->pIMem);
  }

  // fs (todo)
H
Hongze Cheng 已提交
3279 3280 3281 3282 3283
  code = tsdbFSRef(pTsdb, &(*ppSnap)->fs);
  if (code) {
    taosThreadRwlockUnlock(&pTsdb->rwLock);
    goto _exit;
  }
H
Hongze Cheng 已提交
3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305

  // unlock
  code = taosThreadRwlockUnlock(&pTsdb->rwLock);
  if (code) {
    code = TAOS_SYSTEM_ERROR(code);
    goto _exit;
  }

_exit:
  return code;
}

void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) {
  if (pSnap) {
    if (pSnap->pMem) {
      tsdbUnrefMemTable(pSnap->pMem);
    }

    if (pSnap->pIMem) {
      tsdbUnrefMemTable(pSnap->pIMem);
    }

H
Hongze Cheng 已提交
3306
    tsdbFSUnref(pTsdb, &pSnap->fs);
H
Hongze Cheng 已提交
3307 3308
  }
}