tsdbRead.c 102.0 KB
Newer Older
H
hjxilinx 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
Hongze Cheng 已提交
16
#include "tsdb.h"
17
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
H
Hongze Cheng 已提交
18

19 20 21 22 23 24
typedef struct {
  STbDataIter *iter;
  int32_t      index;
  bool         hasVal;
} SIterInfo;

H
Haojun Liao 已提交
25 26 27 28
typedef struct STableBlockScanInfo {
  uint64_t     uid;
  TSKEY        lastKey;
  SBlockIdx    blockIdx;
29
  SArray*      pBlockList;        // block data index list
30 31
  SIterInfo    iter;              // mem buffer skip list iterator
  SIterInfo    iiter;             // imem buffer skip list iterator
32
  SArray*      delSkyline;        // delete info for this table
33 34
  int32_t      fileDelIndex;
  bool         iterInit;          // whether to initialize the in-memory skip list iterator or not
H
Haojun Liao 已提交
35 36 37
} STableBlockScanInfo;

typedef struct SBlockOrderWrapper {
38 39
  int64_t      uid;
  SBlock*      pBlock;
H
Haojun Liao 已提交
40
} SBlockOrderWrapper;
H
Hongze Cheng 已提交
41 42

typedef struct SBlockOrderSupporter {
43 44 45 46
  SBlockOrderWrapper** pDataBlockInfo;
  int32_t*             indexPerTable;
  int32_t*             numOfBlocksPerTable;
  int32_t              numOfTables;
H
Hongze Cheng 已提交
47 48 49
} SBlockOrderSupporter;

typedef struct SIOCostSummary {
H
Haojun Liao 已提交
50
  int64_t blockLoadTime;
51
  int64_t smaLoadTime;
H
Haojun Liao 已提交
52
  int64_t checkForNextTime;
53 54
  int64_t headFileLoad;
  int64_t headFileLoadTime;
H
Hongze Cheng 已提交
55 56 57
} SIOCostSummary;

typedef struct SBlockLoadSuppInfo {
58
  SColumnDataAgg   tsColAgg;
C
Cary Xu 已提交
59
  SColumnDataAgg** plist;
60 61 62
  int16_t*         colIds;    // column ids for loading file block data
  int32_t*         slotIds;   // colId to slotId
  char**           buildBuf;  // build string tmp buffer, todo remove it later after all string format being updated.
H
Hongze Cheng 已提交
63 64
} SBlockLoadSuppInfo;

65
typedef struct SFilesetIter {
66 67 68 69
  int32_t          numOfFiles;  // number of total files
  int32_t          index;       // current accessed index in the list
  SArray*          pFileList;   // data file list
  int32_t          order;
70
} SFilesetIter;
H
Haojun Liao 已提交
71 72

typedef struct SFileDataBlockInfo {
73 74
  int32_t           tbBlockIdx;   // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it
  uint64_t          uid;
H
Haojun Liao 已提交
75 76 77
} SFileDataBlockInfo;

typedef struct SDataBlockIter {
78 79 80 81
  int32_t           numOfBlocks;
  int32_t           index;
  SArray*           blockList;   // SArray<SFileDataBlockInfo>
  int32_t           order;
H
Haojun Liao 已提交
82 83 84
} SDataBlockIter;

typedef struct SFileBlockDumpInfo {
85 86 87 88
  int32_t           totalRows;
  int32_t           rowIndex;
  int64_t           lastKey;
  bool              allDumped;
H
Haojun Liao 已提交
89 90
} SFileBlockDumpInfo;

H
Haojun Liao 已提交
91
typedef struct SVersionRange {
92 93
  uint64_t          minVer;
  uint64_t          maxVer;
H
Haojun Liao 已提交
94 95
} SVersionRange;

H
Haojun Liao 已提交
96
typedef struct SReaderStatus {
97 98
  bool              loadFromFile;    // check file stage
  SHashObj*         pTableMap;       // SHash<STableBlockScanInfo>
99
  STableBlockScanInfo* pTableIter;    // table iterator used in building in-memory buffer data blocks.
100
  SFileBlockDumpInfo   fBlockDumpInfo;
101

102 103 104 105 106
  SDFileSet*        pCurrentFileset; // current opened file set
  SBlockData        fileBlockData;
  SFilesetIter      fileIter;
  SDataBlockIter    blockIter;
  bool              composedDataBlock;// the returned data block is a composed block or not
H
Haojun Liao 已提交
107 108
} SReaderStatus;

H
Hongze Cheng 已提交
109
struct STsdbReader {
H
Haojun Liao 已提交
110 111 112 113 114 115 116
  STsdb*             pTsdb;
  uint64_t           suid;
  int16_t            order;
  STimeWindow        window;  // the primary query time window that applies to all queries
  SSDataBlock*       pResBlock;
  int32_t            capacity;
  SReaderStatus      status;
117 118
  char*              idStr;  // query info handle, for debug purpose
  int32_t            type;   // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows
H
Hongze Cheng 已提交
119
  SBlockLoadSuppInfo suppInfo;
120

H
Hongze Cheng 已提交
121 122
  SIOCostSummary     cost;
  STSchema*          pSchema;
123 124
  SDataFReader*      pFileReader;
  SVersionRange      verRange;
H
Hongze Cheng 已提交
125
};
H
Hongze Cheng 已提交
126

H
Haojun Liao 已提交
127
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter);
128 129
static int      buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                          STsdbReader* pReader);
130
static TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader);
131 132
static int32_t  doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                        SRowMerger* pMerger);
133
static int32_t  doMergeRowsInBuf(SIterInfo *pIter, int64_t ts, SArray* pDelList, SRowMerger* pMerger, STsdbReader* pReader);
134 135 136
static int32_t  doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow);
static void     setComposedBlockFlag(STsdbReader* pReader, bool composed);
static void     updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader);
137
static bool     hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey);
138

139
static void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo *pIter, SArray* pDelList, STSRow** pTSRow, STsdbReader* pReader);
140 141
static void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                               STSRow** pTSRow);
142
static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, STbData* piMemTbData);
143 144
static STsdb*  getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, int8_t *pLevel);
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level);
H
Haojun Liao 已提交
145

146 147 148
static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) {
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;

149
  size_t numOfCols = blockDataGetNumOfCols(pBlock);
150

151
  pSupInfo->colIds = taosMemoryMalloc(numOfCols * sizeof(int16_t));
152
  pSupInfo->buildBuf = taosMemoryCalloc(numOfCols, POINTER_BYTES);
153 154 155
  if (pSupInfo->buildBuf == NULL || pSupInfo->colIds == NULL) {
    taosMemoryFree(pSupInfo->colIds);
    taosMemoryFree(pSupInfo->buildBuf);
H
Haojun Liao 已提交
156 157
    return TSDB_CODE_OUT_OF_MEMORY;
  }
H
Hongze Cheng 已提交
158

H
Haojun Liao 已提交
159 160
  for (int32_t i = 0; i < numOfCols; ++i) {
    SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i);
161
    pSupInfo->colIds[i] = pCol->info.colId;
162 163 164 165

    if (IS_VAR_DATA_TYPE(pCol->info.type)) {
      pSupInfo->buildBuf[i] = taosMemoryMalloc(pCol->info.bytes);
    }
H
Haojun Liao 已提交
166
  }
H
Hongze Cheng 已提交
167

H
Haojun Liao 已提交
168 169
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
170

171
static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableKeyInfo* idList, int32_t numOfTables) {
H
Haojun Liao 已提交
172
  // allocate buffer in order to load data blocks from file
173 174 175 176
  // todo use simple hash instead
  SHashObj* pTableMap =
      taosHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
  if (pTableMap == NULL) {
H
Haojun Liao 已提交
177 178 179 180
    return NULL;
  }

  // todo apply the lastkey of table check to avoid to load header file
181 182 183 184 185
  for (int32_t j = 0; j < numOfTables; ++j) {
    STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid};
    if (ASCENDING_TRAVERSE(pTsdbReader->order)) {
      if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReader->window.skey) {
        info.lastKey = pTsdbReader->window.skey;
H
Haojun Liao 已提交
186 187
      }

188
      ASSERT(info.lastKey >= pTsdbReader->window.skey && info.lastKey <= pTsdbReader->window.ekey);
wmmhello's avatar
wmmhello 已提交
189
    } else {
190
      info.lastKey = pTsdbReader->window.skey;
H
Haojun Liao 已提交
191
    }
wmmhello's avatar
wmmhello 已提交
192

193 194 195
    taosHashPut(pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info));
    tsdbDebug("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, info.uid, info.lastKey,
              pTsdbReader->idStr);
H
Haojun Liao 已提交
196 197
  }

198
  return pTableMap;
H
Hongze Cheng 已提交
199
}
H
Hongze Cheng 已提交
200

201 202 203 204 205 206 207 208 209 210 211 212 213 214
static void resetDataBlockScanInfo(SHashObj* pTableMap) {
  STableBlockScanInfo* p = NULL;

  while((p = taosHashIterate(pTableMap, p)) != NULL) {
    p->iterInit = false;
    p->iiter.hasVal = false;
    if (p->iter.iter != NULL)  {
      tsdbTbDataIterDestroy(p->iter.iter);
    }

    taosArrayDestroy(p->delSkyline);
  }
}

215
static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) {
216 217
  ASSERT(pWindow != NULL);
  return pWindow->skey > pWindow->ekey;
H
Haojun Liao 已提交
218
}
H
Hongze Cheng 已提交
219

220 221 222 223
// Update the query time window according to the data time to live(TTL) information, in order to avoid to return
// the expired data to client, even it is queried already.
static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) {
 STsdbKeepCfg* pCfg = &pTsdb->keepCfg;
H
Hongze Cheng 已提交
224

225 226 227 228 229 230 231 232 233 234
 int64_t now = taosGetTimestamp(pCfg->precision);
 int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1;  // needs to add one tick

 STimeWindow win = *pWindow;
  if (win.skey < earilyTs) {
    win.skey = earilyTs;
  }

  return win;
}
H
Hongze Cheng 已提交
235

H
Haojun Liao 已提交
236
static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* capacity) {
H
Haojun Liao 已提交
237 238 239 240 241 242
  int32_t rowLen = 0;
  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    rowLen += pCond->colList[i].bytes;
  }

  // make sure the output SSDataBlock size be less than 2MB.
H
Haojun Liao 已提交
243 244 245
  const int32_t TWOMB = 2 * 1024 * 1024;
  if ((*capacity) * rowLen > TWOMB) {
    (*capacity) = TWOMB / rowLen;
H
Haojun Liao 已提交
246 247 248 249
  }
}

// init file iterator
250
static int32_t initFilesetIterator(SFilesetIter* pIter, const STsdbFSState* pFState, int32_t order, const char* idstr) {
251 252
  size_t numOfFileset = taosArrayGetSize(pFState->aDFileSet);

253 254 255
  pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset;
  pIter->order = order;
  pIter->pFileList = taosArrayDup(pFState->aDFileSet);
256
  pIter->numOfFiles = numOfFileset;
H
Haojun Liao 已提交
257

H
Haojun Liao 已提交
258
  tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr);
H
Haojun Liao 已提交
259 260 261
  return TSDB_CODE_SUCCESS;
}

262
static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) {
263 264
  bool    asc = ASCENDING_TRAVERSE(pIter->order);
  int32_t step = asc ? 1 : -1;
265 266 267
  pIter->index += step;

  if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) {
H
Haojun Liao 已提交
268 269 270 271 272
    return false;
  }

  // check file the time range of coverage
  STimeWindow win = {0};
H
Hongze Cheng 已提交
273

274
  while (1) {
275
    pReader->status.pCurrentFileset = (SDFileSet*)taosArrayGet(pIter->pFileList, pIter->index);
H
Haojun Liao 已提交
276

277 278 279 280
    int32_t code = tsdbDataFReaderOpen(&pReader->pFileReader, pReader->pTsdb, pReader->status.pCurrentFileset);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
    }
H
Haojun Liao 已提交
281

282 283 284 285 286 287 288 289 290 291 292 293 294 295
    int32_t fid = pReader->status.pCurrentFileset->fid;
    tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey);

    // current file are no longer overlapped with query time window, ignore remain files
    if ((asc && win.skey > pReader->window.ekey) || (!asc && win.ekey < pReader->window.skey)) {
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader,
                pReader->window.skey, pReader->window.ekey, pReader->idStr);
      return false;
    }

    if ((asc && (win.ekey < pReader->window.skey)) || ((!asc) && (win.skey > pReader->window.ekey))) {
      pIter->index += step;
      continue;
    }
C
Cary Xu 已提交
296

297 298
    tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, fid, pReader->window.skey,
              pReader->window.ekey, pReader->idStr);
299 300
    return true;
  }
301

302
_err:
H
Haojun Liao 已提交
303 304 305
  return false;
}

306
static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order) {
307 308
  pIter->order = order;
  pIter->index = -1;
H
Haojun Liao 已提交
309
  pIter->numOfBlocks = -1;
310
  pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo));
H
Haojun Liao 已提交
311 312
}

H
Haojun Liao 已提交
313
static void initReaderStatus(SReaderStatus* pStatus) {
314 315
  pStatus->pTableIter     = NULL;
  pStatus->loadFromFile   = true;
H
Haojun Liao 已提交
316 317
}

318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) {
  SSDataBlock* pResBlock = createDataBlock();
  if (pResBlock == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    SColumnInfoData colInfo = {{0}, 0};
    colInfo.info = pCond->colList[i];
    blockDataAppendColInfo(pResBlock, &colInfo);
  }

  int32_t code = blockDataEnsureCapacity(pResBlock, capacity);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    taosMemoryFree(pResBlock);
    return NULL;
  }

  return pResBlock;
}

H
Haojun Liao 已提交
341 342
static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, const char* idstr) {
  int32_t      code = 0;
343
  int8_t       level = 0;
H
Haojun Liao 已提交
344
  STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader));
H
Hongze Cheng 已提交
345 346
  if (pReader == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Haojun Liao 已提交
347
    goto _end;
H
Hongze Cheng 已提交
348 349
  }

H
Haojun Liao 已提交
350
  initReaderStatus(&pReader->status);
351

352
  pReader->pTsdb       = getTsdbByRetentions(pVnode, pCond->twindows[0].skey, pVnode->config.tsdbCfg.retentions, idstr, &level);
353 354 355
  pReader->suid        = pCond->suid;
  pReader->order       = pCond->order;
  pReader->capacity    = 4096;
356
  pReader->idStr       = (idstr != NULL)? strdup(idstr):NULL;
357 358
  pReader->verRange    = getQueryVerRange(pVnode, pCond, level);
  pReader->type = pCond->type;
359
  pReader->window      = updateQueryTimeWindow(pVnode->pTsdb, pCond->twindows);
360

361
  ASSERT(pCond->numOfCols > 0);
H
Hongze Cheng 已提交
362

363
  limitOutputBufferSize(pCond, &pReader->capacity);
364

365 366 367
  // allocate buffer in order to load data blocks from file
  SBlockLoadSuppInfo* pSup = &pReader->suppInfo;
  pSup->plist = taosMemoryCalloc(pCond->numOfCols, POINTER_BYTES);
368
  if (pSup->plist == NULL) {
369 370 371
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _end;
  }
H
Haojun Liao 已提交
372

373 374
  pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID;

375 376 377 378
  pReader->pResBlock = createResBlock(pCond, pReader->capacity);
  if (pReader->pResBlock == NULL) {
    code = terrno;
    goto _end;
H
Hongze Cheng 已提交
379
  }
H
Hongze Cheng 已提交
380

381 382
  setColumnIdSlotList(pReader, pReader->pResBlock);

H
Hongze Cheng 已提交
383 384
  *ppReader = pReader;
  return code;
H
Hongze Cheng 已提交
385

H
Haojun Liao 已提交
386 387
_end:
  tsdbReaderClose(pReader);
H
Hongze Cheng 已提交
388 389 390
  *ppReader = NULL;
  return code;
}
H
Hongze Cheng 已提交
391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423

// void tsdbResetQueryHandleForNewTable(STsdbReader* queryHandle, SQueryTableDataCond* pCond, STableListInfo* tableList,
//                                      int32_t tWinIdx) {
//   STsdbReader* pTsdbReadHandle = queryHandle;

//   pTsdbReadHandle->order = pCond->order;
//   pTsdbReadHandle->window = pCond->twindows[tWinIdx];
//   pTsdbReadHandle->type = TSDB_QUERY_TYPE_ALL;
//   pTsdbReadHandle->cur.fid = -1;
//   pTsdbReadHandle->cur.win = TSWINDOW_INITIALIZER;
//   pTsdbReadHandle->checkFiles = true;
//   pTsdbReadHandle->activeIndex = 0;  // current active table index
//   pTsdbReadHandle->locateStart = false;
//   pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;

//   if (ASCENDING_TRAVERSE(pCond->order)) {
//     assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
//   } else {
//     assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
//   }

//   // allocate buffer in order to load data blocks from file
//   memset(pTsdbReadHandle->suppInfo.pstatis, 0, sizeof(SColumnDataAgg));
//   memset(pTsdbReadHandle->suppInfo.plist, 0, POINTER_BYTES);

//   tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
//   tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);

//   SArray* pTable = NULL;
//   //  STsdbMeta* pMeta = tsdbGetMeta(pTsdbReadHandle->pTsdb);

//   //  pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);

H
Haojun Liao 已提交
424
//   pTsdbReadHandle->pTableCheckInfo = NULL;  // createDataBlockScanInfo(pTsdbReadHandle, groupList, pMeta,
H
Hongze Cheng 已提交
425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
//                                             // &pTable);
//   if (pTsdbReadHandle->pTableCheckInfo == NULL) {
//     //    tsdbReaderClose(pTsdbReadHandle);
//     terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
//   }

//   //  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//   //  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
// }

// SArray* tsdbGetQueriedTableList(STsdbReader** pHandle) {
//   assert(pHandle != NULL);

//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;

//   size_t  size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//   SArray* res = taosArrayInit(size, POINTER_BYTES);
//   return res;
// }

445 446
// static TSKEY extractFirstTraverseKey(STableBlockScanInfo* pCheckInfo, int32_t order, int32_t update, TDRowVerT
// maxVer) {
H
Hongze Cheng 已提交
447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
//   TSDBROW row = {0};
//   STSRow *rmem = NULL, *rimem = NULL;

//   if (pCheckInfo->iter) {
//     if (tsdbTbDataIterGet(pCheckInfo->iter, &row)) {
//       rmem = row.pTSRow;
//     }
//   }

//   if (pCheckInfo->iiter) {
//     if (tsdbTbDataIterGet(pCheckInfo->iiter, &row)) {
//       rimem = row.pTSRow;
//     }
//   }

//   if (rmem == NULL && rimem == NULL) {
//     return TSKEY_INITIAL_VAL;
//   }

//   if (rmem != NULL && rimem == NULL) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
//     return TD_ROW_KEY(rmem);
//   }

//   if (rmem == NULL && rimem != NULL) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//     return TD_ROW_KEY(rimem);
//   }

//   TSKEY r1 = TD_ROW_KEY(rmem);
//   TSKEY r2 = TD_ROW_KEY(rimem);

//   if (r1 == r2) {
//     if (TD_SUPPORT_UPDATE(update)) {
//       pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
//     } else {
//       pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//       tsdbTbDataIterNext(pCheckInfo->iter);
//     }
//     return r1;
//   } else if (r1 < r2 && ASCENDING_TRAVERSE(order)) {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
//     return r1;
//   } else {
//     pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
//     return r2;
//   }
// }

H
Haojun Liao 已提交
496
// static bool moveToNextRowInMem(STableBlockScanInfo* pCheckInfo) {
H
Hongze Cheng 已提交
497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
//   bool hasNext = false;
//   if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) {
//     if (pCheckInfo->iter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iter);
//     }

//     if (hasNext) {
//       return hasNext;
//     }

//     if (pCheckInfo->iiter != NULL) {
//       return tsdbTbDataIterGet(pCheckInfo->iiter, NULL);
//     }
//   } else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM) {
//     if (pCheckInfo->iiter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iiter);
//     }

//     if (hasNext) {
//       return hasNext;
//     }

//     if (pCheckInfo->iter != NULL) {
//       return tsdbTbDataIterGet(pCheckInfo->iter, NULL);
//     }
//   } else {
//     if (pCheckInfo->iter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iter);
//     }
//     if (pCheckInfo->iiter != NULL) {
//       hasNext = tsdbTbDataIterNext(pCheckInfo->iiter) || hasNext;
//     }
//   }
H
Hongze Cheng 已提交
530

H
Hongze Cheng 已提交
531 532
//   return hasNext;
// }
H
Hongze Cheng 已提交
533

H
Hongze Cheng 已提交
534 535 536
// static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
//   int32_t firstSlot = 0;
//   int32_t lastSlot = numOfBlocks - 1;
H
Hongze Cheng 已提交
537

H
Hongze Cheng 已提交
538
//   int32_t midSlot = firstSlot;
H
Hongze Cheng 已提交
539

H
Hongze Cheng 已提交
540 541 542
//   while (1) {
//     numOfBlocks = lastSlot - firstSlot + 1;
//     midSlot = (firstSlot + (numOfBlocks >> 1));
H
Hongze Cheng 已提交
543

H
Hongze Cheng 已提交
544
//     if (numOfBlocks == 1) break;
H
Hongze Cheng 已提交
545

H
Hongze Cheng 已提交
546 547 548 549 550 551 552 553 554 555 556
//     if (skey > pBlock[midSlot].maxKey.ts) {
//       if (numOfBlocks == 2) break;
//       if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].minKey.ts)) break;
//       firstSlot = midSlot + 1;
//     } else if (skey < pBlock[midSlot].minKey.ts) {
//       if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].maxKey.ts)) break;
//       lastSlot = midSlot - 1;
//     } else {
//       break;  // got the slot
//     }
//   }
H
Hongze Cheng 已提交
557

H
Hongze Cheng 已提交
558 559
//   return midSlot;
// }
H
Hongze Cheng 已提交
560

H
Haojun Liao 已提交
561
static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* pIndexList) {
562
  SArray* aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
H
Hongze Cheng 已提交
563

564
  int32_t code = tsdbReadBlockIdx(pFileReader, aBlockIdx, NULL);
H
Haojun Liao 已提交
565
  if (code != TSDB_CODE_SUCCESS) {
566
    goto _end;
H
Haojun Liao 已提交
567
  }
H
Hongze Cheng 已提交
568

H
Hongze Cheng 已提交
569 570
  if (taosArrayGetSize(aBlockIdx) == 0) {
    taosArrayClear(aBlockIdx);
H
Haojun Liao 已提交
571 572
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
573

574
  SBlockIdx* pBlockIdx;
H
Hongze Cheng 已提交
575
  for (int32_t i = 0; i < taosArrayGetSize(aBlockIdx); ++i) {
576
    pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i);
H
Haojun Liao 已提交
577

578
    // uid check
H
Hongze Cheng 已提交
579
    if (pBlockIdx->suid != pReader->suid) {
H
Haojun Liao 已提交
580 581 582 583
      continue;
    }

    // this block belongs to a table that is not queried.
H
Hongze Cheng 已提交
584
    void* p = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(uint64_t));
H
Haojun Liao 已提交
585 586 587 588
    if (p == NULL) {
      continue;
    }

589 590
    // todo: not valid info in bockIndex
    // time range check
591 592 593
    //    if (pBlockIdx->minKey > pReader->window.ekey || pBlockIdx->maxKey < pReader->window.skey) {
    //      continue;
    //    }
594 595

    // version check
596 597 598
    //    if (pBlockIdx->minVersion > pReader->verRange.maxVer || pBlockIdx->maxVersion < pReader->verRange.minVer) {
    //      continue;
    //    }
H
Haojun Liao 已提交
599 600 601 602 603 604

    STableBlockScanInfo* pScanInfo = p;
    if (pScanInfo->pBlockList == NULL) {
      pScanInfo->pBlockList = taosArrayInit(16, sizeof(SBlock));
    }

H
Hongze Cheng 已提交
605 606
    pScanInfo->blockIdx = *pBlockIdx;
    taosArrayPush(pIndexList, pBlockIdx);
H
Haojun Liao 已提交
607
  }
H
Hongze Cheng 已提交
608

609
_end:
H
Hongze Cheng 已提交
610
  taosArrayDestroy(aBlockIdx);
H
Haojun Liao 已提交
611 612
  return code;
}
H
Hongze Cheng 已提交
613

614 615
static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_t* numOfValidTables,
                               int32_t* numOfBlocks) {
H
Haojun Liao 已提交
616
  size_t numOfTables = taosArrayGetSize(pIndexList);
H
Hongze Cheng 已提交
617

H
Haojun Liao 已提交
618
  *numOfValidTables = 0;
H
Hongze Cheng 已提交
619

620 621 622 623 624 625 626 627 628 629 630
  STableBlockScanInfo* px = NULL;
  while(1) {
    px = taosHashIterate(pReader->status.pTableMap, px);
    if (px == NULL) {
      break;
    }

    taosArrayClear(px->pBlockList);
  }

  for(int32_t i = 0; i < numOfTables; ++i) {
H
Haojun Liao 已提交
631
    SBlockIdx* pBlockIdx = taosArrayGet(pIndexList, i);
H
Hongze Cheng 已提交
632

H
Hongze Cheng 已提交
633
    SMapData mapData = {0};
H
Haojun Liao 已提交
634 635
    tMapDataReset(&mapData);
    tsdbReadBlock(pReader->pFileReader, pBlockIdx, &mapData, NULL);
H
Hongze Cheng 已提交
636

H
Haojun Liao 已提交
637 638 639
    STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(int64_t));
    for (int32_t j = 0; j < mapData.nItem; ++j) {
      SBlock block = {0};
H
Hongze Cheng 已提交
640

H
Hongze Cheng 已提交
641
      tMapDataGetItemByIdx(&mapData, j, &block, tGetBlock);
H
Hongze Cheng 已提交
642

643
      // 1. time range check
644
      if (block.minKey.ts > pReader->window.ekey || block.maxKey.ts < pReader->window.skey) {
H
Haojun Liao 已提交
645 646
        continue;
      }
H
Hongze Cheng 已提交
647

648
      // 2. version range check
649 650 651
      if (block.minVersion > pReader->verRange.maxVer || block.maxVersion < pReader->verRange.minVer) {
        continue;
      }
652

H
Haojun Liao 已提交
653 654 655 656
      void* p = taosArrayPush(pScanInfo->pBlockList, &block);
      if (p == NULL) {
        return TSDB_CODE_OUT_OF_MEMORY;
      }
657 658

      (*numOfBlocks) += 1;
H
Haojun Liao 已提交
659
    }
H
Hongze Cheng 已提交
660

H
Haojun Liao 已提交
661 662 663 664
    if (pScanInfo->pBlockList != NULL && taosArrayGetSize(pScanInfo->pBlockList) > 0) {
      (*numOfValidTables) += 1;
    }
  }
H
Hongze Cheng 已提交
665

H
Haojun Liao 已提交
666 667
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
668

669 670
// todo remove pblock parameter
static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, SBlock* pBlock, int32_t order) {
671
  int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1;
H
Haojun Liao 已提交
672

673
  pDumpInfo->allDumped = true;
674
  pDumpInfo->lastKey = pBlock->maxKey.ts + step;
H
Haojun Liao 已提交
675 676
}

677 678
static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal,
                         SBlockLoadSuppInfo* pSup) {
H
Haojun Liao 已提交
679
  if (IS_VAR_DATA_TYPE(pColVal->type)) {
680
    if (pColVal->isNull || pColVal->isNone) {
H
Haojun Liao 已提交
681 682 683 684 685 686 687
      colDataAppendNULL(pColInfoData, rowIndex);
    } else {
      varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData);
      memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData);
      colDataAppend(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false);
    }
  } else {
688
    colDataAppend(pColInfoData, rowIndex, (const char*)&pColVal->value, pColVal->isNull || pColVal->isNone);
H
Haojun Liao 已提交
689
  }
H
Haojun Liao 已提交
690 691
}

692
static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
693
  SReaderStatus*  pStatus = &pReader->status;
694
  SDataBlockIter* pBlockIter = &pStatus->blockIter;
H
Hongze Cheng 已提交
695

696
  SBlockData*         pBlockData = &pStatus->fileBlockData;
H
Haojun Liao 已提交
697
  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter);
H
Haojun Liao 已提交
698 699
  SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
  SSDataBlock*        pResBlock = pReader->pResBlock;
700
  int32_t             numOfCols = blockDataGetNumOfCols(pResBlock);
H
Haojun Liao 已提交
701

H
Haojun Liao 已提交
702
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
703
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
H
Haojun Liao 已提交
704

705
  int64_t st = taosGetTimestampUs();
H
Haojun Liao 已提交
706

H
Haojun Liao 已提交
707
  SColVal cv = {0};
708 709
  int32_t colIndex = 0;

710 711
  bool    asc = ASCENDING_TRAVERSE(pReader->order);
  int32_t step = asc ? 1 : -1;
712

713
  int32_t rowIndex = 0;
714 715
  int32_t remain = asc ? (pBlockData->nRow - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex + 1);

716 717 718 719 720 721 722 723
  int32_t endIndex = 0;
  if (remain <= pReader->capacity) {
    endIndex = pBlockData->nRow;
  } else {
    endIndex = pDumpInfo->rowIndex + step * pReader->capacity;
    remain = pReader->capacity;
  }

724
  int32_t          i = 0;
725 726
  SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, i);
  if (pColData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
727
    for (int32_t j = pDumpInfo->rowIndex; j < endIndex && j >= 0; j += step) {
728 729 730 731 732
      colDataAppend(pColData, rowIndex++, (const char*)&pBlockData->aTSKEY[j], false);
    }
    i += 1;
  }

H
Hongze Cheng 已提交
733
  while (i < numOfCols && colIndex < taosArrayGetSize(pBlockData->aIdx)) {
734 735 736
    rowIndex = 0;
    pColData = taosArrayGet(pResBlock->pDataBlock, i);

H
Hongze Cheng 已提交
737
    SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex);
738 739

    if (pData->cid == pColData->info.colId) {
740
      for (int32_t j = pDumpInfo->rowIndex; j < endIndex && j >= 0; j += step) {
741 742
        tColDataGetValue(pData, j, &cv);
        doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo);
H
Haojun Liao 已提交
743
      }
744 745 746
      colIndex += 1;
    } else {  // the specified column does not exist in file block, fill with null data
      colDataAppendNNULL(pColData, 0, remain);
H
Haojun Liao 已提交
747
    }
748 749 750 751 752

    ASSERT(rowIndex == remain);
    i += 1;
  }

753
  while (i < numOfCols) {
754 755 756
    pColData = taosArrayGet(pResBlock->pDataBlock, i);
    colDataAppendNNULL(pColData, 0, remain);
    i += 1;
H
Haojun Liao 已提交
757
  }
H
Haojun Liao 已提交
758

759
  pResBlock->info.rows = remain;
760
  pDumpInfo->rowIndex += step * remain;
761 762

  setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
H
Haojun Liao 已提交
763

H
Haojun Liao 已提交
764 765
  int64_t elapsedTime = (taosGetTimestampUs() - st);
  pReader->cost.blockLoadTime += elapsedTime;
H
Haojun Liao 已提交
766

767
  int32_t unDumpedRows = asc ? pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1;
H
Haojun Liao 已提交
768
  tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
769
            ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%" PRId64 " us, %s",
770 771 772 773 774 775 776
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, remain, unDumpedRows,
            pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr);

  return TSDB_CODE_SUCCESS;
}

// todo consider the output buffer size
777 778
static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter,
                                   STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) {
779 780 781 782 783 784 785 786 787 788 789
  int64_t st = taosGetTimestampUs();

  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter);
  SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
  SSDataBlock*        pResBlock = pReader->pResBlock;
  int32_t             numOfCols = blockDataGetNumOfCols(pResBlock);

  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  uint8_t *pb = NULL, *pb1 = NULL;
790 791
  int32_t  code = tsdbReadColData(pReader->pFileReader, &pBlockScanInfo->blockIdx, pBlock, pSupInfo->colIds, numOfCols,
                                  pBlockData, &pb, &pb1);
792 793 794 795 796 797 798 799 800
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  int64_t elapsedTime = (taosGetTimestampUs() - st);
  pReader->cost.blockLoadTime += elapsedTime;

  pDumpInfo->allDumped = false;
  tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
801
            ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%" PRId64 " us, %s",
802
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow,
H
Haojun Liao 已提交
803 804
            pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr);
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
805 806

_error:
H
Haojun Liao 已提交
807 808 809 810 811
  tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
            ", rows:%d, %s",
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow,
            pReader->idStr);
  return code;
H
Haojun Liao 已提交
812
}
H
Hongze Cheng 已提交
813

H
Hongze Cheng 已提交
814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871
// static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
//   int    firstPos, lastPos, midPos = -1;
//   int    numOfRows;
//   TSKEY* keyList;

//   assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

//   if (num <= 0) return -1;

//   keyList = (TSKEY*)pValue;
//   firstPos = 0;
//   lastPos = num - 1;

//   if (order == TSDB_ORDER_DESC) {
//     // find the first position which is smaller than the key
//     while (1) {
//       if (key >= keyList[lastPos]) return lastPos;
//       if (key == keyList[firstPos]) return firstPos;
//       if (key < keyList[firstPos]) return firstPos - 1;

//       numOfRows = lastPos - firstPos + 1;
//       midPos = (numOfRows >> 1) + firstPos;

//       if (key < keyList[midPos]) {
//         lastPos = midPos - 1;
//       } else if (key > keyList[midPos]) {
//         firstPos = midPos + 1;
//       } else {
//         break;
//       }
//     }

//   } else {
//     // find the first position which is bigger than the key
//     while (1) {
//       if (key <= keyList[firstPos]) return firstPos;
//       if (key == keyList[lastPos]) return lastPos;

//       if (key > keyList[lastPos]) {
//         lastPos = lastPos + 1;
//         if (lastPos >= num)
//           return -1;
//         else
//           return lastPos;
//       }

//       numOfRows = lastPos - firstPos + 1;
//       midPos = (numOfRows >> 1) + firstPos;

//       if (key < keyList[midPos]) {
//         lastPos = midPos - 1;
//       } else if (key > keyList[midPos]) {
//         firstPos = midPos + 1;
//       } else {
//         break;
//       }
//     }
//   }
H
Hongze Cheng 已提交
872

H
Hongze Cheng 已提交
873 874
//   return midPos;
// }
H
Hongze Cheng 已提交
875

H
Hongze Cheng 已提交
876 877
// static void doCheckGeneratedBlockRange(STsdbReader* pTsdbReadHandle) {
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Hongze Cheng 已提交
878

H
Hongze Cheng 已提交
879 880 881 882 883 884
//   if (cur->rows > 0) {
//     if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
//       assert(cur->win.skey >= pTsdbReadHandle->window.skey && cur->win.ekey <= pTsdbReadHandle->window.ekey);
//     } else {
//       assert(cur->win.skey >= pTsdbReadHandle->window.ekey && cur->win.ekey <= pTsdbReadHandle->window.skey);
//     }
H
Hongze Cheng 已提交
885

H
Hongze Cheng 已提交
886 887 888 889 890
//     SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, 0);
//     assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] &&
//            cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows - 1]);
//   } else {
//     cur->win = pTsdbReadHandle->window;
H
Hongze Cheng 已提交
891

H
Hongze Cheng 已提交
892 893 894 895
//     int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1;
//     cur->lastKey = pTsdbReadHandle->window.ekey + step;
//   }
// }
H
Hongze Cheng 已提交
896

H
Haojun Liao 已提交
897
// static void copyAllRemainRowsFromFileBlock(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo,
H
Hongze Cheng 已提交
898 899
//                                            SDataBlockInfo* pBlockInfo, int32_t endPos) {
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Hongze Cheng 已提交
900

H
Hongze Cheng 已提交
901 902
//   SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
//   TSKEY*     tsArray = pCols->cols[0].pData;
H
Hongze Cheng 已提交
903

H
Hongze Cheng 已提交
904
//   bool ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
H
Hongze Cheng 已提交
905

H
Hongze Cheng 已提交
906
//   int32_t step = ascScan ? 1 : -1;
H
Hongze Cheng 已提交
907

H
Hongze Cheng 已提交
908 909
//   int32_t start = cur->pos;
//   int32_t end = endPos;
H
Hongze Cheng 已提交
910

H
Hongze Cheng 已提交
911 912 913
//   if (!ascScan) {
//     TSWAP(start, end);
//   }
H
Hongze Cheng 已提交
914

H
Hongze Cheng 已提交
915 916
//   assert(pTsdbReadHandle->outputCapacity >= (end - start + 1));
//   int32_t numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, start, end);
H
Hongze Cheng 已提交
917

H
Hongze Cheng 已提交
918 919 920 921 922
//   // the time window should always be ascending order: skey <= ekey
//   cur->win = (STimeWindow){.skey = tsArray[start], .ekey = tsArray[end]};
//   cur->mixBlock = (numOfRows != pBlockInfo->rows);
//   cur->lastKey = tsArray[endPos] + step;
//   cur->blockCompleted = (ascScan ? (endPos == pBlockInfo->rows - 1) : (endPos == 0));
H
Hongze Cheng 已提交
923

H
Hongze Cheng 已提交
924 925 926 927
//   // The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases.
//   int32_t pos = endPos + step;
//   updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
//   doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Hongze Cheng 已提交
928

H
Hongze Cheng 已提交
929 930 931 932
//   tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows,
//             pTsdbReadHandle->idStr);
// }
H
Hongze Cheng 已提交
933

H
Hongze Cheng 已提交
934 935
// // only return the qualified data to client in terms of query time window, data rows in the same block but do not
// // be included in the query time window will be discarded
H
Haojun Liao 已提交
936
// static void doMergeTwoLevelData(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo, SBlock* pBlock) {
H
Hongze Cheng 已提交
937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137
//   SQueryFilePos* cur = &pTsdbReadHandle->cur;
//   SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
//   STsdbCfg*      pCfg = REPO_CFG(pTsdbReadHandle->pTsdb);

//   initTableMemIterator(pTsdbReadHandle, pCheckInfo);

//   SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
//   assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_ID &&
//          cur->pos >= 0 && cur->pos < pBlock->numOfRows);
//   // Even Multi-Version supported, the records with duplicated TSKEY would be merged inside of tsdbLoadData
//   interface. TSKEY* tsArray = pCols->cols[0].pData; assert(pCols->numOfRows == pBlock->numOfRows && tsArray[0] ==
//   pBlock->minKey.ts &&
//          tsArray[pBlock->numOfRows - 1] == pBlock->maxKey.ts);

//   bool    ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
//   int32_t step = ascScan ? 1 : -1;

//   // for search the endPos, so the order needs to reverse
//   int32_t order = ascScan ? TSDB_ORDER_DESC : TSDB_ORDER_ASC;

//   int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
//   int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &blockInfo);

//   STimeWindow* pWin = &blockInfo.window;
//   tsdbDebug("%p uid:%" PRIu64 " start merge data block, file block range:%" PRIu64 "-%" PRIu64
//             " rows:%d, start:%d, end:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, pWin->skey, pWin->ekey, blockInfo.rows, cur->pos, endPos,
//             pTsdbReadHandle->idStr);

//   // compared with the data from in-memory buffer, to generate the correct timestamp array list
//   int32_t numOfRows = 0;
//   int32_t curRow = 0;

//   int16_t   rv1 = -1;
//   int16_t   rv2 = -1;
//   STSchema* pSchema1 = NULL;
//   STSchema* pSchema2 = NULL;

//   int32_t pos = cur->pos;
//   cur->win = TSWINDOW_INITIALIZER;
//   bool adjustPos = false;

//   // no data in buffer, load data from file directly
//   if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
//     copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &blockInfo, endPos);
//     return;
//   } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
//     SSkipListNode* node = NULL;
//     TSKEY          lastKeyAppend = TSKEY_INITIAL_VAL;

//     do {
//       STSRow* row2 = NULL;
//       STSRow* row1 = getSRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, &row2, TD_VER_MAX);
//       if (row1 == NULL) {
//         break;
//       }

//       TSKEY key = TD_ROW_KEY(row1);
//       if ((key > pTsdbReadHandle->window.ekey && ascScan) || (key < pTsdbReadHandle->window.ekey && !ascScan)) {
//         break;
//       }

//       if (adjustPos) {
//         if (key == lastKeyAppend) {
//           pos -= step;
//         }
//         adjustPos = false;
//       }

//       if (((pos > endPos || tsArray[pos] > pTsdbReadHandle->window.ekey) && ascScan) ||
//           ((pos < endPos || tsArray[pos] < pTsdbReadHandle->window.ekey) && !ascScan)) {
//         break;
//       }

//       if ((key < tsArray[pos] && ascScan) || (key > tsArray[pos] && !ascScan)) {
//         if (rv1 != TD_ROW_SVER(row1)) {
//           //          pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1));
//           rv1 = TD_ROW_SVER(row1);
//         }
//         if (row2 && rv2 != TD_ROW_SVER(row2)) {
//           //          pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2));
//           rv2 = TD_ROW_SVER(row2);
//         }

//         numOfRows +=
//             mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols,
//                                pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend);
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = key;
//         }

//         cur->win.ekey = key;
//         cur->lastKey = key + step;
//         cur->mixBlock = true;
//         moveToNextRowInMem(pCheckInfo);
//       } else if (key == tsArray[pos]) {  // data in buffer has the same timestamp of data in file block, ignore it
//         if (TD_SUPPORT_UPDATE(pCfg->update)) {
//           if (lastKeyAppend != key) {
//             if (lastKeyAppend != TSKEY_INITIAL_VAL) {
//               ++curRow;
//             }
//             lastKeyAppend = key;
//           }
//           // load data from file firstly
//           numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, pos, pos);

//           if (rv1 != TD_ROW_SVER(row1)) {
//             rv1 = TD_ROW_SVER(row1);
//           }
//           if (row2 && rv2 != TD_ROW_SVER(row2)) {
//             rv2 = TD_ROW_SVER(row2);
//           }

//           // still assign data into current row
//           numOfRows +=
//               mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols,
//                                  pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend);

//           if (cur->win.skey == TSKEY_INITIAL_VAL) {
//             cur->win.skey = key;
//           }

//           cur->win.ekey = key;
//           cur->lastKey = key + step;
//           cur->mixBlock = true;

//           moveToNextRowInMem(pCheckInfo);

//           pos += step;
//           adjustPos = true;
//         } else {
//           // discard the memory record
//           moveToNextRowInMem(pCheckInfo);
//         }
//       } else if ((key > tsArray[pos] && ascScan) || (key < tsArray[pos] && !ascScan)) {
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = tsArray[pos];
//         }

//         int32_t end = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, key, order);
//         assert(end != -1);

//         if (tsArray[end] == key) {  // the value of key in cache equals to the end timestamp value, ignore it
// #if 0
//           if (pCfg->update == TD_ROW_DISCARD_UPDATE) {
//             moveToNextRowInMem(pCheckInfo);
//           } else {
//             end -= step;
//           }
// #endif
//           if (!TD_SUPPORT_UPDATE(pCfg->update)) {
//             moveToNextRowInMem(pCheckInfo);
//           } else {
//             end -= step;
//           }
//         }

//         int32_t qstart = 0, qend = 0;
//         getQualifiedRowsPos(pTsdbReadHandle, pos, end, numOfRows, &qstart, &qend);

//         if ((lastKeyAppend != TSKEY_INITIAL_VAL) && (lastKeyAppend != (ascScan ? tsArray[qstart] : tsArray[qend]))) {
//           ++curRow;
//         }

//         numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, qstart, qend);
//         pos += (qend - qstart + 1) * step;
//         if (numOfRows > 0) {
//           curRow = numOfRows - 1;
//         }

//         cur->win.ekey = ascScan ? tsArray[qend] : tsArray[qstart];
//         cur->lastKey = cur->win.ekey + step;
//         lastKeyAppend = cur->win.ekey;
//       }
//     } while (numOfRows < pTsdbReadHandle->outputCapacity);

//     if (numOfRows < pTsdbReadHandle->outputCapacity) {
//       /**
//        * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
//        * copy them all to result buffer, since it may be overlapped with file data block.
//        */
//       if (node == NULL || ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) > pTsdbReadHandle->window.ekey) && ascScan)
//       ||
//           ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) < pTsdbReadHandle->window.ekey) && !ascScan)) {
//         // no data in cache or data in cache is greater than the ekey of time window, load data from file block
//         if (cur->win.skey == TSKEY_INITIAL_VAL) {
//           cur->win.skey = tsArray[pos];
//         }

//         int32_t start = -1, end = -1;
//         getQualifiedRowsPos(pTsdbReadHandle, pos, endPos, numOfRows, &start, &end);

//         numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, start, end);
//         pos += (end - start + 1) * step;

//         cur->win.ekey = ascScan ? tsArray[end] : tsArray[start];
//         cur->lastKey = cur->win.ekey + step;
//         cur->mixBlock = true;
//       }
//     }
//   }
H
Hongze Cheng 已提交
1138

H
Hongze Cheng 已提交
1139 1140
//   cur->blockCompleted = (((pos > endPos || cur->lastKey > pTsdbReadHandle->window.ekey) && ascScan) ||
//                          ((pos < endPos || cur->lastKey < pTsdbReadHandle->window.ekey) && !ascScan));
H
Hongze Cheng 已提交
1141

H
Hongze Cheng 已提交
1142 1143 1144
//   if (!ascScan) {
//     TSWAP(cur->win.skey, cur->win.ekey);
//   }
H
Hongze Cheng 已提交
1145

H
Hongze Cheng 已提交
1146 1147
//   updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
//   doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Hongze Cheng 已提交
1148

H
Hongze Cheng 已提交
1149 1150 1151 1152
//   tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s",
//             pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows,
//             pTsdbReadHandle->idStr);
// }
H
Hongze Cheng 已提交
1153

H
Haojun Liao 已提交
1154 1155 1156
static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) {
  taosMemoryFreeClear(pSup->numOfBlocksPerTable);
  taosMemoryFreeClear(pSup->indexPerTable);
H
Hongze Cheng 已提交
1157

H
Haojun Liao 已提交
1158 1159 1160 1161
  for (int32_t i = 0; i < pSup->numOfTables; ++i) {
    SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i];
    taosMemoryFreeClear(pBlockInfo);
  }
H
Hongze Cheng 已提交
1162

H
Haojun Liao 已提交
1163 1164
  taosMemoryFreeClear(pSup->pDataBlockInfo);
}
H
Hongze Cheng 已提交
1165

H
Haojun Liao 已提交
1166 1167
static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) {
  ASSERT(numOfTables >= 1);
H
Hongze Cheng 已提交
1168

H
Haojun Liao 已提交
1169
  pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
1170 1171
  pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
  pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables);
H
Hongze Cheng 已提交
1172

H
Haojun Liao 已提交
1173 1174 1175 1176
  if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) {
    cleanupBlockOrderSupporter(pSup);
    return TSDB_CODE_OUT_OF_MEMORY;
  }
H
Hongze Cheng 已提交
1177

H
Haojun Liao 已提交
1178 1179
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
1180

H
Haojun Liao 已提交
1181
static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
1182
  int32_t leftIndex = *(int32_t*)pLeft;
H
Haojun Liao 已提交
1183
  int32_t rightIndex = *(int32_t*)pRight;
H
Hongze Cheng 已提交
1184

H
Haojun Liao 已提交
1185
  SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;
H
Hongze Cheng 已提交
1186

H
Haojun Liao 已提交
1187 1188
  int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex];
  int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex];
H
Hongze Cheng 已提交
1189

H
Haojun Liao 已提交
1190 1191 1192 1193 1194 1195 1196
  if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftIndex]) {
    /* left block is empty */
    return 1;
  } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) {
    /* right block is empty */
    return -1;
  }
H
Hongze Cheng 已提交
1197

1198
  SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex];
H
Haojun Liao 已提交
1199
  SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex];
H
Hongze Cheng 已提交
1200

H
Haojun Liao 已提交
1201 1202
  return pLeftBlock->pBlock->aSubBlock[0].offset > pRightBlock->pBlock->aSubBlock[0].offset ? 1 : -1;
}
H
Hongze Cheng 已提交
1203

H
Haojun Liao 已提交
1204
static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks) {
1205
  bool asc = ASCENDING_TRAVERSE(pReader->order);
H
Haojun Liao 已提交
1206

1207
  pBlockIter->numOfBlocks = numOfBlocks;
1208 1209
  taosArrayClear(pBlockIter->blockList);

1210 1211
  // access data blocks according to the offset of each block in asc/desc order.
  int32_t numOfTables = (int32_t)taosHashGetSize(pReader->status.pTableMap);
H
Haojun Liao 已提交
1212

1213
  SBlockOrderSupporter sup = {0};
H
Haojun Liao 已提交
1214

1215 1216 1217 1218
  int32_t code = initBlockOrderSupporter(&sup, numOfTables);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }
H
Haojun Liao 已提交
1219

1220 1221 1222 1223 1224 1225 1226
  int32_t cnt = 0;
  void*   ptr = NULL;
  while (1) {
    ptr = taosHashIterate(pReader->status.pTableMap, ptr);
    if (ptr == NULL) {
      break;
    }
H
Haojun Liao 已提交
1227

1228 1229 1230 1231
    STableBlockScanInfo* pTableScanInfo = (STableBlockScanInfo*)ptr;
    if (pTableScanInfo->pBlockList == NULL || taosArrayGetSize(pTableScanInfo->pBlockList) == 0) {
      continue;
    }
H
Haojun Liao 已提交
1232

1233 1234
    size_t num = taosArrayGetSize(pTableScanInfo->pBlockList);
    sup.numOfBlocksPerTable[sup.numOfTables] = num;
H
Haojun Liao 已提交
1235

1236 1237 1238 1239 1240
    char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num);
    if (buf == NULL) {
      cleanupBlockOrderSupporter(&sup);
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
    }
H
Haojun Liao 已提交
1241

1242 1243 1244 1245 1246
    sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf;
    for (int32_t k = 0; k < num; ++k) {
      SBlockOrderWrapper wrapper = {0};
      wrapper.pBlock = (SBlock*)taosArrayGet(pTableScanInfo->pBlockList, k);
      wrapper.uid = pTableScanInfo->uid;
H
Haojun Liao 已提交
1247

1248 1249 1250 1251 1252 1253
      sup.pDataBlockInfo[sup.numOfTables][k] = wrapper;
      cnt++;
    }

    sup.numOfTables += 1;
  }
H
Haojun Liao 已提交
1254

1255
  ASSERT(numOfBlocks == cnt);
H
Haojun Liao 已提交
1256

1257 1258 1259 1260 1261
  // since there is only one table qualified, blocks are not sorted
  if (sup.numOfTables == 1) {
    for (int32_t i = 0; i < numOfBlocks; ++i) {
      SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i};
      taosArrayPush(pBlockIter->blockList, &blockInfo);
1262
    }
1263 1264
    tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted %s", pReader, cnt,
              pReader->idStr);
1265

1266 1267
    pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
    return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1268
  }
H
Haojun Liao 已提交
1269

1270 1271
  tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables,
            pReader->idStr);
1272

1273
  assert(cnt <= numOfBlocks && sup.numOfTables <= numOfTables);
H
Haojun Liao 已提交
1274

1275 1276 1277 1278 1279
  SMultiwayMergeTreeInfo* pTree = NULL;
  uint8_t                 ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar);
  if (ret != TSDB_CODE_SUCCESS) {
    cleanupBlockOrderSupporter(&sup);
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
H
Haojun Liao 已提交
1280
  }
H
Haojun Liao 已提交
1281

1282 1283 1284 1285
  int32_t numOfTotal = 0;
  while (numOfTotal < cnt) {
    int32_t pos = tMergeTreeGetChosenIndex(pTree);
    int32_t index = sup.indexPerTable[pos]++;
H
Haojun Liao 已提交
1286

1287 1288
    SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index};
    taosArrayPush(pBlockIter->blockList, &blockInfo);
H
Haojun Liao 已提交
1289

1290 1291 1292 1293
    // set data block index overflow, in order to disable the offset comparator
    if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) {
      sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1;
    }
H
Haojun Liao 已提交
1294

1295 1296
    numOfTotal += 1;
    tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree));
H
Haojun Liao 已提交
1297
  }
H
Haojun Liao 已提交
1298

1299 1300 1301
  tsdbDebug("%p %d data blocks sort completed, %s", pReader, cnt, pReader->idStr);
  cleanupBlockOrderSupporter(&sup);
  taosMemoryFree(pTree);
H
Haojun Liao 已提交
1302

1303 1304
  pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1305
}
H
Hongze Cheng 已提交
1306

H
Haojun Liao 已提交
1307
static bool blockIteratorNext(SDataBlockIter* pBlockIter) {
1308 1309
  bool asc = ASCENDING_TRAVERSE(pBlockIter->order);

1310
  int32_t step = asc ? 1 : -1;
1311
  if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) {
1312 1313 1314
    return false;
  }

1315
  pBlockIter->index += step;
1316 1317 1318
  return true;
}

1319 1320 1321
/**
 * This is an two rectangles overlap cases.
 */
1322
static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SBlock* pBlock) {
1323 1324 1325 1326
  return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) ||
         (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) ||
         (pVerRange->minVer > pBlock->minVersion && pVerRange->minVer <= pBlock->maxVersion) ||
         (pVerRange->maxVer < pBlock->maxVersion && pVerRange->maxVer >= pBlock->minVersion);
H
Haojun Liao 已提交
1327
}
H
Hongze Cheng 已提交
1328

H
Haojun Liao 已提交
1329 1330 1331 1332
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) {
  SFileDataBlockInfo* pFBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index);
  return pFBlockInfo;
}
H
Hongze Cheng 已提交
1333

1334 1335
static SBlock* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STableBlockScanInfo* pTableBlockScanInfo,
                                           int32_t* nextIndex, int32_t order) {
1336 1337 1338
  bool asc = ASCENDING_TRAVERSE(order);
  if (asc && pFBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) {
    return NULL;
1339 1340
  }

1341
  if (!asc && pFBlockInfo->tbBlockIdx == 0) {
1342 1343 1344
    return NULL;
  }

1345
  int32_t step = asc ? 1 : -1;
1346 1347 1348 1349 1350 1351 1352 1353 1354

  *nextIndex = pFBlockInfo->tbBlockIdx + step;
  SBlock* pNext = taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex);
  return pNext;
}

static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) {
  ASSERT(pBlockIter != NULL && pFBlockInfo != NULL);

1355
  int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 1 : -1;
1356 1357
  int32_t index = pBlockIter->index;

1358
  while (index < pBlockIter->numOfBlocks && index >= 0) {
1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370
    SFileDataBlockInfo* pFBlock = taosArrayGet(pBlockIter->blockList, index);
    if (pFBlock->uid == pFBlockInfo->uid && pFBlock->tbBlockIdx == pFBlockInfo->tbBlockIdx) {
      return index;
    }

    index += step;
  }

  ASSERT(0);
  return -1;
}

1371
static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) {
1372 1373 1374 1375 1376
  if (index < 0 || index >= pBlockIter->numOfBlocks) {
    return -1;
  }

  SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index);
1377 1378 1379 1380 1381
  pBlockIter->index += step;

  if (index != pBlockIter->index) {
    taosArrayRemove(pBlockIter->blockList, index);
    taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock);
1382

1383 1384 1385
    SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index);
    ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx);
  }
1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396

  return TSDB_CODE_SUCCESS;
}

static bool overlapWithNeighborBlock(SBlock* pBlock, SBlock* pNeighbor, int32_t order) {
  // it is the last block in current file, no chance to overlap with neighbor blocks.
  if (ASCENDING_TRAVERSE(order)) {
    return pBlock->maxKey.ts == pNeighbor->minKey.ts;
  } else {
    return pBlock->minKey.ts == pNeighbor->maxKey.ts;
  }
H
Haojun Liao 已提交
1397
}
H
Hongze Cheng 已提交
1398

1399
static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SBlock* pBlock) {
H
Haojun Liao 已提交
1400
  bool ascScan = ASCENDING_TRAVERSE(order);
H
Hongze Cheng 已提交
1401

1402
  return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->minKey.ts)) ||
1403
         (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->maxKey.ts));
H
Haojun Liao 已提交
1404
}
H
Hongze Cheng 已提交
1405

H
Haojun Liao 已提交
1406
static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVerRange) {
1407 1408
  return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVersion >= pVerRange->minVer) &&
         (pBlock->minVersion <= pVerRange->maxVer);
H
Haojun Liao 已提交
1409 1410
}

1411 1412 1413 1414
// 1. the version of all rows should be less than the endVersion
// 2. current block should not overlap with next neighbor block
// 3. current timestamp should not be overlap with each other
// 4. output buffer should be large enough to hold all rows in current block
1415 1416
static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock,
                                STableBlockScanInfo* pScanInfo, TSDBKEY key) {
1417 1418 1419 1420 1421 1422 1423 1424
  int32_t neighborIndex = 0;
  SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order);

  bool overlapWithNeighbor = false;
  if (pNeighbor) {
    overlapWithNeighbor = overlapWithNeighborBlock(pBlock, pNeighbor, pReader->order);
  }

1425 1426 1427 1428 1429 1430 1431
  bool hasDup = false;
  if (pBlock->nSubBlock == 1) {
    hasDup = pBlock->hasDup;
  } else {
    hasDup = true;
  }

1432 1433
  return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) ||
          keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity));
H
Haojun Liao 已提交
1434 1435
}

1436
static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) {
1437
  if (!(pBlockScanInfo->iiter.hasVal || pBlockScanInfo->iter.hasVal)) {
1438 1439
    return TSDB_CODE_SUCCESS;
  }
H
Haojun Liao 已提交
1440

1441 1442 1443
  SSDataBlock* pBlock = pReader->pResBlock;

  int64_t st = taosGetTimestampUs();
1444
  int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->capacity, pReader);
H
Haojun Liao 已提交
1445

1446
  blockDataUpdateTsWindow(pBlock, 0);
1447
  pBlock->info.uid = pBlockScanInfo->uid;
1448

1449
  setComposedBlockFlag(pReader, true);
1450 1451 1452 1453 1454 1455

  int64_t elapsedTime = taosGetTimestampUs() - st;
  tsdbDebug("%p build data block from cache completed, elapsed time:%" PRId64
            " us, numOfRows:%d, numOfCols:%d, brange: %" PRId64 " - %" PRId64 " %s",
            pReader, elapsedTime, pBlock->info.rows, (int32_t)blockDataGetNumOfCols(pBlock), pBlock->info.window.skey,
            pBlock->info.window.ekey, pReader->idStr);
H
Haojun Liao 已提交
1456 1457 1458
  return code;
}

1459
static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow,
1460
                                     STSRow* pTSRow, SIterInfo* pIter, int64_t key) {
1461 1462 1463 1464 1465
  SRowMerger          merge = {0};
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  TSDBKEY k = TSDBROW_KEY(pRow);
1466
  TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
1467
  SArray* pDelList = pBlockScanInfo->delSkyline;
1468

1469 1470 1471 1472 1473 1474 1475 1476
  // ascending order traverse
  if (ASCENDING_TRAVERSE(pReader->order)) {
    if (key < k.ts) {
      tRowMergerInit(&merge, &fRow, pReader->pSchema);

      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      tRowMergerGetRow(&merge, &pTSRow);
    } else if (k.ts < key) {  // k.ts < key
1477
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader);
1478 1479 1480
    } else {  // k.ts == key, ascending order: file block ----> imem rows -----> mem rows
      tRowMergerInit(&merge, &fRow, pReader->pSchema);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
1481 1482

      tRowMerge(&merge, pRow);
1483
      doMergeRowsInBuf(pIter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1484 1485

      tRowMergerGetRow(&merge, &pTSRow);
1486
    }
1487 1488
  } else {  // descending order scan
    if (key < k.ts) {
1489
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader);
1490 1491
    } else if (k.ts < key) {
      tRowMergerInit(&merge, &fRow, pReader->pSchema);
1492

1493 1494 1495 1496 1497 1498
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      tRowMergerGetRow(&merge, &pTSRow);
    } else {  // descending order: mem rows -----> imem rows ------> file block
      updateSchema(pRow, pBlockScanInfo->uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
1499
      doMergeRowsInBuf(pIter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1500 1501 1502 1503 1504 1505

      tRowMerge(&merge, &fRow);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);

      tRowMergerGetRow(&merge, &pTSRow);
    }
1506 1507
  }

1508
  tRowMergerClear(&merge);
1509 1510 1511 1512
  doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
  return TSDB_CODE_SUCCESS;
}

1513 1514 1515 1516
static int32_t doMergeThreeLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SRowMerger merge = {0};
  STSRow*    pTSRow = NULL;

1517 1518
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
1519
  SArray* pDelList = pBlockScanInfo->delSkyline;
1520

1521 1522
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pDelList, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pDelList, pReader);
1523
  ASSERT(pRow != NULL && piRow != NULL);
H
Haojun Liao 已提交
1524

1525
  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
H
Haojun Liao 已提交
1526

1527
  uint64_t uid = pBlockScanInfo->uid;
H
Haojun Liao 已提交
1528

1529 1530 1531 1532
  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);

  if (ASCENDING_TRAVERSE(pReader->order)) {
1533 1534
    // [1&2] key <= [k.ts && ik.ts]
    if (key <= k.ts && key <= ik.ts) {
1535 1536 1537
      TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
      tRowMergerInit(&merge, &fRow, pReader->pSchema);

1538
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
H
Haojun Liao 已提交
1539

1540 1541
      if (ik.ts == key) {
        tRowMerge(&merge, piRow);
1542
        doMergeRowsInBuf(&pBlockScanInfo->iiter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1543 1544
      }

1545 1546
      if (k.ts == key) {
        tRowMerge(&merge, pRow);
1547
        doMergeRowsInBuf(&pBlockScanInfo->iter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1548 1549 1550 1551
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
1552
      return TSDB_CODE_SUCCESS;
1553
    } else {  // key > ik.ts || key > k.ts
1554 1555
      ASSERT(key != ik.ts);

1556
      // [3] ik.ts < key <= k.ts
1557
      // [4] ik.ts < k.ts <= key
1558
      if (ik.ts < k.ts) {
1559
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1560 1561 1562 1563
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

1564 1565
      // [5] k.ts < key   <= ik.ts
      // [6] k.ts < ik.ts <= key
1566
      if (k.ts < ik.ts) {
1567
        doMergeMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, &pTSRow, pReader);
1568 1569 1570 1571
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

1572
      // [7] k.ts == ik.ts < key
1573
      if (k.ts == ik.ts) {
1574 1575
        ASSERT(key > ik.ts && key > k.ts);

1576
        doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pTSRow);
1577 1578 1579 1580
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }
    }
1581 1582 1583 1584 1585 1586
  } else {  // descending order scan
    // [1/2] k.ts >= ik.ts && k.ts >= key
    if (k.ts >= ik.ts && k.ts >= key) {
      updateSchema(pRow, uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
1587
      doMergeRowsInBuf(&pBlockScanInfo->iter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1588 1589 1590

      if (ik.ts == k.ts) {
        tRowMerge(&merge, piRow);
1591
        doMergeRowsInBuf(&pBlockScanInfo->iiter, key, pBlockScanInfo->delSkyline, &merge, pReader);
1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603
      }

      if (k.ts == key) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
      return TSDB_CODE_SUCCESS;
    } else {
1604
      ASSERT(ik.ts != k.ts);  // this case has been included in the previous if branch
1605 1606 1607 1608

      // [3] ik.ts > k.ts >= Key
      // [4] ik.ts > key >= k.ts
      if (ik.ts > key) {
1609
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

      // [5] key > ik.ts > k.ts
      // [6] key > k.ts > ik.ts
      if (key > ik.ts) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMergerInit(&merge, &fRow, pReader->pSchema);

        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }

      //[7] key = ik.ts > k.ts
      if (key == ik.ts) {
1628
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader);
1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642

        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  ASSERT(0);
}

1643 1644
static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, STableBlockScanInfo* pBlockScanInfo,
    STsdbReader* pReader) {
1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655
  // check for version and time range
  int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex];
  if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) {
    return false;
  }

  int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex];
  if (ts > pReader->window.ekey || ts < pReader->window.skey) {
    return false;
  }

1656 1657 1658 1659 1660
  TSDBKEY k = {.ts = ts, .version = ver};
  if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k)) {
    return false;
  }

1661 1662 1663
  return true;
}

1664
static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); }
1665

1666 1667 1668 1669 1670 1671 1672 1673
static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;

  SRowMerger merge = {0};
  STSRow*    pTSRow = NULL;

  int64_t  key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
1674 1675
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader);
1676

1677
  if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal) {
1678
    return doMergeThreeLevelRows(pReader, pBlockScanInfo);
1679
  } else {
1680
    // imem + file
1681 1682
    if (pBlockScanInfo->iiter.hasVal) {
      return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, pTSRow, &pBlockScanInfo->iiter, key);
1683 1684
    }

1685
    // mem + file
1686 1687
    if (pBlockScanInfo->iter.hasVal) {
      return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, pTSRow, &pBlockScanInfo->iter,key);
H
Haojun Liao 已提交
1688
    }
1689

1690
    // imem & mem are all empty, only file exist
1691
    TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
1692 1693 1694 1695
    tRowMergerInit(&merge, &fRow, pReader->pSchema);
    doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
    tRowMergerGetRow(&merge, &pTSRow);
    doAppendOneRow(pReader->pResBlock, pReader, pTSRow);
1696

1697
    return TSDB_CODE_SUCCESS;
1698 1699 1700
  }
}

1701
static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
1702 1703
  SSDataBlock* pResBlock = pReader->pResBlock;

1704
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
1705 1706
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  int32_t             step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;
1707

1708
  while (1) {
1709 1710
    // todo check the validate of row in file block
    {
1711
      if (!isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) {
1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725
        pDumpInfo->rowIndex += step;

        SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
        SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);

        if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
          setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
          break;
        }

        continue;
      }
    }

1726
    buildComposedDataBlockImpl(pReader, pBlockScanInfo);
1727

1728 1729
    SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
    SBlock*             pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
1730

1731 1732 1733 1734 1735 1736 1737 1738
    // currently loaded file data block is consumed
    if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
      setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
      break;
    }

    if (pResBlock->info.rows >= pReader->capacity) {
      break;
1739 1740 1741 1742
    }
  }

  pResBlock->info.uid = pBlockScanInfo->uid;
1743 1744
  blockDataUpdateTsWindow(pResBlock, 0);

1745
  setComposedBlockFlag(pReader, true);
1746

1747 1748
  tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s", pReader,
            pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows,
1749
            pReader->idStr);
1750

1751 1752 1753 1754 1755
  return TSDB_CODE_SUCCESS;
}

void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; }

1756
static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
1757 1758 1759 1760
  if (pBlockScanInfo->iterInit) {
    return TSDB_CODE_SUCCESS;
  }

1761
  int32_t code = TSDB_CODE_SUCCESS;
1762 1763 1764 1765 1766 1767 1768 1769 1770

  TSDBKEY startKey = {0};
  if (ASCENDING_TRAVERSE(pReader->order)) {
    startKey = (TSDBKEY){.ts = pReader->window.skey, .version = pReader->verRange.minVer};
  } else {
    startKey = (TSDBKEY){.ts = pReader->window.ekey, .version = pReader->verRange.maxVer};
  }

  int32_t backward = (!ASCENDING_TRAVERSE(pReader->order));
1771 1772 1773 1774

  STbData* d = NULL;
  if (pReader->pTsdb->mem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pTsdb->mem, pReader->suid, pBlockScanInfo->uid, &d);
1775
    if (d != NULL) {
1776
      code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter);
1777
      if (code == TSDB_CODE_SUCCESS) {
1778
        pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL);
1779

H
Haojun Liao 已提交
1780
        tsdbDebug("%p uid:%" PRId64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
1781 1782
                  "-%" PRId64 " %s",
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, d->minKey, d->maxKey, pReader->idStr);
1783
      } else {
1784 1785
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for imem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
1786
        return code;
1787 1788
      }
    }
H
Haojun Liao 已提交
1789
  } else {
1790
    tsdbDebug("%p uid:%" PRId64 ", no data in mem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
H
Haojun Liao 已提交
1791 1792
  }

1793 1794 1795
  STbData* di = NULL;
  if (pReader->pTsdb->imem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pTsdb->imem, pReader->suid, pBlockScanInfo->uid, &di);
1796
    if (di != NULL) {
1797
      code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter);
1798
      if (code == TSDB_CODE_SUCCESS) {
1799
        pBlockScanInfo->iiter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iiter.iter) != NULL);
1800

H
Haojun Liao 已提交
1801
        tsdbDebug("%p uid:%" PRId64 ", check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
1802
                  "-%" PRId64 " %s",
1803
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, di->minKey, di->maxKey, pReader->idStr);
1804
      } else {
1805 1806
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for mem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
1807
        return code;
1808 1809
      }
    }
H
Haojun Liao 已提交
1810 1811
  } else {
    tsdbDebug("%p uid:%" PRId64 ", no data in imem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
1812 1813
  }

1814 1815
  initDelSkylineIterator(pBlockScanInfo, pReader, d, di);

1816
  pBlockScanInfo->iterInit = true;
H
Haojun Liao 已提交
1817 1818 1819
  return TSDB_CODE_SUCCESS;
}

1820
int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, STbData* piMemTbData) {
1821 1822 1823
  if (pBlockScanInfo->delSkyline != NULL) {
    return TSDB_CODE_SUCCESS;
  }
1824

1825 1826 1827
  int32_t code = 0;
  STsdb*  pTsdb = pReader->pTsdb;

1828 1829
  SArray* pDelData = taosArrayInit(4, sizeof(SDelData));

1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842
  SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->fs->cState);
  if (pDelFile) {
    SDelFReader* pDelFReader = NULL;
    code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
    if (code) {
      goto _err;
    }

    SArray* aDelIdx = taosArrayInit(4, sizeof(SDelIdx));
    if (aDelIdx == NULL) {
      goto _err;
    }

1843 1844 1845 1846
    code = tsdbReadDelIdx(pDelFReader, aDelIdx, NULL);
    if (code) {
      goto _err;
    }
1847

1848 1849 1850 1851 1852 1853
    SDelIdx  idx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid};
    SDelIdx* pIdx = taosArraySearch(aDelIdx, &idx, tCmprDelIdx, TD_EQ);

    code = tsdbReadDelData(pDelFReader, pIdx, pDelData, NULL);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
1854
    }
1855
  }
1856

1857 1858 1859 1860 1861 1862 1863
  SDelData* p = NULL;
  if (pMemTbData != NULL) {
    p = pMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
1864 1865
  }

1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882
  if (piMemTbData != NULL) {
    p = piMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
  }

  if (taosArrayGetSize(pDelData) > 0) {
    pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY));
    code = tsdbBuildDeleteSkyline(pDelData, 0, (int32_t)(taosArrayGetSize(pDelData) - 1), pBlockScanInfo->delSkyline);
  }

  taosArrayDestroy(pDelData);
  pBlockScanInfo->iter.index = ASCENDING_TRAVERSE(pReader->order)? 0:taosArrayGetSize(pBlockScanInfo->delSkyline) - 1;
  pBlockScanInfo->iiter.index = pBlockScanInfo->iter.index;
  pBlockScanInfo->fileDelIndex = pBlockScanInfo->iter.index;
1883 1884
  return code;

1885 1886 1887
_err:
  taosArrayDestroy(pDelData);
  return code;
1888 1889
}

1890 1891 1892
static TSDBKEY getCurrentKeyInBuf(SDataBlockIter* pBlockIter, STsdbReader* pReader) {
  TSDBKEY key = {.ts = TSKEY_INITIAL_VAL};

1893
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
1894 1895
  STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

1896 1897
  initMemDataIterator(pScanInfo, pReader);
  TSDBROW* pRow = getValidRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader);
1898
  if (pRow != NULL) {
1899 1900 1901
    key = TSDBROW_KEY(pRow);
  }

1902
  pRow = getValidRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader);
1903
  if (pRow != NULL) {
1904 1905 1906 1907 1908 1909 1910 1911 1912
    TSDBKEY k = TSDBROW_KEY(pRow);
    if (key.ts > k.ts) {
      key = k;
    }
  }

  return key;
}

H
Haojun Liao 已提交
1913 1914 1915 1916
static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) {
  SReaderStatus* pStatus = &pReader->status;

  while (1) {
1917
    bool hasNext = filesetIteratorNext(&pStatus->fileIter, pReader);
1918
    if (!hasNext) {  // no data files on disk
H
Haojun Liao 已提交
1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945
      break;
    }

    SArray* pIndexList = taosArrayInit(4, sizeof(SBlockIdx));
    int32_t code = doLoadBlockIndex(pReader, pReader->pFileReader, pIndexList);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (taosArrayGetSize(pIndexList) > 0) {
      uint32_t numOfValidTable = 0;
      code = doLoadFileBlock(pReader, pIndexList, &numOfValidTable, numOfBlocks);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }

      if (numOfValidTable > 0) {
        break;
      }
    }

    // no blocks in current file, try next files
  }

  return TSDB_CODE_SUCCESS;
}

1946 1947 1948
static int32_t doBuildDataBlock(STsdbReader* pReader) {
  int32_t code = TSDB_CODE_SUCCESS;

1949
  SReaderStatus*  pStatus = &pReader->status;
1950 1951
  SDataBlockIter* pBlockIter = &pStatus->blockIter;

1952 1953
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
  STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
1954 1955 1956 1957 1958

  SBlock* pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);

  TSDBKEY key = getCurrentKeyInBuf(pBlockIter, pReader);
  if (fileBlockShouldLoad(pReader, pFBlock, pBlock, pScanInfo, key)) {
1959 1960
    tBlockDataInit(&pStatus->fileBlockData);
    code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData);
1961 1962 1963 1964 1965
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // build composed data block
1966
    code = buildComposedDataBlock(pReader, pScanInfo);
1967 1968
  } else if (bufferDataInFileBlockGap(pReader->order, key, pBlock)) {
    // data in memory that are earlier than current file block
1969
    // todo rows in buffer should be less than the file block in asc, greater than file block in desc
1970
    int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? pBlock->minKey.ts : pBlock->maxKey.ts;
1971
    code = buildDataBlockFromBuf(pReader, pScanInfo, endKey);
1972
  } else {  // whole block is required, return it directly
1973
    SDataBlockInfo* pInfo = &pReader->pResBlock->info;
1974 1975 1976
    pInfo->rows = pBlock->nRow;
    pInfo->uid = pScanInfo->uid;
    pInfo->window = (STimeWindow){.skey = pBlock->minKey.ts, .ekey = pBlock->maxKey.ts};
1977
    setComposedBlockFlag(pReader, false);
1978
    setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock, pReader->order);
1979 1980 1981 1982 1983
  }

  return code;
}

H
Haojun Liao 已提交
1984
static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) {
1985 1986
  SReaderStatus* pStatus = &pReader->status;

1987
  while (1) {
1988 1989 1990
    if (pStatus->pTableIter == NULL) {
      pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);
      if (pStatus->pTableIter == NULL) {
H
Haojun Liao 已提交
1991
        return TSDB_CODE_SUCCESS;
1992 1993 1994 1995
      }
    }

    STableBlockScanInfo* pBlockScanInfo = pStatus->pTableIter;
1996
    initMemDataIterator(pBlockScanInfo, pReader);
1997

1998
    int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? INT64_MAX : INT64_MIN;
1999
    int32_t code = buildDataBlockFromBuf(pReader, pBlockScanInfo, endKey);
H
Haojun Liao 已提交
2000 2001 2002 2003
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

2004
    if (pReader->pResBlock->info.rows > 0) {
H
Haojun Liao 已提交
2005
      return TSDB_CODE_SUCCESS;
2006 2007 2008 2009 2010
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
    if (pStatus->pTableIter == NULL) {
H
Haojun Liao 已提交
2011
      return TSDB_CODE_SUCCESS;
2012 2013 2014 2015
    }
  }
}

2016
// set the correct start position in case of the first/last file block, according to the query time window
2017
static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
2018
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
2019
  STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
2020
  SBlock*              pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);
2021

2022 2023 2024
  SReaderStatus* pStatus = &pReader->status;

  SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo;
2025 2026 2027

  pDumpInfo->totalRows = pBlock->nRow;
  pDumpInfo->allDumped = false;
2028
  pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->order) ? 0 : pBlock->nRow - 1;
2029 2030
}

2031
static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045
  int32_t numOfBlocks = 0;
  int32_t code = moveToNextFile(pReader, &numOfBlocks);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // all data files are consumed, try data in buffer
  if (numOfBlocks == 0) {
    pReader->status.loadFromFile = false;
    return code;
  }

  // initialize the block iterator for a new fileset
  code = initBlockIterator(pReader, pBlockIter, numOfBlocks);
2046 2047

  // set the correct start position according to the query time window
2048
  initBlockDumpInfo(pReader, pBlockIter);
2049 2050 2051
  return code;
}

2052
static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) {
2053 2054
  return (!pDumpInfo->allDumped) &&
         ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc)));
2055 2056
}

2057
static int32_t buildBlockFromFiles(STsdbReader* pReader) {
H
Haojun Liao 已提交
2058
  int32_t code = TSDB_CODE_SUCCESS;
2059 2060
  bool    asc = ASCENDING_TRAVERSE(pReader->order);

2061 2062
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

2063
  while (1) {
2064
    SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
2065 2066
    STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

2067 2068
    SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

2069
    if (fileBlockPartiallyRead(pDumpInfo, asc)) {  // file data block is partially loaded
2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084
      code = buildComposedDataBlock(pReader, pScanInfo);
    } else {
      // current block are exhausted, try the next file block
      if (pDumpInfo->allDumped) {
        // try next data block in current file
        bool hasNext = blockIteratorNext(&pReader->status.blockIter);
        if (hasNext) {  // check for the next block in the block accessed order list
          initBlockDumpInfo(pReader, pBlockIter);
        } else {  // data blocks in current file are exhausted, let's try the next file now
          code = initForFirstBlockInFile(pReader, pBlockIter);

          // error happens or all the data files are completely checked
          if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) {
            return code;
          }
2085
        }
H
Haojun Liao 已提交
2086
      }
2087 2088 2089

      // current block is not loaded yet, or data in buffer may overlap with the file block.
      code = doBuildDataBlock(pReader);
2090 2091
    }

2092 2093 2094 2095 2096 2097 2098 2099
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pReader->pResBlock->info.rows > 0) {
      return TSDB_CODE_SUCCESS;
    }
  }
2100
}
H
refact  
Hongze Cheng 已提交
2101

2102 2103
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr,
                                  int8_t* pLevel) {
2104
  if (VND_IS_RSMA(pVnode)) {
2105
    int8_t  level = 0;
2106 2107
    int64_t now = taosGetTimestamp(pVnode->config.tsdbCfg.precision);

2108
    for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) {
2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121
      SRetention* pRetention = retentions + level;
      if (pRetention->keep <= 0) {
        if (level > 0) {
          --level;
        }
        break;
      }
      if ((now - pRetention->keep) <= winSKey) {
        break;
      }
      ++level;
    }

2122 2123
    int32_t     vgId = TD_VID(pVnode);
    const char* str = (idStr != NULL) ? idStr : "";
2124 2125

    if (level == TSDB_RETENTION_L0) {
2126
      *pLevel = TSDB_RETENTION_L0;
2127 2128 2129
      tsdbDebug("vgId:%d, read handle %p rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L0, str);
      return VND_RSMA0(pVnode);
    } else if (level == TSDB_RETENTION_L1) {
2130
      *pLevel = TSDB_RETENTION_L1;
2131 2132 2133
      tsdbDebug("vgId:%d, read handle %p rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L1, str);
      return VND_RSMA1(pVnode);
    } else {
2134
      *pLevel = TSDB_RETENTION_L2;
2135 2136 2137 2138 2139 2140 2141 2142
      tsdbDebug("vgId:%d, read handle %p rsma level %d is selected to query %s", vgId, TSDB_RETENTION_L2, str);
      return VND_RSMA2(pVnode);
    }
  }

  return VND_TSDB(pVnode);
}

2143 2144 2145 2146 2147 2148 2149 2150
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) {
  if (VND_IS_RSMA(pVnode)) {
    return (SVersionRange){.minVer = pCond->startVersion, .maxVer = tdRSmaGetMaxSubmitVer(pVnode->pSma, level)};
  }

  return (SVersionRange){.minVer = pCond->startVersion, .maxVer = pVnode->state.applied};
}

H
Hongze Cheng 已提交
2151 2152 2153 2154
// // todo not unref yet, since it is not support multi-group interpolation query
// static UNUSED_FUNC void changeQueryHandleForInterpQuery(STsdbReader* pHandle) {
//   // filter the queried time stamp in the first place
//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;
H
refact  
Hongze Cheng 已提交
2155

H
Hongze Cheng 已提交
2156 2157
//   // starts from the buffer in case of descending timestamp order check data blocks
//   size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
refact  
Hongze Cheng 已提交
2158

H
Hongze Cheng 已提交
2159 2160
//   int32_t i = 0;
//   while (i < numOfTables) {
H
Haojun Liao 已提交
2161
//     STableBlockScanInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
refact  
Hongze Cheng 已提交
2162

H
Hongze Cheng 已提交
2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176
//     // the first qualified table for interpolation query
//     //    if ((pTsdbReadHandle->window.skey <= pCheckInfo->pTableObj->lastKey) &&
//     //        (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) {
//     //      break;
//     //    }

//     i++;
//   }

//   // there are no data in all the tables
//   if (i == numOfTables) {
//     return;
//   }

H
Haojun Liao 已提交
2177
//   STableBlockScanInfo info = *(STableBlockScanInfo*)taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Hongze Cheng 已提交
2178 2179 2180 2181 2182 2183
//   taosArrayClear(pTsdbReadHandle->pTableCheckInfo);

//   info.lastKey = pTsdbReadHandle->window.skey;
//   taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info);
// }

2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222
bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey) {
  ASSERT(pKey != NULL);
  if (pDelList == NULL) {
    return false;
  }

  if (*index >= taosArrayGetSize(pDelList) - 1) {
    TSDBKEY* last = taosArrayGetLast(pDelList);
    if (pKey->ts > last->ts) {
      return false;
    } else if (pKey->ts == last->ts) {
      size_t size = taosArrayGetSize(pDelList);
      TSDBKEY* prev = taosArrayGet(pDelList, size - 2);
      if (prev->version >= pKey->version) {
        return true;
      } else {
        return false;
      }
    } else {
      ASSERT(0);
    }
  } else {
    TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
    TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1);

    if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
      return true;
    } else {
      while (pNext->ts < pKey->ts && (*index) < taosArrayGetSize(pDelList) - 1) {
        (*index) += 1;
      }

      return false;
    }
  }
}

TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) {
  if (!pIter->hasVal) {
H
Haojun Liao 已提交
2223 2224
    return NULL;
  }
H
Hongze Cheng 已提交
2225

2226
  TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter);
2227
  TSDBKEY  key = TSDBROW_KEY(pRow);
2228
  if (outOfTimeWindow(key.ts, &pReader->window)) {
2229
    pIter->hasVal = false;
H
Haojun Liao 已提交
2230 2231
    return NULL;
  }
H
Hongze Cheng 已提交
2232

2233 2234
  // it is a valid data version
  if ((key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer) && (!hasBeenDropped(pDelList, &pIter->index, &key))) {
H
Haojun Liao 已提交
2235 2236
    return pRow;
  }
H
Hongze Cheng 已提交
2237

2238
  while (1) {
2239 2240
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
H
Haojun Liao 已提交
2241 2242
      return NULL;
    }
H
Hongze Cheng 已提交
2243

2244
    pRow = tsdbTbDataIterGet(pIter->iter);
H
Hongze Cheng 已提交
2245

H
Haojun Liao 已提交
2246
    key = TSDBROW_KEY(pRow);
2247
    if (outOfTimeWindow(key.ts, &pReader->window)) {
2248
      pIter->hasVal = false;
H
Haojun Liao 已提交
2249 2250
      return NULL;
    }
H
Hongze Cheng 已提交
2251

2252
    if (key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer && (!hasBeenDropped(pDelList, &pIter->index, &key))) {
H
Haojun Liao 已提交
2253 2254 2255 2256
      return pRow;
    }
  }
}
H
Hongze Cheng 已提交
2257

2258
int32_t doMergeRowsInBuf(SIterInfo *pIter, int64_t ts, SArray* pDelList, SRowMerger* pMerger, STsdbReader* pReader) {
H
Haojun Liao 已提交
2259
  while (1) {
2260 2261
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
H
Haojun Liao 已提交
2262 2263
      break;
    }
H
Hongze Cheng 已提交
2264

2265
    // data exists but not valid
2266
    TSDBROW* pRow = getValidRow(pIter, pDelList, pReader);
2267 2268 2269 2270 2271
    if (pRow == NULL) {
      break;
    }

    // ts is not identical, quit
H
Haojun Liao 已提交
2272
    TSDBKEY k = TSDBROW_KEY(pRow);
2273
    if (k.ts != ts) {
H
Haojun Liao 已提交
2274 2275 2276 2277 2278 2279 2280 2281 2282
      break;
    }

    tRowMerge(pMerger, pRow);
  }

  return TSDB_CODE_SUCCESS;
}

2283
static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger,
2284
                                          SVersionRange* pVerRange, int32_t step) {
2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303
  while (pBlockData->aTSKEY[rowIndex] == key && rowIndex < pBlockData->nRow && rowIndex >= 0) {
    if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) {
      continue;
    }

    TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex);
    tRowMerge(pMerger, &fRow);
    rowIndex += step;
  }

  return rowIndex;
}

typedef enum {
  CHECK_FILEBLOCK_CONT = 0x1,
  CHECK_FILEBLOCK_QUIT = 0x2,
} CHECK_FILEBLOCK_STATE;

static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SBlock* pBlock,
2304 2305
                                         SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key,
                                         CHECK_FILEBLOCK_STATE* state) {
2306
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
2307
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
2308

2309
  *state = CHECK_FILEBLOCK_QUIT;
2310
  int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;
2311 2312 2313

  int32_t nextIndex = -1;
  SBlock* pNeighborBlock = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &nextIndex, pReader->order);
2314
  if (pNeighborBlock == NULL) {  // do nothing
2315 2316 2317 2318 2319
    return 0;
  }

  bool overlap = overlapWithNeighborBlock(pBlock, pNeighborBlock, pReader->order);
  if (overlap) {  // load next block
2320
    SReaderStatus*  pStatus = &pReader->status;
2321 2322
    SDataBlockIter* pBlockIter = &pStatus->blockIter;

2323
    // 1. find the next neighbor block in the scan block list
2324
    SFileDataBlockInfo fb = {.uid = pFBlock->uid, .tbBlockIdx = nextIndex};
2325
    int32_t            neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb);
2326

2327
    // 2. remove it from the scan block list
2328
    setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step);
2329

2330
    // 3. load the neighbor block, and set it to be the currently accessed file data block
2331 2332 2333 2334 2335
    int32_t code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

2336
    // 4. check the data values
2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349
    initBlockDumpInfo(pReader, pBlockIter);

    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);

    if (pDumpInfo->rowIndex >= pBlock->nRow) {
      *state = CHECK_FILEBLOCK_CONT;
    }
  }

  return TSDB_CODE_SUCCESS;
}

2350 2351
int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                SRowMerger* pMerger) {
2352 2353
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

2354
  bool    asc = ASCENDING_TRAVERSE(pReader->order);
2355
  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
2356
  int32_t step = asc ? 1 : -1;
2357

2358 2359 2360 2361 2362
  pDumpInfo->rowIndex += step;
  if (pDumpInfo->rowIndex <= pBlockData->nRow - 1) {
    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);
  }
2363

2364 2365 2366 2367
  // all rows are consumed, let's try next file block
  if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) {
    while (1) {
      CHECK_FILEBLOCK_STATE st;
2368

2369 2370 2371 2372 2373
      SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter);
      SBlock*             pCurrentBlock = taosArrayGet(pScanInfo->pBlockList, pFileBlockInfo->tbBlockIdx);
      checkForNeighborFileBlock(pReader, pScanInfo, pCurrentBlock, pFileBlockInfo, pMerger, key, &st);
      if (st == CHECK_FILEBLOCK_QUIT) {
        break;
2374
      }
2375
    }
H
Haojun Liao 已提交
2376
  }
2377

H
Haojun Liao 已提交
2378 2379 2380
  return TSDB_CODE_SUCCESS;
}

2381
void updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader) {
2382 2383 2384 2385 2386 2387 2388 2389 2390 2391
  int32_t sversion = TSDBROW_SVERSION(pRow);

  if (pReader->pSchema == NULL) {
    pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, uid, sversion);
  } else if (pReader->pSchema->version != sversion) {
    taosMemoryFreeClear(pReader->pSchema);
    pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, uid, sversion);
  }
}

2392
void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo *pIter, SArray* pDelList, STSRow** pTSRow, STsdbReader* pReader) {
2393 2394 2395
  SRowMerger merge = {0};

  TSDBKEY k = TSDBROW_KEY(pRow);
2396
  updateSchema(pRow, uid, pReader);
H
Haojun Liao 已提交
2397

2398
  tRowMergerInit(&merge, pRow, pReader->pSchema);
2399
  doMergeRowsInBuf(pIter, k.ts, pDelList, &merge, pReader);
2400 2401 2402
  tRowMergerGetRow(&merge, pTSRow);
}

2403 2404
void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                        STSRow** pTSRow) {
H
Haojun Liao 已提交
2405 2406
  SRowMerger merge = {0};

2407 2408 2409
  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);

2410 2411 2412 2413
  if (ASCENDING_TRAVERSE(pReader->order)) {  // ascending order imem --> mem
    updateSchema(piRow, pBlockScanInfo->uid, pReader);

    tRowMergerInit(&merge, piRow, pReader->pSchema);
2414
    doMergeRowsInBuf(&pBlockScanInfo->iiter, ik.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2415

2416
    tRowMerge(&merge, pRow);
2417
    doMergeRowsInBuf(&pBlockScanInfo->iter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2418 2419
  } else {
    updateSchema(pRow, pBlockScanInfo->uid, pReader);
2420

2421
    tRowMergerInit(&merge, pRow, pReader->pSchema);
2422
    doMergeRowsInBuf(&pBlockScanInfo->iter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2423 2424

    tRowMerge(&merge, piRow);
2425
    doMergeRowsInBuf(&pBlockScanInfo->iiter, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
2426
  }
2427 2428 2429 2430

  tRowMergerGetRow(&merge, pTSRow);
}

2431 2432
int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STSRow** pTSRow,
                            int64_t endKey) {
2433 2434 2435
  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader);
  SArray* pDelList = pBlockScanInfo->delSkyline;
H
Haojun Liao 已提交
2436

2437 2438
  // todo refactor
  bool asc = ASCENDING_TRAVERSE(pReader->order);
2439
  if (pBlockScanInfo->iter.hasVal) {
2440 2441 2442 2443 2444 2445
    TSDBKEY k = TSDBROW_KEY(pRow);
    if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) {
      pRow = NULL;
    }
  }

2446
  if (pBlockScanInfo->iiter.hasVal) {
2447 2448 2449 2450 2451 2452
    TSDBKEY k = TSDBROW_KEY(piRow);
    if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) {
      piRow = NULL;
    }
  }

2453
  if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal && pRow != NULL && piRow != NULL) {
2454
    TSDBKEY k = TSDBROW_KEY(pRow);
2455
    TSDBKEY ik = TSDBROW_KEY(piRow);
H
Haojun Liao 已提交
2456

2457
    if (ik.ts < k.ts) {  // ik.ts < k.ts
2458
      doMergeMultiRows(piRow, pBlockScanInfo->uid, &pBlockScanInfo->iiter, pDelList, pTSRow, pReader);
2459
    } else if (k.ts < ik.ts) {
2460
      doMergeMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader);
2461 2462
    } else {  // ik.ts == k.ts
      doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, pTSRow);
H
Haojun Liao 已提交
2463
    }
2464 2465

    return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
2466 2467
  }

2468 2469
  if (pBlockScanInfo->iter.hasVal && pRow != NULL) {
    doMergeMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader);
H
Haojun Liao 已提交
2470 2471 2472
    return TSDB_CODE_SUCCESS;
  }

2473 2474
  if (pBlockScanInfo->iiter.hasVal && piRow != NULL) {
    doMergeMultiRows(piRow, pBlockScanInfo->uid, &pBlockScanInfo->iiter, pDelList, pTSRow, pReader);
H
Haojun Liao 已提交
2475 2476 2477 2478 2479 2480
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_SUCCESS;
}

2481 2482 2483 2484
int32_t doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow) {
  int32_t numOfRows = pBlock->info.rows;
  int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);

2485
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
2486
  STSchema*           pSchema = pReader->pSchema;
2487

2488
  SColVal colVal = {0};
2489
  int32_t i = 0, j = 0;
H
Haojun Liao 已提交
2490

2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510
  SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
  if (pColInfoData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
    colDataAppend(pColInfoData, numOfRows, (const char*)&pTSRow->ts, false);
    i += 1;
  }

  while (i < numOfCols && j < pSchema->numOfCols) {
    pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
    col_id_t colId = pColInfoData->info.colId;

    if (colId == pSchema->columns[j].colId) {
      tTSRowGetVal(pTSRow, pReader->pSchema, j, &colVal);
      doCopyColVal(pColInfoData, numOfRows, i, &colVal, pSupInfo);
      i += 1;
      j += 1;
    } else if (colId < pSchema->columns[j].colId) {
      colDataAppendNULL(pColInfoData, numOfRows);
      i += 1;
    } else if (colId > pSchema->columns[j].colId) {
      j += 1;
2511
    }
2512 2513
  }

2514
  // set null value since current column does not exist in the "pSchema"
2515
  while (i < numOfCols) {
2516 2517 2518 2519 2520
    pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
    colDataAppendNULL(pColInfoData, numOfRows);
    i += 1;
  }

2521 2522 2523 2524
  pBlock->info.rows += 1;
  return TSDB_CODE_SUCCESS;
}

2525 2526
int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                  STsdbReader* pReader) {
H
Haojun Liao 已提交
2527 2528 2529 2530
  SSDataBlock* pBlock = pReader->pResBlock;

  do {
    STSRow* pTSRow = NULL;
2531
    tsdbGetNextRowInMem(pBlockScanInfo, pReader, &pTSRow, endKey);
2532 2533
    if (pTSRow == NULL) {
      break;
H
Haojun Liao 已提交
2534 2535
    }

2536
    doAppendOneRow(pBlock, pReader, pTSRow);
H
Haojun Liao 已提交
2537 2538

    // no data in buffer, return immediately
2539
    if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) {
H
Haojun Liao 已提交
2540 2541 2542
      break;
    }

2543
    if (pBlock->info.rows >= capacity) {
H
Haojun Liao 已提交
2544 2545 2546 2547
      break;
    }
  } while (1);

2548
  ASSERT(pBlock->info.rows <= capacity);
H
Haojun Liao 已提交
2549 2550
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
2551

2552
// todo refactor, use arraylist instead
H
Hongze Cheng 已提交
2553
int32_t tsdbSetTableId(STsdbReader* pReader, int64_t uid) {
2554 2555 2556 2557 2558
  ASSERT(pReader != NULL);
  taosHashClear(pReader->status.pTableMap);

  STableBlockScanInfo info = {.lastKey = 0, .uid = uid};
  taosHashPut(pReader->status.pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info));
H
Hongze Cheng 已提交
2559 2560 2561
  return TDB_CODE_SUCCESS;
}

C
Cary Xu 已提交
2562 2563 2564 2565 2566 2567 2568 2569 2570 2571
/**
 * @brief Get all suids since suid
 *
 * @param pMeta
 * @param suid return all suids in one vnode if suid is 0
 * @param list
 * @return int32_t
 */
int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) {
  SMStbCursor* pCur = metaOpenStbCursor(pMeta, suid);
L
Liu Jicong 已提交
2572
  if (!pCur) {
C
Cary Xu 已提交
2573 2574
    return TSDB_CODE_FAILED;
  }
C
Cary Xu 已提交
2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588

  while (1) {
    tb_uid_t id = metaStbCursorNext(pCur);
    if (id == 0) {
      break;
    }

    taosArrayPush(list, &id);
  }

  metaCloseStbCursor(pCur);
  return TSDB_CODE_SUCCESS;
}

H
refact  
Hongze Cheng 已提交
2589
// ====================================== EXPOSED APIs ======================================
2590 2591
int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader,
                       const char* idstr) {
H
Haojun Liao 已提交
2592
  int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, idstr);
H
Haojun Liao 已提交
2593 2594 2595
  if (code) {
    goto _err;
  }
H
Hongze Cheng 已提交
2596

2597 2598 2599 2600 2601 2602 2603 2604
  if (pCond->suid != 0) {
    (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, (*ppReader)->suid, -1);
    ASSERT((*ppReader)->pSchema);
  } else if (taosArrayGetSize(pTableList) > 0) {
    STableKeyInfo* pKey = taosArrayGet(pTableList, 0);
    (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, pKey->uid, -1);
  }

H
Haojun Liao 已提交
2605
  STsdbReader* pReader = *ppReader;
2606
  if (isEmptyQueryTimeWindow(&pReader->window)) {
H
Haojun Liao 已提交
2607 2608 2609
    tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr);
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
2610

2611 2612
  int32_t numOfTables = taosArrayGetSize(pTableList);
  pReader->status.pTableMap = createDataBlockScanInfo(pReader, pTableList->pData, numOfTables);
H
Haojun Liao 已提交
2613 2614 2615
  if (pReader->status.pTableMap == NULL) {
    tsdbReaderClose(pReader);
    *ppReader = NULL;
H
Haojun Liao 已提交
2616

H
Haojun Liao 已提交
2617 2618 2619
    code = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _err;
  }
H
Hongze Cheng 已提交
2620

2621 2622 2623
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

  STsdbFSState* pFState = pReader->pTsdb->fs->cState;
2624
  initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr);
2625 2626 2627 2628 2629 2630 2631
  resetDataBlockIterator(&pReader->status.blockIter, pReader->order);

  // no data in files, let's try buffer in memory
  if (pReader->status.fileIter.numOfFiles == 0) {
    pReader->status.loadFromFile = false;
  } else {
    code = initForFirstBlockInFile(pReader, pBlockIter);
2632
    if (code != TSDB_CODE_SUCCESS) {
2633 2634 2635 2636
      return code;
    }
  }

2637
  tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr);
H
Hongze Cheng 已提交
2638
  return code;
H
Hongze Cheng 已提交
2639 2640

_err:
2641
  tsdbError("failed to create data reader, code: %s %s", tstrerror(code), pReader->idStr);
H
Hongze Cheng 已提交
2642
  return code;
H
refact  
Hongze Cheng 已提交
2643 2644 2645
}

void tsdbReaderClose(STsdbReader* pReader) {
2646 2647
  if (pReader == NULL) {
    return;
2648
  }
H
refact  
Hongze Cheng 已提交
2649

2650 2651 2652
  blockDataDestroy(pReader->pResBlock);
  taosMemoryFreeClear(pReader->suppInfo.plist);
  taosMemoryFree(pReader->suppInfo.slotIds);
H
refact  
Hongze Cheng 已提交
2653

2654 2655 2656 2657 2658
   if (!isEmptyQueryTimeWindow(&pReader->window)) {
     //    tsdbMayUnTakeMemSnapshot(pTsdbReadHandle);
   } else {
     ASSERT(pReader->status.pTableMap == NULL);
   }
H
Haojun Liao 已提交
2659 2660 2661 2662
#if 0
//   if (pReader->status.pTableScanInfo != NULL) {
//     pReader->status.pTableScanInfo = destroyTableCheckInfo(pReader->status.pTableScanInfo);
//   }
H
refact  
Hongze Cheng 已提交
2663

H
Haojun Liao 已提交
2664
//   tsdbDestroyReadH(&pReader->rhelper);
H
refact  
Hongze Cheng 已提交
2665

H
Haojun Liao 已提交
2666 2667 2668 2669 2670 2671
//   tdFreeDataCols(pReader->pDataCols);
//   pReader->pDataCols = NULL;
//
//   pReader->prev = doFreeColumnInfoData(pReader->prev);
//   pReader->next = doFreeColumnInfoData(pReader->next);
#endif
H
refact  
Hongze Cheng 已提交
2672

2673
  SIOCostSummary* pCost = &pReader->cost;
H
refact  
Hongze Cheng 已提交
2674

2675 2676
  tsdbDebug("%p :io-cost summary: head-file read cnt:%" PRIu64 ", head-file time:%" PRIu64 " us, statis-info:%" PRId64
            " us, datablock:%" PRId64 " us, check data:%" PRId64 " us, %s",
2677
            pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaLoadTime, pCost->blockLoadTime,
2678
            pCost->checkForNextTime, pReader->idStr);
H
refact  
Hongze Cheng 已提交
2679

2680 2681 2682
  taosMemoryFree(pReader->idStr);
  taosMemoryFree(pReader->pSchema);
  taosMemoryFreeClear(pReader);
H
refact  
Hongze Cheng 已提交
2683 2684 2685
}

bool tsdbNextDataBlock(STsdbReader* pReader) {
2686
  if (isEmptyQueryTimeWindow(&pReader->window)) {
H
Haojun Liao 已提交
2687 2688
    return false;
  }
H
Hongze Cheng 已提交
2689

H
Haojun Liao 已提交
2690
  // cleanup the data that belongs to the previous data block
2691 2692
  SSDataBlock* pBlock = pReader->pResBlock;
  blockDataCleanup(pBlock);
H
Hongze Cheng 已提交
2693

2694 2695
  int64_t        stime = taosGetTimestampUs();
  int64_t        elapsedTime = stime;
2696
  SReaderStatus* pStatus = &pReader->status;
H
Haojun Liao 已提交
2697 2698

  if (pReader->type == BLOCK_LOAD_OFFSET_ORDER) {
2699
    if (pStatus->loadFromFile) {
2700
      int32_t code = buildBlockFromFiles(pReader);
2701 2702 2703 2704
      if (code != TSDB_CODE_SUCCESS) {
        return false;
      }

2705
      if (pBlock->info.rows > 0) {
2706
        return true;
2707
      } else {
H
Haojun Liao 已提交
2708
        buildBlockFromBufferSequentially(pReader);
2709
        return pBlock->info.rows > 0;
2710
      }
2711
    } else {  // no data in files, let's try the buffer
H
Haojun Liao 已提交
2712
      buildBlockFromBufferSequentially(pReader);
2713
      return pBlock->info.rows > 0;
H
Haojun Liao 已提交
2714 2715 2716
    }
  } else if (pReader->type == BLOCK_LOAD_TABLESEQ_ORDER) {
  } else if (pReader->type == BLOCK_LOAD_EXTERN_ORDER) {
2717 2718
  } else {
    ASSERT(0);
H
Haojun Liao 已提交
2719
  }
2720
  return false;
H
refact  
Hongze Cheng 已提交
2721 2722 2723
}

void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) {
2724 2725 2726 2727
  ASSERT(pDataBlockInfo != NULL && pReader != NULL);
  pDataBlockInfo->rows = pReader->pResBlock->info.rows;
  pDataBlockInfo->uid = pReader->pResBlock->info.uid;
  pDataBlockInfo->window = pReader->pResBlock->info.window;
H
Hongze Cheng 已提交
2728 2729
}

H
Hongze Cheng 已提交
2730
int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) {
H
Hongze Cheng 已提交
2731
  int32_t code = 0;
2732
  *allHave = false;
H
Hongze Cheng 已提交
2733

2734
  // there is no statistics data for composed block
2735 2736 2737 2738
  if (pReader->status.composedDataBlock) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
2739

2740 2741 2742
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
  STableBlockScanInfo* pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
  SBlock*              pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx);
H
Hongze Cheng 已提交
2743

2744
  int64_t stime = taosGetTimestampUs();
H
Hongze Cheng 已提交
2745

2746
  SArray* pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg));
2747 2748 2749 2750 2751 2752 2753
  if (tBlockHasSma(pBlock)) {
    code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pColAgg, NULL);
    if (code != TSDB_CODE_SUCCESS) {
      tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64", code:%s, %s", 0, pFBlock->uid,
                tstrerror(code), pReader->idStr);
      return code;
    }
2754 2755 2756
  } else {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
2757
  }
H
Hongze Cheng 已提交
2758

2759
   *allHave = true;
H
Hongze Cheng 已提交
2760

2761
   // always load the first primary timestamp column data
2762
   SColumnDataAgg* pTsAgg = &pReader->suppInfo.tsColAgg;
2763

2764 2765
  pTsAgg->numOfNull = 0;
  pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID;
2766 2767
   pTsAgg->min = pReader->pResBlock->info.window.skey;
   pTsAgg->max = pReader->pResBlock->info.window.ekey;
2768
   pReader->suppInfo.plist[0] = pTsAgg;
2769 2770 2771 2772

   // update the number of NULL data rows
   size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock);

2773 2774 2775 2776 2777 2778 2779 2780 2781 2782
   int32_t i = 0, j = 0;
   while(j < numOfCols && i < taosArrayGetSize(pColAgg)) {
     SColumnDataAgg* pAgg = taosArrayGet(pColAgg, i);
     if (pAgg->colId == pReader->suppInfo.colIds[j]) {
       if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) {
         pReader->suppInfo.plist[j] = pAgg;
         i += 1;
         j += 1;
       } else {
         *allHave = false;
2783
       }
2784 2785 2786 2787
     } else if (pAgg->colId < pReader->suppInfo.colIds[j]) {
       i += 1;
     } else if (pReader->suppInfo.colIds[j] < pAgg->colId) {
       j += 1;
2788 2789
     }
   }
H
Hongze Cheng 已提交
2790

2791 2792
   int64_t elapsed = taosGetTimestampUs() - stime;
   pReader->cost.smaLoadTime += elapsed;
H
Hongze Cheng 已提交
2793

2794
   *pBlockStatis = pReader->suppInfo.plist;
2795 2796 2797 2798

  tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64", elapsed time:%"PRId64"us, %s", 0, pFBlock->uid,
            elapsed, pReader->idStr);

H
Hongze Cheng 已提交
2799
  return code;
H
Hongze Cheng 已提交
2800 2801
}

H
Hongze Cheng 已提交
2802
SArray* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) {
H
Haojun Liao 已提交
2803 2804 2805
  SReaderStatus* pStatus = &pReader->status;

  if (pStatus->composedDataBlock) {
2806
    return pReader->pResBlock->pDataBlock;
2807
  }
2808

2809
  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pStatus->blockIter);
2810
  STableBlockScanInfo* pBlockScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
2811

2812 2813 2814 2815 2816
  int32_t code = tBlockDataInit(&pStatus->fileBlockData);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    return NULL;
  }
2817

2818 2819 2820 2821
  code = doLoadFileBlockData(pReader, &pStatus->blockIter, pBlockScanInfo, &pStatus->fileBlockData);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    return NULL;
2822
  }
2823 2824 2825

  copyBlockDataToSDataBlock(pReader, pBlockScanInfo);
  return pReader->pResBlock->pDataBlock;
H
Hongze Cheng 已提交
2826 2827
}

2828 2829 2830 2831
int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_t tWinIdx) {
  if (isEmptyQueryTimeWindow(&pReader->window)) {
    return TSDB_CODE_SUCCESS;
  }
H
Hongze Cheng 已提交
2832

2833 2834
  pReader->order               = pCond->order;
  pReader->type                = BLOCK_LOAD_OFFSET_ORDER;
2835
  pReader->status.loadFromFile = true;
2836
  pReader->status.pTableIter   = NULL;
H
Hongze Cheng 已提交
2837

2838
  pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows[tWinIdx]);
H
Hongze Cheng 已提交
2839

2840
  // allocate buffer in order to load data blocks from file
2841
  memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg));
2842 2843 2844 2845 2846 2847 2848 2849 2850
  memset(pReader->suppInfo.plist, 0, POINTER_BYTES);

  // todo set the correct numOfTables
  int32_t         numOfTables = 1;
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

  STsdbFSState* pFState = pReader->pTsdb->fs->cState;
  initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr);
  resetDataBlockIterator(&pReader->status.blockIter, pReader->order);
2851
  resetDataBlockScanInfo(pReader->status.pTableMap);
2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862

  int32_t code = 0;
  // no data in files, let's try buffer in memory
  if (pReader->status.fileIter.numOfFiles == 0) {
    pReader->status.loadFromFile = false;
  } else {
    code = initForFirstBlockInFile(pReader, pBlockIter);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }
H
Hongze Cheng 已提交
2863

2864 2865
  tsdbDebug("%p reset reader, suid:%"PRIu64", numOfTables:%d, query range:%"PRId64" - %"PRId64" in query %s", pReader, pReader->suid,
      numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr);
2866
  return code;
H
Hongze Cheng 已提交
2867
}
H
Hongze Cheng 已提交
2868

2869 2870 2871
static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows) {
  return (numOfRows - startRow) / bucketRange;
}
H
Hongze Cheng 已提交
2872

2873 2874 2875 2876
int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) {
  int32_t code = TSDB_CODE_SUCCESS;
  pTableBlockInfo->totalSize = 0;
  pTableBlockInfo->totalRows = 0;
H
Hongze Cheng 已提交
2877

2878 2879
  // find the start data block in file
  SReaderStatus* pStatus = &pReader->status;
H
Hongze Cheng 已提交
2880

2881 2882 2883
  STsdbCfg* pc = &pReader->pTsdb->pVnode->config.tsdbCfg;
  pTableBlockInfo->defMinRows = pc->minRows;
  pTableBlockInfo->defMaxRows = pc->maxRows;
H
Hongze Cheng 已提交
2884

2885
  int32_t bucketRange = ceil((pc->maxRows - pc->minRows) / 20.0);
H
Hongze Cheng 已提交
2886

2887
  pTableBlockInfo->numOfFiles += 1;
H
Hongze Cheng 已提交
2888

2889 2890
  int32_t numOfTables = (int32_t)taosHashGetSize(pStatus->pTableMap);
  int     defaultRows = 4096;
H
Hongze Cheng 已提交
2891

2892 2893 2894
  SDataBlockIter* pBlockIter = &pStatus->blockIter;
  pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles;
  pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
H
Hongze Cheng 已提交
2895

2896
  pTableBlockInfo->numOfTables = numOfTables;
2897
  bool hasNext = true;
H
Hongze Cheng 已提交
2898

2899 2900 2901 2902 2903
  while (true) {
    if (hasNext) {
      SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
      STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));
      SBlock*              pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx);
H
Hongze Cheng 已提交
2904

2905 2906
      int32_t numOfRows = pBlock->nRow;
      pTableBlockInfo->totalRows += numOfRows;
H
Hongze Cheng 已提交
2907

2908 2909 2910
      if (numOfRows > pTableBlockInfo->maxRows) {
        pTableBlockInfo->maxRows = numOfRows;
      }
H
refact  
Hongze Cheng 已提交
2911

2912 2913 2914
      if (numOfRows < pTableBlockInfo->minRows) {
        pTableBlockInfo->minRows = numOfRows;
      }
H
refact  
Hongze Cheng 已提交
2915

2916 2917 2918
      if (numOfRows < defaultRows) {
        pTableBlockInfo->numOfSmallBlocks += 1;
      }
H
refact  
Hongze Cheng 已提交
2919

2920 2921
      int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows);
      pTableBlockInfo->blockRowsHisto[bucketIndex]++;
2922 2923 2924

      hasNext = blockIteratorNext(&pStatus->blockIter);

2925 2926 2927 2928 2929
    } else {
      code = initForFirstBlockInFile(pReader, pBlockIter);
      if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) {
        break;
      }
H
refact  
Hongze Cheng 已提交
2930

2931 2932
      pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
    }
H
refact  
Hongze Cheng 已提交
2933

2934 2935 2936 2937 2938
/*
    hasNext = blockIteratorNext(&pStatus->blockIter);
*/


2939 2940 2941
//         tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables,
//                   pReader->pFileGroup->fid, pReader->idStr);
  }
H
Hongze Cheng 已提交
2942

H
refact  
Hongze Cheng 已提交
2943 2944
  return code;
}
H
Hongze Cheng 已提交
2945

H
refact  
Hongze Cheng 已提交
2946
int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) {
2947
  int64_t rows = 0;
H
Hongze Cheng 已提交
2948

2949 2950
  SReaderStatus* pStatus = &pReader->status;
  pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);
H
Hongze Cheng 已提交
2951

2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973
  while (pStatus->pTableIter != NULL) {
    STableBlockScanInfo* pBlockScanInfo = pStatus->pTableIter;

    STbData* d = NULL;
    if (pReader->pTsdb->mem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pTsdb->mem, pReader->suid, pBlockScanInfo->uid, &d);
      if (d != NULL) {
        rows += tsdbGetNRowsInTbData(d);
      }
    }

    STbData* di = NULL;
    if (pReader->pTsdb->imem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pTsdb->imem, pReader->suid, pBlockScanInfo->uid, &di);
      if (di != NULL) {
        rows += tsdbGetNRowsInTbData(di);
      }
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
  }
H
Hongze Cheng 已提交
2974

H
refact  
Hongze Cheng 已提交
2975
  return rows;
H
Hongze Cheng 已提交
2976
}