tsdbRead.c 106.0 KB
Newer Older
H
hjxilinx 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
Hongze Cheng 已提交
16
#include "tsdb.h"
17
// Evaluates to non-zero when the traverse order `o` equals TSDB_ORDER_ASC.
// The argument is parenthesized so that an expression argument with lower precedence than `==`
// (for example `a & b`) is compared as a whole.
#define ASCENDING_TRAVERSE(o) ((o) == TSDB_ORDER_ASC)
H
Hongze Cheng 已提交
18

19 20 21 22 23 24
// Tags for a kind of row content. The three values are consecutive integers (1, 2, 3), not
// independent bit flags (0x3 equals 0x1 | 0x2).
// NOTE(review): presumably PREV/NEXT refer to rows just outside the query range and MAIN to the
// rows inside it - confirm against the code that uses this enum.
typedef enum {
  EXTERNAL_ROWS_PREV = 0x1,
  EXTERNAL_ROWS_MAIN = 0x2,
  EXTERNAL_ROWS_NEXT = 0x3,
} EContentData;

25
typedef struct {
dengyihao's avatar
dengyihao 已提交
26
  STbDataIter* iter;
27 28 29 30
  int32_t      index;
  bool         hasVal;
} SIterInfo;

H
Haojun Liao 已提交
31
typedef struct STableBlockScanInfo {
dengyihao's avatar
dengyihao 已提交
32 33
  uint64_t  uid;
  TSKEY     lastKey;
34
  SMapData  mapData;     // block info (compressed)
dengyihao's avatar
dengyihao 已提交
35 36 37 38 39 40
  SArray*   pBlockList;  // block data index list
  SIterInfo iter;        // mem buffer skip list iterator
  SIterInfo iiter;       // imem buffer skip list iterator
  SArray*   delSkyline;  // delete info for this table
  int32_t   fileDelIndex;
  bool      iterInit;  // whether to initialize the in-memory skip list iterator or not
H
Haojun Liao 已提交
41 42 43
} STableBlockScanInfo;

// One sortable block entry: the owning table uid together with an offset value.
// NOTE(review): presumably `offset` is the block's file offset used as the sort key - confirm.
typedef struct SBlockOrderWrapper {
  int64_t uid;
  int64_t offset;
} SBlockOrderWrapper;
H
Hongze Cheng 已提交
47 48

typedef struct SBlockOrderSupporter {
49 50 51 52
  SBlockOrderWrapper** pDataBlockInfo;
  int32_t*             indexPerTable;
  int32_t*             numOfBlocksPerTable;
  int32_t              numOfTables;
H
Hongze Cheng 已提交
53 54 55
} SBlockOrderSupporter;

// Accumulated I/O cost counters of one reader. In this file the time fields are accumulated
// in milliseconds (microsecond deltas divided by 1000.0).
typedef struct SIOCostSummary {
  int64_t numOfBlocks;       // number of file blocks selected for scanning
  double  blockLoadTime;     // time spent loading/copying file block data
  double  buildmemBlock;     // NOTE(review): presumably time spent building blocks from memory buffers - confirm
  int64_t headFileLoad;      // number of fileset readers opened
  double  headFileLoadTime;  // time spent loading block index / block info
  int64_t smaData;           // NOTE(review): presumably number of SMA loads - confirm
  double  smaLoadTime;       // NOTE(review): presumably time spent loading SMA data - confirm
} SIOCostSummary;

typedef struct SBlockLoadSuppInfo {
66
  SArray*          pColAgg;
67
  SColumnDataAgg   tsColAgg;
C
Cary Xu 已提交
68
  SColumnDataAgg** plist;
69 70
  int16_t*         colIds;    // column ids for loading file block data
  char**           buildBuf;  // build string tmp buffer, todo remove it later after all string format being updated.
H
Hongze Cheng 已提交
71 72
} SBlockLoadSuppInfo;

73
typedef struct SFilesetIter {
H
Hongze Cheng 已提交
74 75 76 77
  int32_t numOfFiles;  // number of total files
  int32_t index;       // current accessed index in the list
  SArray* pFileList;   // data file list
  int32_t order;
78
} SFilesetIter;
H
Haojun Liao 已提交
79 80

// Identifies one file data block: the owning table and the block's position within that table.
typedef struct SFileDataBlockInfo {
  // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it
  uint64_t uid;
  int32_t  tbBlockIdx;
} SFileDataBlockInfo;

typedef struct SDataBlockIter {
87 88 89 90 91
  int32_t   numOfBlocks;
  int32_t   index;
  SArray*   blockList;  // SArray<SFileDataBlockInfo>
  int32_t   order;
  SBlock    block;  // current SBlock data
92
  SHashObj* pTableMap;
H
Haojun Liao 已提交
93 94 95
} SDataBlockIter;

// Progress of dumping the currently loaded file block into the result block.
typedef struct SFileBlockDumpInfo {
  int32_t totalRows;  // NOTE(review): not written in the visible part of the file - confirm usage
  int32_t rowIndex;   // next row of the file block to copy
  int64_t lastKey;    // set by setBlockAllDumped() to the block's max key plus the traverse step
  bool    allDumped;  // true once the block has been marked as completely dumped
} SFileBlockDumpInfo;

H
Haojun Liao 已提交
102
// Version range of a query. File blocks whose [minVersion, maxVersion] does not intersect
// [minVer, maxVer] are skipped when blocks are selected.
typedef struct SVersionRange {
  uint64_t minVer;
  uint64_t maxVer;
} SVersionRange;

H
Haojun Liao 已提交
107
typedef struct SReaderStatus {
dengyihao's avatar
dengyihao 已提交
108 109
  bool                 loadFromFile;  // check file stage
  SHashObj*            pTableMap;     // SHash<STableBlockScanInfo>
110
  STableBlockScanInfo* pTableIter;    // table iterator used in building in-memory buffer data blocks.
111
  SFileBlockDumpInfo   fBlockDumpInfo;
112 113 114 115 116
  SDFileSet*           pCurrentFileset;  // current opened file set
  SBlockData           fileBlockData;
  SFilesetIter         fileIter;
  SDataBlockIter       blockIter;
  bool                 composedDataBlock;  // the returned data block is a composed block or not
H
Haojun Liao 已提交
117 118
} SReaderStatus;

H
Hongze Cheng 已提交
119
struct STsdbReader {
H
Haojun Liao 已提交
120 121 122 123 124 125 126
  STsdb*             pTsdb;
  uint64_t           suid;
  int16_t            order;
  STimeWindow        window;  // the primary query time window that applies to all queries
  SSDataBlock*       pResBlock;
  int32_t            capacity;
  SReaderStatus      status;
127 128
  char*              idStr;  // query info handle, for debug purpose
  int32_t            type;   // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows
H
Hongze Cheng 已提交
129
  SBlockLoadSuppInfo suppInfo;
H
Hongze Cheng 已提交
130
  STsdbReadSnap*     pReadSnap;
131 132 133 134
  SIOCostSummary     cost;
  STSchema*          pSchema;
  SDataFReader*      pFileReader;
  SVersionRange      verRange;
135

136 137
  int32_t      step;
  STsdbReader* innerReader[2];
H
Hongze Cheng 已提交
138
};
H
Hongze Cheng 已提交
139

H
Haojun Liao 已提交
140
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter);
141 142
static int      buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                          STsdbReader* pReader);
143
static TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader);
144 145
static int32_t  doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                        SRowMerger* pMerger);
146
static int32_t  doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, SRowMerger* pMerger,
dengyihao's avatar
dengyihao 已提交
147
                                 STsdbReader* pReader);
148
static int32_t  doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow);
149 150
static int32_t  doAppendRowFromBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData,
                                     int32_t rowIndex);
151 152
static void     setComposedBlockFlag(STsdbReader* pReader, bool composed);
static void     updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader);
153
static bool     hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order);
154

dengyihao's avatar
dengyihao 已提交
155
static void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow,
156
                             STsdbReader* pReader, bool* freeTSRow);
157 158
static void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                               STSRow** pTSRow);
dengyihao's avatar
dengyihao 已提交
159 160 161 162
static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData,
                                      STbData* piMemTbData);
static STsdb*  getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr,
                                   int8_t* pLevel);
163
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level);
H
Haojun Liao 已提交
164

165 166 167
// Prepare the column id list and the per-column scratch buffers used when loading file block data.
//
// For every column of pBlock the column id is stored in pReader->suppInfo.colIds[i]; for every
// variable-length column a scratch buffer of `info.bytes` bytes is stored in suppInfo.buildBuf[i]
// (slots of fixed-length columns stay NULL).
//
// Returns TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when any allocation fails.
static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) {
  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;

  size_t numOfCols = blockDataGetNumOfCols(pBlock);

  pSupInfo->colIds = taosMemoryMalloc(numOfCols * sizeof(int16_t));
  pSupInfo->buildBuf = taosMemoryCalloc(numOfCols, POINTER_BYTES);
  if (pSupInfo->buildBuf == NULL || pSupInfo->colIds == NULL) {
    taosMemoryFree(pSupInfo->colIds);
    taosMemoryFree(pSupInfo->buildBuf);

    // never leave freed pointers behind in the reader
    pSupInfo->colIds = NULL;
    pSupInfo->buildBuf = NULL;
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  for (size_t i = 0; i < numOfCols; ++i) {
    SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i);
    pSupInfo->colIds[i] = pCol->info.colId;

    if (IS_VAR_DATA_TYPE(pCol->info.type)) {
      pSupInfo->buildBuf[i] = taosMemoryMalloc(pCol->info.bytes);
      if (pSupInfo->buildBuf[i] == NULL) {
        // The buffers allocated so far stay attached to the reader.
        // NOTE(review): presumably they are released when the reader is closed - confirm.
        return TSDB_CODE_OUT_OF_MEMORY;
      }
    }
  }

  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
189

190
// Create the table map (uid -> STableBlockScanInfo) for the tables in idList.
//
// Every entry starts with lastKey set to the start key of the reader's query window: in
// ascending order because the initial value 0 is raised to window.skey when it is smaller,
// in descending order by direct assignment.
//
// Returns the new hash map, or NULL when the map cannot be created.
static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableKeyInfo* idList, int32_t numOfTables) {
  // allocate buffer in order to load data blocks from file
  // todo use simple hash instead, optimize the memory consumption
  SHashObj* pTableMap =
      taosHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
  if (pTableMap == NULL) {
    return NULL;
  }

  for (int32_t j = 0; j < numOfTables; ++j) {
    STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid};
    if (ASCENDING_TRAVERSE(pTsdbReader->order)) {
      if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReader->window.skey) {
        info.lastKey = pTsdbReader->window.skey;
      }

      ASSERT(info.lastKey >= pTsdbReader->window.skey && info.lastKey <= pTsdbReader->window.ekey);
    } else {
      info.lastKey = pTsdbReader->window.skey;
    }

    taosHashPut(pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info));
    tsdbDebug("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, info.uid, info.lastKey,
              pTsdbReader->idStr);
  }

  tsdbDebug("%p create %d tables scan-info, size:%.2f Kb, %s", pTsdbReader, numOfTables,
            (sizeof(STableBlockScanInfo) * numOfTables) / 1024.0, pTsdbReader->idStr);

  return pTableMap;
}
H
Hongze Cheng 已提交
221

222 223 224
// Reset the per-table in-memory scan state of every table in the map so that the in-memory
// iterators are initialised again (iterInit is cleared). The file block list and the block
// map of each table are left untouched.
static void resetDataBlockScanInfo(SHashObj* pTableMap) {
  STableBlockScanInfo* p = NULL;

  while ((p = taosHashIterate(pTableMap, p)) != NULL) {
    p->iterInit = false;
    p->iiter.hasVal = false;

    if (p->iter.iter != NULL) {
      p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter);
    }

    // Release the imem iterator as well, exactly as destroyBlockScanInfo() does; since iterInit
    // is cleared above, keeping the old iterator around would leak it on re-initialisation.
    if (p->iiter.iter != NULL) {
      p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter);
    }

    p->delSkyline = taosArrayDestroy(p->delSkyline);
  }
}

236 237 238 239 240 241 242 243
// Release everything owned by the entries of the table map (both in-memory iterators, the
// delete skyline, the block list and the block map) and then the map itself.
static void destroyBlockScanInfo(SHashObj* pTableMap) {
  STableBlockScanInfo* p = NULL;

  while ((p = taosHashIterate(pTableMap, p)) != NULL) {
    p->iterInit = false;
    p->iiter.hasVal = false;

    if (p->iter.iter != NULL) {
      p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter);
    }

    if (p->iiter.iter != NULL) {
      p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter);
    }

    p->delSkyline = taosArrayDestroy(p->delSkyline);
    p->pBlockList = taosArrayDestroy(p->pBlockList);
    tMapDataClear(&p->mapData);
  }

  taosHashCleanup(pTableMap);
}

259
// Returns true when the window cannot contain any timestamp, i.e. its start key is greater
// than its end key. pWindow must not be NULL.
static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) {
  ASSERT(pWindow != NULL);
  return pWindow->skey > pWindow->ekey;
}
H
Hongze Cheng 已提交
263

264 265 266
// Clamp the start of the query time window to the earliest timestamp that has not expired yet
// according to the keep configuration (keep2), so that expired data is never returned to the
// client even when it is explicitly queried.
//
// Returns the adjusted copy; *pWindow itself is not modified.
static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) {
  STsdbKeepCfg* pCfg = &pTsdb->keepCfg;

  int64_t now = taosGetTimestamp(pCfg->precision);
  int64_t earliestTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1;  // needs to add one tick

  STimeWindow win = *pWindow;
  if (win.skey < earliestTs) {
    win.skey = earliestTs;
  }

  return win;
}
H
Hongze Cheng 已提交
279

H
Haojun Liao 已提交
280
// Reduce *capacity (rows per result block) so that a full result block stays within 2MB,
// based on the summed byte width of the queried columns.
static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* capacity) {
  // 64-bit arithmetic: capacity * rowLen can exceed INT32_MAX for wide rows
  int64_t rowLen = 0;
  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    rowLen += pCond->colList[i].bytes;
  }

  // make sure the output SSDataBlock size be less than 2MB.
  const int64_t TWOMB = 2 * 1024 * 1024;
  if ((*capacity) * rowLen > TWOMB) {
    // rowLen is > 0 here, otherwise the product above could not exceed TWOMB
    (*capacity) = (int32_t)(TWOMB / rowLen);
  }
}

// init file iterator
H
Hongze Cheng 已提交
294 295
static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) {
  size_t numOfFileset = taosArrayGetSize(aDFileSet);
296

297 298
  pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset;
  pIter->order = order;
H
Hongze Cheng 已提交
299
  pIter->pFileList = aDFileSet;
300
  pIter->numOfFiles = numOfFileset;
H
Haojun Liao 已提交
301

H
Haojun Liao 已提交
302
  tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr);
H
Haojun Liao 已提交
303 304 305
  return TSDB_CODE_SUCCESS;
}

306
// Advance to the next file set (in traverse order) whose key range overlaps the query window
// and open a file reader on it (pReader->pFileReader, pReader->status.pCurrentFileset).
//
// Returns true when such a file set has been opened. Returns false when the list is exhausted,
// when the remaining file sets lie completely past the query window, or when opening a file
// set fails.
// NOTE(review): an open failure is indistinguishable from "no more files" for the caller and
// the error code is dropped - confirm this is intended.
static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) {
  bool    asc = ASCENDING_TRAVERSE(pIter->order);
  int32_t step = asc ? 1 : -1;
  pIter->index += step;

  if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) {
    return false;
  }

  // check file the time range of coverage
  STimeWindow win = {0};

  while (1) {
    // close the reader of the previously visited file set, if any
    if (pReader->pFileReader != NULL) {
      tsdbDataFReaderClose(&pReader->pFileReader);
    }

    pReader->status.pCurrentFileset = (SDFileSet*)taosArrayGet(pIter->pFileList, pIter->index);

    int32_t code = tsdbDataFReaderOpen(&pReader->pFileReader, pReader->pTsdb, pReader->status.pCurrentFileset);
    if (code != TSDB_CODE_SUCCESS) {
      return false;
    }

    pReader->cost.headFileLoad += 1;

    int32_t fid = pReader->status.pCurrentFileset->fid;
    tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey);

    // current file are no longer overlapped with query time window, ignore remain files
    if ((asc && win.skey > pReader->window.ekey) || (!asc && win.ekey < pReader->window.skey)) {
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader,
                pReader->window.skey, pReader->window.ekey, pReader->idStr);
      return false;
    }

    // this file set ends before the query window starts (in traverse order): try the next one
    if ((asc && (win.ekey < pReader->window.skey)) || ((!asc) && (win.skey > pReader->window.ekey))) {
      pIter->index += step;
      if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) {
        return false;
      }
      continue;
    }

    tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", %s", pReader, fid, pReader->window.skey,
              pReader->window.ekey, pReader->idStr);
    return true;
  }
}

359
// Reset the block iterator for a new file set: the cursor and the block count become -1, the
// block list is created on first use or emptied otherwise, and the table map is (re)assigned.
static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order, SHashObj* pTableMap) {
  pIter->order = order;
  pIter->index = -1;
  pIter->numOfBlocks = -1;
  pIter->pTableMap = pTableMap;

  if (pIter->blockList != NULL) {
    taosArrayClear(pIter->blockList);
    return;
  }

  pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo));
}

L
Liu Jicong 已提交
371
// Release the block list of the iterator. The pointer is overwritten with the value returned
// by taosArrayDestroy() (the same idiom this file uses for delSkyline/pBlockList), so that
// resetDataBlockIterator(), which tests blockList against NULL, never sees a freed array.
static void cleanupDataBlockIterator(SDataBlockIter* pIter) {
  pIter->blockList = taosArrayDestroy(pIter->blockList);
}
H
Haojun Liao 已提交
372

H
Haojun Liao 已提交
373
// Put the reader status into its initial state: no current table iterator, and the scan
// starts in the "load from file" stage.
static void initReaderStatus(SReaderStatus* pStatus) {
  pStatus->pTableIter = NULL;
  pStatus->loadFromFile = true;
}

378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
// Create the result block with one column per entry of pCond->colList and room for `capacity`
// rows.
//
// Returns the new block, or NULL with terrno set when the block cannot be created or its
// buffers cannot be reserved.
static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) {
  SSDataBlock* pResBlock = createDataBlock();
  if (pResBlock == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    SColumnInfoData colInfo = {{0}, 0};
    colInfo.info = pCond->colList[i];
    blockDataAppendColInfo(pResBlock, &colInfo);
  }

  int32_t code = blockDataEnsureCapacity(pResBlock, capacity);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    // destroy the whole block: a plain free of the struct would leak the column descriptors
    // appended above and any column buffers that were already reserved
    blockDataDestroy(pResBlock);
    return NULL;
  }

  return pResBlock;
}

401 402
// Allocate and initialise a reader for the given query condition.
//
// pVnode    vnode the query runs on; selects the tsdb instance via the retention settings
// pCond     query condition (time window, order, column list, ...); numOfCols must be > 0
// ppReader  receives the new reader on success, NULL on failure
// capacity  requested rows per result block; reduced so that a result block stays within 2MB
// idstr     optional id string for logging; copied when not NULL
//
// Returns 0 on success or an error code. On failure the partially built reader is handed to
// tsdbReaderClose().
static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, int32_t capacity,
                                const char* idstr) {
  int32_t      code = 0;
  int8_t       level = 0;
  STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader));
  if (pReader == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _end;
  }

  if (VND_IS_TSMA(pVnode)) {
    tsdbDebug("vgId:%d, tsma is selected to query", TD_VID(pVnode));
  }

  initReaderStatus(&pReader->status);

  pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level);
  pReader->suid = pCond->suid;
  pReader->order = pCond->order;
  pReader->capacity = capacity;
  pReader->idStr = (idstr != NULL) ? strdup(idstr) : NULL;
  pReader->verRange = getQueryVerRange(pVnode, pCond, level);
  pReader->type = pCond->type;
  pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);

  ASSERT(pCond->numOfCols > 0);

  limitOutputBufferSize(pCond, &pReader->capacity);

  // allocate buffer in order to load data blocks from file
  SBlockLoadSuppInfo* pSup = &pReader->suppInfo;
  pSup->pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg));
  pSup->plist = taosMemoryCalloc(pCond->numOfCols, POINTER_BYTES);
  if (pSup->pColAgg == NULL || pSup->plist == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _end;
  }

  pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID;

  code = tBlockDataInit(&pReader->status.fileBlockData);
  if (code != TSDB_CODE_SUCCESS) {
    terrno = code;
    goto _end;
  }

  pReader->pResBlock = createResBlock(pCond, pReader->capacity);
  if (pReader->pResBlock == NULL) {
    code = terrno;
    goto _end;
  }

  // the column id list and string buffers are required by every later block load, so a
  // failure here must fail the creation instead of being ignored
  code = setColumnIdSlotList(pReader, pReader->pResBlock);
  if (code != TSDB_CODE_SUCCESS) {
    goto _end;
  }

  *ppReader = pReader;
  return code;

_end:
  tsdbReaderClose(pReader);
  *ppReader = NULL;
  return code;
}
H
Hongze Cheng 已提交
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495

// void tsdbResetQueryHandleForNewTable(STsdbReader* queryHandle, SQueryTableDataCond* pCond, STableListInfo* tableList,
//                                      int32_t tWinIdx) {
//   STsdbReader* pTsdbReadHandle = queryHandle;

//   pTsdbReadHandle->order = pCond->order;
//   pTsdbReadHandle->window = pCond->twindows[tWinIdx];
//   pTsdbReadHandle->type = TSDB_QUERY_TYPE_ALL;
//   pTsdbReadHandle->cur.fid = -1;
//   pTsdbReadHandle->cur.win = TSWINDOW_INITIALIZER;
//   pTsdbReadHandle->checkFiles = true;
//   pTsdbReadHandle->activeIndex = 0;  // current active table index
//   pTsdbReadHandle->locateStart = false;
//   pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;

//   if (ASCENDING_TRAVERSE(pCond->order)) {
//     assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
//   } else {
//     assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
//   }

//   // allocate buffer in order to load data blocks from file
//   memset(pTsdbReadHandle->suppInfo.pstatis, 0, sizeof(SColumnDataAgg));
//   memset(pTsdbReadHandle->suppInfo.plist, 0, POINTER_BYTES);

//   tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
//   tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);

//   SArray* pTable = NULL;
//   //  STsdbMeta* pMeta = tsdbGetMeta(pTsdbReadHandle->pTsdb);

//   //  pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);

H
Haojun Liao 已提交
496
//   pTsdbReadHandle->pTableCheckInfo = NULL;  // createDataBlockScanInfo(pTsdbReadHandle, groupList, pMeta,
H
Hongze Cheng 已提交
497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
//                                             // &pTable);
//   if (pTsdbReadHandle->pTableCheckInfo == NULL) {
//     //    tsdbReaderClose(pTsdbReadHandle);
//     terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
//   }

//   //  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//   //  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
// }

// SArray* tsdbGetQueriedTableList(STsdbReader** pHandle) {
//   assert(pHandle != NULL);

//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;

//   size_t  size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//   SArray* res = taosArrayInit(size, POINTER_BYTES);
//   return res;
// }

// static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
//   int32_t firstSlot = 0;
//   int32_t lastSlot = numOfBlocks - 1;
H
Hongze Cheng 已提交
520

H
Hongze Cheng 已提交
521
//   int32_t midSlot = firstSlot;
H
Hongze Cheng 已提交
522

H
Hongze Cheng 已提交
523 524 525
//   while (1) {
//     numOfBlocks = lastSlot - firstSlot + 1;
//     midSlot = (firstSlot + (numOfBlocks >> 1));
H
Hongze Cheng 已提交
526

H
Hongze Cheng 已提交
527
//     if (numOfBlocks == 1) break;
H
Hongze Cheng 已提交
528

H
Hongze Cheng 已提交
529 530 531 532 533 534 535 536 537 538 539
//     if (skey > pBlock[midSlot].maxKey.ts) {
//       if (numOfBlocks == 2) break;
//       if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].minKey.ts)) break;
//       firstSlot = midSlot + 1;
//     } else if (skey < pBlock[midSlot].minKey.ts) {
//       if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].maxKey.ts)) break;
//       lastSlot = midSlot - 1;
//     } else {
//       break;  // got the slot
//     }
//   }
H
Hongze Cheng 已提交
540

H
Hongze Cheng 已提交
541 542
//   return midSlot;
// }
H
Hongze Cheng 已提交
543

H
Haojun Liao 已提交
544
// Read the block index of the current file set and append to pIndexList every entry that
// belongs to the queried super table (suid) and to a table present in the reader's table map.
// The block list of each matching table is created on first use.
//
// Returns TSDB_CODE_SUCCESS (also when the file set holds no block index entries), the error
// code of tsdbReadBlockIdx(), or TSDB_CODE_OUT_OF_MEMORY.
static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* pIndexList) {
  SArray* aBlockIdx = taosArrayInit(8, sizeof(SBlockIdx));
  if (aBlockIdx == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  int64_t st = taosGetTimestampUs();
  int32_t code = tsdbReadBlockIdx(pFileReader, aBlockIdx, NULL);
  if (code != TSDB_CODE_SUCCESS) {
    goto _end;
  }

  size_t num = taosArrayGetSize(aBlockIdx);
  if (num == 0) {
    // nothing to do; the temporary array still has to be destroyed (clearing it is not enough)
    goto _end;
  }

  int64_t et1 = taosGetTimestampUs();

  SBlockIdx* pBlockIdx = NULL;
  for (size_t i = 0; i < num; ++i) {
    pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i);

    // uid check
    if (pBlockIdx->suid != pReader->suid) {
      continue;
    }

    // this block belongs to a table that is not queried.
    void* p = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(uint64_t));
    if (p == NULL) {
      continue;
    }

    STableBlockScanInfo* pScanInfo = p;
    if (pScanInfo->pBlockList == NULL) {
      pScanInfo->pBlockList = taosArrayInit(4, sizeof(int32_t));
      if (pScanInfo->pBlockList == NULL) {
        code = TSDB_CODE_OUT_OF_MEMORY;
        goto _end;
      }
    }

    if (taosArrayPush(pIndexList, pBlockIdx) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _end;
    }
  }

  int64_t et2 = taosGetTimestampUs();
  tsdbDebug("load block index for %d tables completed, elapsed time:%.2f ms, set blockIdx:%.2f ms, size:%.2f Kb %s",
            (int32_t)num, (et1 - st) / 1000.0, (et2 - et1) / 1000.0, num * sizeof(SBlockIdx) / 1024.0, pReader->idStr);

  pReader->cost.headFileLoadTime += (et1 - st) / 1000.0;

_end:
  taosArrayDestroy(aBlockIdx);
  return code;
}
H
Hongze Cheng 已提交
594

595 596
// Load the block info of every table listed in pIndexList and select the blocks to scan.
//
// The cached block map and block list of every table in the table map are cleared first. Then,
// for each block index entry, the table's block map is read from the file and the index of
// every block that overlaps both the query time window and the query version range is appended
// to the table's block list.
//
// numOfValidTables  set to the number of tables that end up with at least one selected block
// numOfBlocks       incremented by the number of selected blocks (not reset here)
//
// Returns TSDB_CODE_SUCCESS, the error code of tsdbReadBlock(), or TSDB_CODE_OUT_OF_MEMORY.
static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_t* numOfValidTables,
                               int32_t* numOfBlocks) {
  size_t numOfTables = taosArrayGetSize(pIndexList);
  *numOfValidTables = 0;

  int64_t st = taosGetTimestampUs();
  size_t  size = 0;

  // drop the block info cached from the previous file set
  STableBlockScanInfo* px = NULL;
  while ((px = taosHashIterate(pReader->status.pTableMap, px)) != NULL) {
    tMapDataClear(&px->mapData);
    taosArrayClear(px->pBlockList);
  }

  for (size_t i = 0; i < numOfTables; ++i) {
    SBlockIdx* pBlockIdx = taosArrayGet(pIndexList, i);

    STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(int64_t));

    tMapDataReset(&pScanInfo->mapData);
    int32_t code = tsdbReadBlock(pReader->pFileReader, pBlockIdx, &pScanInfo->mapData, NULL);
    if (code != TSDB_CODE_SUCCESS) {
      // do not select blocks from a block map that could not be read
      return code;
    }

    size += pScanInfo->mapData.nData;
    for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) {
      SBlock block = {0};
      tMapDataGetItemByIdx(&pScanInfo->mapData, j, &block, tGetBlock);

      // 1. time range check
      if (block.minKey.ts > pReader->window.ekey || block.maxKey.ts < pReader->window.skey) {
        continue;
      }

      // 2. version range check
      if (block.minVersion > pReader->verRange.maxVer || block.maxVersion < pReader->verRange.minVer) {
        continue;
      }

      void* p = taosArrayPush(pScanInfo->pBlockList, &j);
      if (p == NULL) {
        tMapDataClear(&pScanInfo->mapData);
        return TSDB_CODE_OUT_OF_MEMORY;
      }

      (*numOfBlocks) += 1;
    }

    if (pScanInfo->pBlockList != NULL && taosArrayGetSize(pScanInfo->pBlockList) > 0) {
      (*numOfValidTables) += 1;
    }
  }

  double el = (taosGetTimestampUs() - st) / 1000.0;
  // the counters are cast to match the %d conversions; the size is reported in Kb (1024 bytes)
  // like the other size figures logged in this file
  tsdbDebug("load block of %d tables completed, blocks:%d in %d tables, size:%.2f Kb, elapsed time:%.2f ms %s",
            (int32_t)numOfTables, *numOfBlocks, (int32_t)(*numOfValidTables), size / 1024.0, el, pReader->idStr);

  pReader->cost.numOfBlocks += (*numOfBlocks);
  pReader->cost.headFileLoadTime += el;

  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
660

661 662
// Mark the current file block as completely dumped and move lastKey one step (in traverse
// direction) past the block's max key.
// todo remove pblock parameter
static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, SBlock* pBlock, int32_t order) {
  int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1;

  pDumpInfo->allDumped = true;
  pDumpInfo->lastKey = pBlock->maxKey.ts + step;
}

669 670
// Append one column value to row `rowIndex` of the result column.
//
// Null/none values are appended as NULL. Variable-length values are first re-packed (length
// header followed by the payload) into the per-column scratch buffer pSup->buildBuf[colIndex]
// and appended from there; fixed-length values are appended directly from the value storage.
static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal,
                         SBlockLoadSuppInfo* pSup) {
  bool isNull = pColVal->isNull || pColVal->isNone;

  if (!IS_VAR_DATA_TYPE(pColVal->type)) {
    colDataAppend(pColInfoData, rowIndex, (const char*)&pColVal->value, isNull);
    return;
  }

  if (isNull) {
    colDataAppendNULL(pColInfoData, rowIndex);
    return;
  }

  varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData);
  memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData);
  colDataAppend(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false);
}

684 685 686 687 688
// Returns the block list entry at the iterator's current index.
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) {
  return taosArrayGet(pBlockIter->blockList, pBlockIter->index);
}

689
// Returns the address of the SBlock embedded in the iterator (the current block data).
static SBlock* getCurrentBlock(SDataBlockIter* pBlockIter) {
  SBlock* pCurrent = &pBlockIter->block;
  return pCurrent;
}
690

691
// Copy rows of the loaded file block (pReader->status.fileBlockData) into the result block.
//
// Copying starts at fBlockDumpInfo.rowIndex and moves in traverse direction; at most
// pReader->capacity rows are copied. Result columns that have no matching column in the file
// block are filled with NULLs. Afterwards rowIndex is advanced by the number of copied rows and
// the block is marked as dumped.
// NOTE(review): the block is marked all-dumped even when fewer rows than remain in the block
// were copied - confirm that capacity always covers a whole file block.
//
// Always returns TSDB_CODE_SUCCESS.
static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SReaderStatus*  pStatus = &pReader->status;
  SDataBlockIter* pBlockIter = &pStatus->blockIter;

  SBlockData*         pBlockData = &pStatus->fileBlockData;
  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter);
  SBlock*             pBlock = getCurrentBlock(pBlockIter);
  SSDataBlock*        pResBlock = pReader->pResBlock;
  int32_t             numOfOutputCols = blockDataGetNumOfCols(pResBlock);

  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  SColVal cv = {0};
  int64_t st = taosGetTimestampUs();
  bool    asc = ASCENDING_TRAVERSE(pReader->order);
  int32_t step = asc ? 1 : -1;

  // number of rows to copy: whatever is left in traverse direction, capped by the capacity
  int32_t remain = asc ? (pBlockData->nRow - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex + 1);
  if (remain > pReader->capacity) {
    remain = pReader->capacity;
  }

  // All copy loops below run exactly `remain` times starting at rowIndex. Counting rows instead
  // of comparing against an end index keeps the descending case correct when the capacity is
  // smaller than the number of remaining rows.
  int32_t rowIndex = 0;

  int32_t          i = 0;
  SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, i);
  if (pColData->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
    for (int32_t j = pDumpInfo->rowIndex; rowIndex < remain; j += step) {
      colDataAppend(pColData, rowIndex++, (const char*)&pBlockData->aTSKEY[j], false);
    }
    i += 1;
  }

  int32_t colIndex = 0;
  int32_t num = taosArrayGetSize(pBlockData->aIdx);
  while (i < numOfOutputCols && colIndex < num) {
    rowIndex = 0;
    pColData = taosArrayGet(pResBlock->pDataBlock, i);

    SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex);

    if (pData->cid == pColData->info.colId) {
      for (int32_t j = pDumpInfo->rowIndex; rowIndex < remain; j += step) {
        tColDataGetValue(pData, j, &cv);
        doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo);
      }
      colIndex += 1;
      ASSERT(rowIndex == remain);
    } else {  // the specified column does not exist in file block, fill with null data
      colDataAppendNNULL(pColData, 0, remain);
    }

    i += 1;
  }

  // remaining result columns have no counterpart in the file block at all
  while (i < numOfOutputCols) {
    pColData = taosArrayGet(pResBlock->pDataBlock, i);
    colDataAppendNNULL(pColData, 0, remain);
    i += 1;
  }

  pResBlock->info.rows = remain;
  pDumpInfo->rowIndex += step * remain;

  setBlockAllDumped(pDumpInfo, pBlock, pReader->order);

  double elapsedTime = (taosGetTimestampUs() - st) / 1000.0;
  pReader->cost.blockLoadTime += elapsedTime;

  int32_t unDumpedRows = asc ? pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1;
  tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
            ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s",
            pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, remain, unDumpedRows,
            pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr);

  return TSDB_CODE_SUCCESS;
}

774 775
/**
 * Load the column data of the file block currently selected by the block iterator.
 *
 * The columns listed in pReader->suppInfo.colIds are read into pBlockData through
 * tsdbReadColData(). On success the elapsed time is added to
 * pReader->cost.blockLoadTime and the dump state is flagged as "not all dumped".
 *
 * @param pReader         reader owning the file reader, the cost counters and the dump state
 * @param pBlockIter      iterator positioned on the block to load
 * @param pBlockScanInfo  scan info of the table owning the block (supplies the uid)
 * @param pBlockData      destination of the loaded column data
 * @return TSDB_CODE_SUCCESS, or the error code returned by tsdbReadColData()
 */
static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter,
                                   STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) {
  int64_t startUs = taosGetTimestampUs();

  SFileDataBlockInfo* pCurInfo = getCurrentBlockInfo(pBlockIter);
  SBlock*             pCurBlock = getCurrentBlock(pBlockIter);

  int32_t   numOfCols = blockDataGetNumOfCols(pReader->pResBlock);
  SBlockIdx blockIdx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid};

  int32_t code = tsdbReadColData(pReader->pFileReader, &blockIdx, pCurBlock, pReader->suppInfo.colIds, numOfCols,
                                 pBlockData, NULL, NULL);
  if (code != TSDB_CODE_SUCCESS) {
    // report the failing block and hand the error back to the caller untouched
    tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
              ", rows:%d, %s",
              pReader, pBlockIter->index, pCurInfo->tbBlockIdx, pCurBlock->minKey.ts, pCurBlock->maxKey.ts,
              pCurBlock->nRow, pReader->idStr);
    return code;
  }

  double elapsedMs = (taosGetTimestampUs() - startUs) / 1000.0;
  pReader->cost.blockLoadTime += elapsedMs;

  // a freshly loaded block has not been dumped into the result block yet
  pReader->status.fBlockDumpInfo.allDumped = false;

  tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
            ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s",
            pReader, pBlockIter->index, pCurInfo->tbBlockIdx, pCurBlock->minKey.ts, pCurBlock->maxKey.ts,
            pCurBlock->nRow, pCurBlock->minVersion, pCurBlock->maxVersion, elapsedMs, pReader->idStr);

  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
812

H
Haojun Liao 已提交
813 814 815
/**
 * Release every buffer owned by a block order supporter.
 *
 * The first pSup->numOfTables per-table wrapper arrays are freed, followed by the
 * wrapper pointer array itself and the two per-table counter arrays.
 */
static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) {
  int32_t tableIdx = 0;
  while (tableIdx < pSup->numOfTables) {
    taosMemoryFreeClear(pSup->pDataBlockInfo[tableIdx]);
    tableIdx += 1;
  }

  taosMemoryFreeClear(pSup->pDataBlockInfo);
  taosMemoryFreeClear(pSup->indexPerTable);
  taosMemoryFreeClear(pSup->numOfBlocksPerTable);
}
H
Hongze Cheng 已提交
824

H
Haojun Liao 已提交
825 826
/**
 * Allocate the zero-filled per-table arrays of a block order supporter.
 *
 * @param pSup         supporter to initialise
 * @param numOfTables  number of slots to allocate in each array, must be >= 1
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when any allocation fails
 *         (everything allocated so far is released before returning)
 */
static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) {
  ASSERT(numOfTables >= 1);

  pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
  pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
  pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables);

  bool allAllocated =
      (pSup->numOfBlocksPerTable != NULL) && (pSup->indexPerTable != NULL) && (pSup->pDataBlockInfo != NULL);
  if (allAllocated) {
    return TSDB_CODE_SUCCESS;
  }

  cleanupBlockOrderSupporter(pSup);
  return TSDB_CODE_OUT_OF_MEMORY;
}
H
Hongze Cheng 已提交
839

H
Haojun Liao 已提交
840
static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
841
  int32_t leftIndex = *(int32_t*)pLeft;
H
Haojun Liao 已提交
842
  int32_t rightIndex = *(int32_t*)pRight;
H
Hongze Cheng 已提交
843

H
Haojun Liao 已提交
844
  SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;
H
Hongze Cheng 已提交
845

H
Haojun Liao 已提交
846 847
  int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex];
  int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex];
H
Hongze Cheng 已提交
848

H
Haojun Liao 已提交
849 850 851 852 853 854 855
  if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftIndex]) {
    /* left block is empty */
    return 1;
  } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) {
    /* right block is empty */
    return -1;
  }
H
Hongze Cheng 已提交
856

857
  SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex];
H
Haojun Liao 已提交
858
  SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex];
H
Hongze Cheng 已提交
859

860 861 862 863
  return pLeftBlock->offset > pRightBlock->offset ? 1 : -1;
}

/**
 * Decode the block the iterator currently points at into pBlockIter->block.
 *
 * The owning table is looked up by uid in pBlockIter->pTableMap; the table's
 * pBlockList translates tbBlockIdx into an index of its mapData, from which the
 * block is decoded with tGetBlock.
 *
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter) {
  SFileDataBlockInfo* pCurrent = getCurrentBlockInfo(pBlockIter);

  STableBlockScanInfo* pTableInfo = taosHashGet(pBlockIter->pTableMap, &pCurrent->uid, sizeof(pCurrent->uid));
  int32_t*             pMapIdx = taosArrayGet(pTableInfo->pBlockList, pCurrent->tbBlockIdx);

  tMapDataGetItemByIdx(&pTableInfo->mapData, *pMapIdx, &pBlockIter->block, tGetBlock);
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
876

H
Haojun Liao 已提交
877
/**
 * Build the access order of all qualified file blocks and position the iterator.
 *
 * Every table in pReader->status.pTableMap that has a non-empty pBlockList
 * contributes its blocks. With a single contributing table the blocks are pushed
 * in list order; otherwise a multiway merge tree ordered by
 * fileDataBlockOrderCompar() (file offset) decides the order. Finally the iterator
 * index is set to the first block (ascending) or the last block (descending) and
 * that block is decoded via doSetCurrentBlock().
 *
 * @param pReader      reader supplying the table map, scan order and id string
 * @param pBlockIter   iterator to (re)initialise; its blockList is cleared first
 * @param numOfBlocks  total number of blocks expected across all tables
 * @return TSDB_CODE_SUCCESS, the code of initBlockOrderSupporter(), or
 *         TSDB_CODE_TDB_OUT_OF_MEMORY when a buffer or the merge tree cannot be created
 */
static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks) {
  bool asc = ASCENDING_TRAVERSE(pReader->order);

  pBlockIter->numOfBlocks = numOfBlocks;
  taosArrayClear(pBlockIter->blockList);

  // access data blocks according to the offset of each block in asc/desc order.
  int32_t numOfTables = (int32_t)taosHashGetSize(pReader->status.pTableMap);

  int64_t st = taosGetTimestampUs();

  SBlockOrderSupporter sup = {0};
  int32_t              code = initBlockOrderSupporter(&sup, numOfTables);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t cnt = 0;
  void*   ptr = NULL;
  while (1) {
    ptr = taosHashIterate(pReader->status.pTableMap, ptr);
    if (ptr == NULL) {
      break;
    }

    STableBlockScanInfo* pTableScanInfo = (STableBlockScanInfo*)ptr;
    if (pTableScanInfo->pBlockList == NULL || taosArrayGetSize(pTableScanInfo->pBlockList) == 0) {
      continue;
    }

    size_t num = taosArrayGetSize(pTableScanInfo->pBlockList);
    sup.numOfBlocksPerTable[sup.numOfTables] = (int32_t)num;

    char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num);
    if (buf == NULL) {
      // NOTE(review): this returns in the middle of a taosHashIterate() walk; confirm whether
      // the hash API requires the iteration to be cancelled explicitly before bailing out.
      cleanupBlockOrderSupporter(&sup);
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
    }

    sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf;
    SBlock block = {0};
    // size_t index: avoids the signed/unsigned comparison against num
    for (size_t k = 0; k < num; ++k) {
      SBlockOrderWrapper wrapper = {0};

      int32_t* mapDataIndex = taosArrayGet(pTableScanInfo->pBlockList, k);
      tMapDataGetItemByIdx(&pTableScanInfo->mapData, *mapDataIndex, &block, tGetBlock);

      wrapper.uid = pTableScanInfo->uid;
      wrapper.offset = block.aSubBlock[0].offset;

      sup.pDataBlockInfo[sup.numOfTables][k] = wrapper;
      cnt++;
    }

    sup.numOfTables += 1;
  }

  ASSERT(numOfBlocks == cnt);

  // since there is only one table qualified, blocks are not sorted
  if (sup.numOfTables == 1) {
    for (int32_t i = 0; i < numOfBlocks; ++i) {
      SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i};
      taosArrayPush(pBlockIter->blockList, &blockInfo);
    }

    int64_t et = taosGetTimestampUs();
    tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s",
              pReader, cnt, (et - st) / 1000.0, pReader->idStr);

    pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
    cleanupBlockOrderSupporter(&sup);
    doSetCurrentBlock(pBlockIter);
    return TSDB_CODE_SUCCESS;
  }

  tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables,
            pReader->idStr);

  ASSERT(cnt <= numOfBlocks && sup.numOfTables <= numOfTables);

  SMultiwayMergeTreeInfo* pTree = NULL;
  // keep the full status code: the result is compared against TSDB_CODE_SUCCESS and
  // narrowing it to uint8_t could turn a non-zero error code into 0
  int32_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar);
  if (ret != TSDB_CODE_SUCCESS) {
    cleanupBlockOrderSupporter(&sup);
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
  }

  int32_t numOfTotal = 0;
  while (numOfTotal < cnt) {
    int32_t pos = tMergeTreeGetChosenIndex(pTree);
    int32_t index = sup.indexPerTable[pos]++;

    SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index};
    taosArrayPush(pBlockIter->blockList, &blockInfo);

    // set data block index overflow, in order to disable the offset comparator
    if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) {
      sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1;
    }

    numOfTotal += 1;
    tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree));
  }

  int64_t et = taosGetTimestampUs();
  tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, cnt, (et - st) / 1000.0,
            pReader->idStr);
  cleanupBlockOrderSupporter(&sup);
  taosMemoryFree(pTree);

  pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
  doSetCurrentBlock(pBlockIter);

  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
993

H
Haojun Liao 已提交
994
/**
 * Advance the iterator by one block in its scan direction.
 *
 * @return false when the iterator already sits on the last block of its direction
 *         (nothing is changed); true after the index was moved and the new current
 *         block decoded through doSetCurrentBlock()
 */
static bool blockIteratorNext(SDataBlockIter* pBlockIter) {
  if (ASCENDING_TRAVERSE(pBlockIter->order)) {
    if (pBlockIter->index >= pBlockIter->numOfBlocks - 1) {
      return false;
    }
    pBlockIter->index += 1;
  } else {
    if (pBlockIter->index <= 0) {
      return false;
    }
    pBlockIter->index += -1;
  }

  doSetCurrentBlock(pBlockIter);
  return true;
}

1008 1009 1010
/**
 * This is an two rectangles overlap cases.
 */
1011
static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SBlock* pBlock) {
1012 1013 1014 1015
  return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) ||
         (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) ||
         (pVerRange->minVer > pBlock->minVersion && pVerRange->minVer <= pBlock->maxVersion) ||
         (pVerRange->maxVer < pBlock->maxVersion && pVerRange->maxVer >= pBlock->minVersion);
H
Haojun Liao 已提交
1016
}
H
Hongze Cheng 已提交
1017

1018 1019
/**
 * Fetch the block that follows the given one, in scan order, within the same table.
 *
 * @param pFBlockInfo          position (tbBlockIdx) of the current block in the table's block list
 * @param pTableBlockScanInfo  scan info of the table that owns the block
 * @param nextIndex            out: index of the neighbor in pBlockList; written only when a
 *                             neighbor position exists
 * @param order                scan order, selects the next (ascending) or previous (descending) entry
 * @return a heap-allocated copy of the neighbor block that the caller must release with
 *         taosMemoryFree(), or NULL when there is no neighbor in that direction or the
 *         copy cannot be allocated
 */
static SBlock* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STableBlockScanInfo* pTableBlockScanInfo,
                                           int32_t* nextIndex, int32_t order) {
  int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1;

  // signed arithmetic: an empty list must not wrap "size - 1" around to a huge unsigned value
  int32_t numOfBlocks = (int32_t)taosArrayGetSize(pTableBlockScanInfo->pBlockList);
  int32_t neighbor = pFBlockInfo->tbBlockIdx + step;
  if (neighbor < 0 || neighbor >= numOfBlocks) {
    return NULL;
  }

  *nextIndex = neighbor;

  SBlock* pBlock = taosMemoryCalloc(1, sizeof(SBlock));
  if (pBlock == NULL) {
    // out of memory: report "no neighbor" instead of decoding into a NULL block
    return NULL;
  }

  int32_t* indexInMapdata = taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex);
  tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, *indexInMapdata, pBlock, tGetBlock);
  return pBlock;
}

/**
 * Locate a block (same uid and same tbBlockIdx) in the iterator's block list.
 *
 * The search starts at the iterator's current index and walks in scan direction
 * until the list boundary is reached.
 *
 * @return the position in pBlockIter->blockList; when nothing matches ASSERT(0) is
 *         raised and -1 is returned
 */
static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) {
  ASSERT(pBlockIter != NULL && pFBlockInfo != NULL);

  int32_t direction = ASCENDING_TRAVERSE(pBlockIter->order) ? 1 : -1;

  for (int32_t pos = pBlockIter->index; pos >= 0 && pos < pBlockIter->numOfBlocks; pos += direction) {
    SFileDataBlockInfo* pCandidate = taosArrayGet(pBlockIter->blockList, pos);

    bool sameTable = (pCandidate->uid == pFBlockInfo->uid);
    bool sameSlot = (pCandidate->tbBlockIdx == pFBlockInfo->tbBlockIdx);
    if (sameTable && sameSlot) {
      return pos;
    }
  }

  ASSERT(0);
  return -1;
}

1058
/**
 * Make the block stored at position `index` the iterator's next current block.
 *
 * The iterator index is advanced by `step`; when the requested block is not already
 * at that position it is removed from its old slot and re-inserted at the new
 * iterator index. The block is then decoded through doSetCurrentBlock().
 *
 * @return -1 when index is outside [0, numOfBlocks), otherwise TSDB_CODE_SUCCESS
 */
static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) {
  bool inRange = (index >= 0) && (index < pBlockIter->numOfBlocks);
  if (!inRange) {
    return -1;
  }

  // copy the entry first, the list is modified below
  SFileDataBlockInfo* pSource = taosArrayGet(pBlockIter->blockList, index);
  SFileDataBlockInfo  moved = *pSource;

  pBlockIter->index += step;

  if (pBlockIter->index != index) {
    taosArrayRemove(pBlockIter->blockList, index);
    taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &moved);

    SFileDataBlockInfo* pPlaced = taosArrayGet(pBlockIter->blockList, pBlockIter->index);
    ASSERT(pPlaced->uid == moved.uid && pPlaced->tbBlockIdx == moved.tbBlockIdx);
  }

  doSetCurrentBlock(pBlockIter);
  return TSDB_CODE_SUCCESS;
}

/**
 * Tell whether a block shares its boundary timestamp with its neighbor in scan order.
 *
 * Ascending: the block's max key equals the neighbor's min key.
 * Descending: the block's min key equals the neighbor's max key.
 */
static bool overlapWithNeighborBlock(SBlock* pBlock, SBlock* pNeighbor, int32_t order) {
  if (!ASCENDING_TRAVERSE(order)) {
    return pNeighbor->maxKey.ts == pBlock->minKey.ts;
  }

  return pNeighbor->minKey.ts == pBlock->maxKey.ts;
}
H
Hongze Cheng 已提交
1086

1087
/**
 * Tell whether a buffered key lies at or before the block in scan direction.
 *
 * A key whose ts is TSKEY_INITIAL_VAL never qualifies. Otherwise the key qualifies
 * when key.ts <= block min key (ascending) or key.ts >= block max key (descending).
 */
static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SBlock* pBlock) {
  if (key.ts == TSKEY_INITIAL_VAL) {
    return false;
  }

  if (ASCENDING_TRAVERSE(order)) {
    return key.ts <= pBlock->minKey.ts;
  }

  return key.ts >= pBlock->maxKey.ts;
}
H
Hongze Cheng 已提交
1093

H
Haojun Liao 已提交
1094
/**
 * Tell whether key.ts falls inside the block's key range while the block's version
 * range intersects the requested version range.
 */
static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVerRange) {
  bool tsInside = (key.ts >= pBlock->minKey.ts) && (key.ts <= pBlock->maxKey.ts);
  if (!tsInside) {
    return false;
  }

  if (pBlock->maxVersion < pVerRange->minVer) {
    return false;
  }

  return pBlock->minVersion <= pVerRange->maxVer;
}

1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132
/**
 * Scan the delete skyline forward from startIndex and decide whether a delete
 * point affects the block.
 *
 * - A point inside the block's key range with version >= the block's min version
 *   means overlap.
 * - A point before the block only matters together with its successor: the
 *   successor has to reach the block's min key (and, unless it is the last point
 *   of the skyline, also carry a version >= the block's min version).
 * - The first point beyond the block's max key ends the scan without overlap.
 *
 * A negative startIndex yields false.
 */
static bool doCheckforDatablockOverlapFrom(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock,
                                           int32_t startIndex) {
  if (startIndex < 0) {
    return false;
  }

  int32_t num = (int32_t)taosArrayGetSize(pBlockScanInfo->delSkyline);

  for (int32_t i = startIndex; i < num; i += 1) {
    TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i);
    if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) {
      if (p->version >= pBlock->minVersion) {
        return true;
      }
    } else if (p->ts < pBlock->minKey.ts) {
      if (p->version >= pBlock->minVersion) {
        if (i < num - 1) {
          TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1);
          if (i + 1 == num - 1) {  // pnext is the last point
            if (pnext->ts >= pBlock->minKey.ts) {
              return true;
            }
          } else {
            if (pnext->ts >= pBlock->minKey.ts && pnext->version >= pBlock->minVersion) {
              return true;
            }
          }
        } else {  // it must be the last point
          ASSERT(p->version == 0);
        }
      }
    } else {  // p->ts > pBlock->maxKey.ts
      return false;
    }
  }

  return false;
}

/**
 * Check the block against the delete skyline, starting at the table's current
 * fileDelIndex.
 */
static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock) {
  return doCheckforDatablockOverlapFrom(pBlockScanInfo, pBlock, pBlockScanInfo->fileDelIndex);
}

/**
 * Tell whether the table's delete skyline overlaps the given file block.
 *
 * @param pBlockScanInfo  table scan info holding delSkyline and fileDelIndex
 * @param pBlock          file block to test
 * @param order           scan order
 * @return false when there is no (or an empty) skyline or the key ranges are disjoint;
 *         otherwise the result of the skyline scan
 */
static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock, int32_t order) {
  if (pBlockScanInfo->delSkyline == NULL) {
    return false;
  }

  // an empty skyline has neither a first nor a last point to look at
  int32_t num = (int32_t)taosArrayGetSize(pBlockScanInfo->delSkyline);
  if (num == 0) {
    return false;
  }

  // ts is not overlap
  TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0);
  TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline);
  if (pBlock->minKey.ts > pLast->ts || pBlock->maxKey.ts < pFirst->ts) {
    return false;
  }

  // version is not overlap
  if (ASCENDING_TRAVERSE(order)) {
    return doCheckforDatablockOverlap(pBlockScanInfo, pBlock);
  }

  // descending scan: walk back from fileDelIndex to the first point that is not
  // greater than the block's min key (or to the head of the skyline) ...
  int32_t index = pBlockScanInfo->fileDelIndex;
  if (index >= num) {
    index = num - 1;
  }
  if (index < 0) {
    index = 0;
  }

  while (index > 0) {
    TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index);
    if (p->ts <= pBlock->minKey.ts) {
      break;
    }
    index -= 1;
  }

  // ... and start the forward check from that point; previously the walked-back
  // index was computed and then ignored.
  return doCheckforDatablockOverlapFrom(pBlockScanInfo, pBlock, index);
}

1163 1164 1165 1166
// 1. the version of all rows should be less than the endVersion
// 2. current block should not overlap with next neighbor block
// 3. current timestamp should not be overlap with each other
// 4. output buffer should be large enough to hold all rows in current block
1167
// 5. delete info should not overlap with current block data
1168 1169
static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock,
                                STableBlockScanInfo* pScanInfo, TSDBKEY key) {
1170 1171 1172
  int32_t neighborIndex = 0;
  SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order);

1173
  // overlap with neighbor
1174 1175 1176
  bool overlapWithNeighbor = false;
  if (pNeighbor) {
    overlapWithNeighbor = overlapWithNeighborBlock(pBlock, pNeighbor, pReader->order);
1177
    taosMemoryFree(pNeighbor);
1178 1179
  }

1180
  // has duplicated ts of different version in this block
L
Liu Jicong 已提交
1181 1182
  bool hasDup = (pBlock->nSubBlock == 1) ? pBlock->hasDup : true;
  bool overlapWithDel = overlapWithDelSkyline(pScanInfo, pBlock, pReader->order);
1183

1184
  return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) ||
1185
          keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || overlapWithDel);
H
Haojun Liao 已提交
1186 1187
}

1188
/**
 * Fill the reader's result block from the in-memory iterators (mem / imem) only.
 *
 * Nothing is done when neither iterator has a value. Otherwise
 * buildDataBlockFromBufImpl() produces the rows, the block's time window and uid
 * are refreshed, the block is flagged as composed and the elapsed time is added to
 * pReader->cost.buildmemBlock.
 *
 * @param endKey  passed through to buildDataBlockFromBufImpl()
 * @return TSDB_CODE_SUCCESS when there is nothing to read, otherwise the code of
 *         buildDataBlockFromBufImpl()
 */
static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) {
  bool hasBufferedRows = pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal;
  if (!hasBufferedRows) {
    return TSDB_CODE_SUCCESS;
  }

  SSDataBlock* pResult = pReader->pResBlock;
  int64_t      startUs = taosGetTimestampUs();

  int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->capacity, pReader);

  blockDataUpdateTsWindow(pResult, 0);
  pResult->info.uid = pBlockScanInfo->uid;
  setComposedBlockFlag(pReader, true);

  double elapsedMs = (taosGetTimestampUs() - startUs) / 1000.0;
  tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%d, brange:%" PRId64
            " - %" PRId64 " %s",
            pReader, elapsedMs, pResult->info.rows, pResult->info.window.skey, pResult->info.window.ekey,
            pReader->idStr);

  pReader->cost.buildmemBlock += elapsedMs;
  return code;
}

1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232
/**
 * Fast path: append the current file row unmerged when the next row in scan
 * direction carries a different timestamp.
 *
 * The shortcut only applies when the current row is not the last one in scan
 * direction, so that a following row exists to compare against.
 *
 * @param key  timestamp of the current file row
 * @return true when the row was appended and pDumpInfo->rowIndex advanced by one
 *         step; false when the caller has to go through the row merger
 */
static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pBlockData, int64_t key,
                                            SFileBlockDumpInfo* pDumpInfo) {
  int32_t step = 0;

  if (pReader->order == TSDB_ORDER_ASC) {
    if (!(pDumpInfo->rowIndex < pDumpInfo->totalRows - 1)) {
      return false;  // border row, nothing follows
    }
    step = 1;
  } else if (pReader->order == TSDB_ORDER_DESC) {
    if (!(pDumpInfo->rowIndex > 0)) {
      return false;  // border row, nothing follows
    }
    step = -1;
  } else {
    return false;
  }

  // the following row shares the timestamp: a merge is needed
  if (pBlockData->aTSKEY[pDumpInfo->rowIndex + step] == key) {
    return false;
  }

  doAppendRowFromBlock(pReader->pResBlock, pReader, pBlockData, pDumpInfo->rowIndex);
  pDumpInfo->rowIndex += step;
  return true;
}

1233
static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow,
H
Haojun Liao 已提交
1234
                                     SIterInfo* pIter, int64_t key) {
1235
  SRowMerger          merge = {0};
H
Haojun Liao 已提交
1236
  STSRow*             pTSRow = NULL;
1237 1238 1239 1240
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  TSDBKEY k = TSDBROW_KEY(pRow);
1241
  TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
1242
  SArray* pDelList = pBlockScanInfo->delSkyline;
1243
  bool freeTSRow = false;
1244

1245 1246 1247
  // ascending order traverse
  if (ASCENDING_TRAVERSE(pReader->order)) {
    if (key < k.ts) {
1248 1249 1250 1251 1252 1253 1254 1255
      // imem & mem are all empty, only file exist
      if (tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo)) {
        return TSDB_CODE_SUCCESS;
      } else {
        tRowMergerInit(&merge, &fRow, pReader->pSchema);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
      }
1256
    } else if (k.ts < key) {  // k.ts < key
1257
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader, &freeTSRow);
1258 1259 1260
    } else {  // k.ts == key, ascending order: file block ----> imem rows -----> mem rows
      tRowMergerInit(&merge, &fRow, pReader->pSchema);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
1261 1262

      tRowMerge(&merge, pRow);
1263
      doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1264 1265

      tRowMergerGetRow(&merge, &pTSRow);
1266
    }
1267 1268
  } else {  // descending order scan
    if (key < k.ts) {
1269
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pIter, pDelList, &pTSRow, pReader, &freeTSRow);
1270
    } else if (k.ts < key) {
1271 1272 1273 1274 1275 1276 1277
      if (tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo)) {
        return TSDB_CODE_SUCCESS;
      } else {
        tRowMergerInit(&merge, &fRow, pReader->pSchema);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
      }
1278 1279 1280 1281
    } else {  // descending order: mem rows -----> imem rows ------> file block
      updateSchema(pRow, pBlockScanInfo->uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
1282
      doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
1283 1284 1285 1286 1287 1288

      tRowMerge(&merge, &fRow);
      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);

      tRowMergerGetRow(&merge, &pTSRow);
    }
1289 1290
  }

1291
  tRowMergerClear(&merge);
1292
  doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);
H
Haojun Liao 已提交
1293 1294

  taosMemoryFree(pTSRow);
1295 1296 1297
  return TSDB_CODE_SUCCESS;
}

1298 1299 1300 1301
/**
 * Produce one output row from three sources: the current file row, the current mem
 * row and the current imem row, and append it to the reader's result block.
 *
 * Both in-memory iterators must have a valid row. With `key` the file row's
 * timestamp, `k` the mem row key and `ik` the imem row key, the source(s) that come
 * first in scan order are emitted; sources sharing that timestamp are merged into
 * one row.
 *
 * Every row obtained from a row merger is released after it has been appended and
 * the merger is cleared; a row from doMergeMultiRows() is released only when that
 * function reports ownership through freeTSRow.
 *
 * @return TSDB_CODE_SUCCESS; -1 only if no case matches (guarded by ASSERT(0))
 */
static int32_t doMergeThreeLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SRowMerger merge = {0};
  STSRow*    pTSRow = NULL;

  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  SArray*             pDelList = pBlockScanInfo->delSkyline;

  TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pDelList, pReader);
  TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pDelList, pReader);
  ASSERT(pRow != NULL && piRow != NULL);

  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
  bool    freeTSRow = false;

  uint64_t uid = pBlockScanInfo->uid;

  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);
  if (ASCENDING_TRAVERSE(pReader->order)) {
    // [1&2] key <= [k.ts && ik.ts]
    if (key <= k.ts && key <= ik.ts) {
      TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
      tRowMergerInit(&merge, &fRow, pReader->pSchema);

      doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);

      if (ik.ts == key) {
        tRowMerge(&merge, piRow);
        doMergeRowsInBuf(&pBlockScanInfo->iiter, uid, key, pDelList, &merge, pReader);
      }

      if (k.ts == key) {
        tRowMerge(&merge, pRow);
        doMergeRowsInBuf(&pBlockScanInfo->iter, uid, key, pDelList, &merge, pReader);
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);

      // the merged row and the merger state are owned here
      taosMemoryFree(pTSRow);
      tRowMergerClear(&merge);
      return TSDB_CODE_SUCCESS;
    } else {  // key > ik.ts || key > k.ts
      // No assertion on key != ik.ts here: case [5] below explicitly allows key == ik.ts.

      // [3] ik.ts < key <= k.ts
      // [4] ik.ts < k.ts <= key
      if (ik.ts < k.ts) {
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader, &freeTSRow);
        doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);
        if (freeTSRow) {
          taosMemoryFree(pTSRow);
        }
        return TSDB_CODE_SUCCESS;
      }

      // [5] k.ts < key   <= ik.ts
      // [6] k.ts < ik.ts <= key
      if (k.ts < ik.ts) {
        doMergeMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, &pTSRow, pReader, &freeTSRow);
        doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);
        if (freeTSRow) {
          taosMemoryFree(pTSRow);
        }
        return TSDB_CODE_SUCCESS;
      }

      // [7] k.ts == ik.ts < key
      if (k.ts == ik.ts) {
        ASSERT(key > ik.ts && key > k.ts);

        doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pTSRow);
        doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);
        taosMemoryFree(pTSRow);
        return TSDB_CODE_SUCCESS;
      }
    }
  } else {  // descending order scan
    // [1/2] k.ts >= ik.ts && k.ts >= key
    if (k.ts >= ik.ts && k.ts >= key) {
      updateSchema(pRow, uid, pReader);

      tRowMergerInit(&merge, pRow, pReader->pSchema);
      // the merger was seeded with the mem row, so further buffer rows are matched on
      // k.ts (identical to key only when the file row shares the timestamp)
      doMergeRowsInBuf(&pBlockScanInfo->iter, uid, k.ts, pDelList, &merge, pReader);

      if (ik.ts == k.ts) {
        tRowMerge(&merge, piRow);
        doMergeRowsInBuf(&pBlockScanInfo->iiter, uid, k.ts, pDelList, &merge, pReader);
      }

      if (k.ts == key) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
      }

      tRowMergerGetRow(&merge, &pTSRow);
      doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);

      // the merged row and the merger state are owned here
      taosMemoryFree(pTSRow);
      tRowMergerClear(&merge);
      return TSDB_CODE_SUCCESS;
    } else {
      // No assertion on ik.ts != k.ts here: k.ts == ik.ts < key also reaches this
      // branch and is handled by case [5/6] below.

      // [3] ik.ts > k.ts >= Key
      // [4] ik.ts > key >= k.ts
      if (ik.ts > key) {
        doMergeMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, &pTSRow, pReader, &freeTSRow);
        doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);
        if (freeTSRow) {
          taosMemoryFree(pTSRow);
        }
        return TSDB_CODE_SUCCESS;
      }

      // [5] key > ik.ts > k.ts
      // [6] key > k.ts > ik.ts
      if (key > ik.ts) {
        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMergerInit(&merge, &fRow, pReader->pSchema);

        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);

        taosMemoryFree(pTSRow);
        tRowMergerClear(&merge);
        return TSDB_CODE_SUCCESS;
      }

      // [7] key = ik.ts > k.ts
      if (key == ik.ts) {
        // descending order: imem rows ------> file block. The merger has to be seeded
        // with the imem row before the file row can be merged into it.
        updateSchema(piRow, uid, pReader);

        tRowMergerInit(&merge, piRow, pReader->pSchema);
        doMergeRowsInBuf(&pBlockScanInfo->iiter, uid, ik.ts, pDelList, &merge, pReader);

        TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
        tRowMerge(&merge, &fRow);
        doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge);
        tRowMergerGetRow(&merge, &pTSRow);
        doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow);

        taosMemoryFree(pTSRow);
        tRowMergerClear(&merge);
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  ASSERT(0);
  return -1;
}

dengyihao's avatar
dengyihao 已提交
1442 1443
/**
 * Check whether the file row at pDumpInfo->rowIndex qualifies for output.
 *
 * The row must lie inside the reader's version range and time window and must not
 * be reported as dropped by hasBeenDropped() against the table's delete skyline.
 */
static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo,
                                STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
  // version range
  int64_t rowVer = pBlockData->aVersion[pDumpInfo->rowIndex];
  bool    verInRange = (rowVer >= pReader->verRange.minVer) && (rowVer <= pReader->verRange.maxVer);
  if (!verInRange) {
    return false;
  }

  // time window
  int64_t rowTs = pBlockData->aTSKEY[pDumpInfo->rowIndex];
  bool    tsInWindow = (rowTs >= pReader->window.skey) && (rowTs <= pReader->window.ekey);
  if (!tsInWindow) {
    return false;
  }

  // delete info
  TSDBKEY rowKey = {.ts = rowTs, .version = rowVer};
  return !hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &rowKey, pReader->order);
}

1463
/** Return true when ts lies outside the closed interval [pWindow->skey, pWindow->ekey]. */
static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) {
  bool inside = (ts >= pWindow->skey) && (ts <= pWindow->ekey);
  return !inside;
}
1464

1465 1466 1467 1468 1469
/**
 * Emit one output row for the current file row, merging it with whatever the
 * in-memory iterators currently offer.
 *
 * Dispatch on the iterators that still have a value after fetching their valid rows:
 *   mem + imem  -> doMergeThreeLevelRows()
 *   imem only   -> doMergeBufAndFileRows() with the imem row
 *   mem only    -> doMergeBufAndFileRows() with the mem row
 *   none        -> the file row alone (unmerged fast path, or merged over duplicated
 *                  timestamps inside the file block)
 *
 * @return the result of the selected merge routine, TSDB_CODE_SUCCESS for the file-only path
 */
static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SBlockData*         pFileData = &pReader->status.fileBlockData;
  SFileBlockDumpInfo* pDump = &pReader->status.fBlockDumpInfo;

  int64_t fileKey = pFileData->aTSKEY[pDump->rowIndex];

  // fetch the valid rows first; the hasVal flags are consulted only afterwards
  TSDBROW* pMemRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* pImemRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader);

  // mem + imem + file
  if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal) {
    return doMergeThreeLevelRows(pReader, pBlockScanInfo);
  }

  // imem + file
  if (pBlockScanInfo->iiter.hasVal) {
    return doMergeBufAndFileRows(pReader, pBlockScanInfo, pImemRow, &pBlockScanInfo->iiter, fileKey);
  }

  // mem + file
  if (pBlockScanInfo->iter.hasVal) {
    return doMergeBufAndFileRows(pReader, pBlockScanInfo, pMemRow, &pBlockScanInfo->iter, fileKey);
  }

  // imem & mem are all empty, only file exist
  if (tryCopyDistinctRowFromFileBlock(pReader, pFileData, fileKey, pDump)) {
    return TSDB_CODE_SUCCESS;
  }

  SRowMerger merger = {0};
  STSRow*    pMerged = NULL;
  TSDBROW    fileRow = tsdbRowFromBlockData(pFileData, pDump->rowIndex);

  tRowMergerInit(&merger, &fileRow, pReader->pSchema);
  doMergeRowsInFileBlocks(pFileData, pBlockScanInfo, pReader, &merger);
  tRowMergerGetRow(&merger, &pMerged);
  doAppendRowFromTSRow(pReader->pResBlock, pReader, pMerged);

  taosMemoryFree(pMerged);
  tRowMergerClear(&merger);
  return TSDB_CODE_SUCCESS;
}

1507
// Fill pReader->pResBlock with rows composed from the currently loaded file block and the in-memory
// buffers of the given table. Stops when the file block is exhausted or the result block reaches
// pReader->capacity rows.
//
// Returns TSDB_CODE_SUCCESS, or the error reported by buildComposedDataBlockImpl().
static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) {
  SSDataBlock* pResBlock = pReader->pResBlock;

  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
  int32_t             step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;

  int64_t st = taosGetTimestampUs();

  while (1) {
    // todo check the validate of row in file block
    if (!isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) {
      // skip the invalid row; finish the block if that was its last row in scan direction
      pDumpInfo->rowIndex += step;

      SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter);
      if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
        setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
        break;
      }

      continue;
    }

    // propagate failures instead of looping again on a row that could not be processed
    int32_t code = buildComposedDataBlockImpl(pReader, pBlockScanInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter);

    // currently loaded file data block is consumed
    if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) {
      setBlockAllDumped(pDumpInfo, pBlock, pReader->order);
      break;
    }

    if (pResBlock->info.rows >= pReader->capacity) {
      break;
    }
  }

  pResBlock->info.uid = pBlockScanInfo->uid;
  blockDataUpdateTsWindow(pResBlock, 0);

  setComposedBlockFlag(pReader, true);
  int64_t et = taosGetTimestampUs();

  // window keys are signed timestamps, hence PRId64
  tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRId64 "-%" PRId64
            " rows:%d, elapsed time:%.2f ms %s",
            pReader, pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey,
            pResBlock->info.rows, (et - st) / 1000.0, pReader->idStr);

  return TSDB_CODE_SUCCESS;
}

// Record in the reader status whether the current result block is a composed block.
void setComposedBlockFlag(STsdbReader* pReader, bool composed) {
  SReaderStatus* pStatus = &pReader->status;
  pStatus->composedDataBlock = composed;
}

1562
// Create the mem and imem iterators of one table (once per table, guarded by iterInit) and build its
// delete skyline. The iterators start at the window start key for ascending scans and at the window
// end key for descending scans.
//
// Returns TSDB_CODE_SUCCESS, the error from tsdbTbDataIterCreate(), or the error from
// initDelSkylineIterator().
static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
  if (pBlockScanInfo->iterInit) {
    return TSDB_CODE_SUCCESS;
  }

  int32_t code = TSDB_CODE_SUCCESS;

  TSDBKEY startKey = {0};
  if (ASCENDING_TRAVERSE(pReader->order)) {
    startKey = (TSDBKEY){.ts = pReader->window.skey, .version = pReader->verRange.minVer};
  } else {
    startKey = (TSDBKEY){.ts = pReader->window.ekey, .version = pReader->verRange.maxVer};
  }

  int32_t backward = (!ASCENDING_TRAVERSE(pReader->order));

  STbData* d = NULL;
  if (pReader->pReadSnap->pMem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d);
    if (d != NULL) {
      code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter);
      if (code == TSDB_CODE_SUCCESS) {
        pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL);

        tsdbDebug("%p uid:%" PRId64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
                  "-%" PRId64 " %s",
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, d->minKey, d->maxKey, pReader->idStr);
      } else {
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for mem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
        return code;
      }
    }
  } else {
    tsdbDebug("%p uid:%" PRId64 ", no data in mem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
  }

  STbData* di = NULL;
  if (pReader->pReadSnap->pIMem != NULL) {
    tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di);
    if (di != NULL) {
      code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter);
      if (code == TSDB_CODE_SUCCESS) {
        pBlockScanInfo->iiter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iiter.iter) != NULL);

        tsdbDebug("%p uid:%" PRId64 ", check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
                  "-%" PRId64 " %s",
                  pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, di->minKey, di->maxKey, pReader->idStr);
      } else {
        tsdbError("%p uid:%" PRId64 ", failed to create iterator for imem, code:%s, %s", pReader, pBlockScanInfo->uid,
                  tstrerror(code), pReader->idStr);
        return code;
      }
    }
  } else {
    tsdbDebug("%p uid:%" PRId64 ", no data in imem, %s", pReader, pBlockScanInfo->uid, pReader->idStr);
  }

  code = initDelSkylineIterator(pBlockScanInfo, pReader, d, di);

  // the iterators exist from here on; mark them initialized so a later call does not create them again
  pBlockScanInfo->iterInit = true;

  if (code != TSDB_CODE_SUCCESS) {
    tsdbError("%p uid:%" PRId64 ", failed to build delete skyline, code:%s, %s", pReader, pBlockScanInfo->uid,
              tstrerror(code), pReader->idStr);
  }

  return code;
}

dengyihao's avatar
dengyihao 已提交
1626 1627
// Build pBlockScanInfo->delSkyline for one table from three sources: the delete file of the read
// snapshot, the delete list hanging off the mem table data and the one hanging off the imem table
// data. Also positions the delete-list cursors of both buffer iterators and of the file reader at
// the first entry (ascending scan) or the last entry (descending scan).
//
// pMemTbData / piMemTbData may be NULL. Does nothing when the skyline already exists.
// Returns TSDB_CODE_SUCCESS, TSDB_CODE_OUT_OF_MEMORY when an array cannot be allocated, or the error
// reported by the delete-file reader / tsdbBuildDeleteSkyline().
int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData,
                               STbData* piMemTbData) {
  if (pBlockScanInfo->delSkyline != NULL) {
    return TSDB_CODE_SUCCESS;
  }

  int32_t code = TSDB_CODE_SUCCESS;
  STsdb*  pTsdb = pReader->pTsdb;

  SArray* pDelData = taosArrayInit(4, sizeof(SDelData));
  if (pDelData == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile;
  if (pDelFile) {
    SDelFReader* pDelFReader = NULL;
    code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
    }

    SArray* aDelIdx = taosArrayInit(4, sizeof(SDelIdx));
    if (aDelIdx == NULL) {
      // without an explicit code this path would report success with no skyline built
      code = TSDB_CODE_OUT_OF_MEMORY;
      tsdbDelFReaderClose(&pDelFReader);
      goto _err;
    }

    code = tsdbReadDelIdx(pDelFReader, aDelIdx, NULL);
    if (code != TSDB_CODE_SUCCESS) {
      taosArrayDestroy(aDelIdx);
      tsdbDelFReaderClose(&pDelFReader);
      goto _err;
    }

    SDelIdx  idx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid};
    SDelIdx* pIdx = taosArraySearch(aDelIdx, &idx, tCmprDelIdx, TD_EQ);

    if (pIdx != NULL) {
      code = tsdbReadDelData(pDelFReader, pIdx, pDelData, NULL);
    }

    taosArrayDestroy(aDelIdx);
    tsdbDelFReaderClose(&pDelFReader);

    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
    }
  }

  // append the delete records linked from the mem and imem table data
  SDelData* p = NULL;
  if (pMemTbData != NULL) {
    p = pMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
  }

  if (piMemTbData != NULL) {
    p = piMemTbData->pHead;
    while (p) {
      taosArrayPush(pDelData, p);
      p = p->pNext;
    }
  }

  if (taosArrayGetSize(pDelData) > 0) {
    pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY));
    if (pBlockScanInfo->delSkyline == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
    }

    code = tsdbBuildDeleteSkyline(pDelData, 0, (int32_t)(taosArrayGetSize(pDelData) - 1), pBlockScanInfo->delSkyline);
  }

  taosArrayDestroy(pDelData);
  pBlockScanInfo->iter.index =
      ASCENDING_TRAVERSE(pReader->order) ? 0 : taosArrayGetSize(pBlockScanInfo->delSkyline) - 1;
  pBlockScanInfo->iiter.index = pBlockScanInfo->iter.index;
  pBlockScanInfo->fileDelIndex = pBlockScanInfo->iter.index;
  return code;

_err:
  taosArrayDestroy(pDelData);
  return code;
}

1707 1708 1709
// Return the key of the next valid buffered row (mem or imem) of the table that owns the current
// file block. With rows in both buffers, the one that comes first in scan direction wins: the
// smaller timestamp for an ascending scan, the larger one for a descending scan.
// When neither buffer has a valid row the returned key keeps ts == TSKEY_INITIAL_VAL.
//
// The return code of initMemDataIterator() cannot be reported through this signature and is ignored.
static TSDBKEY getCurrentKeyInBuf(SDataBlockIter* pBlockIter, STsdbReader* pReader) {
  TSDBKEY key = {.ts = TSKEY_INITIAL_VAL};
  bool    hasKey = false;

  SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(pBlockIter);
  STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

  initMemDataIterator(pScanInfo, pReader);

  TSDBROW* pRow = getValidRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader);
  if (pRow != NULL) {
    key = TSDBROW_KEY(pRow);
    hasKey = true;
  }

  pRow = getValidRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader);
  if (pRow != NULL) {
    TSDBKEY k = TSDBROW_KEY(pRow);
    bool    asc = ASCENDING_TRAVERSE(pReader->order);

    // comparing against the TSKEY_INITIAL_VAL placeholder would wrongly discard the imem key,
    // so take it unconditionally when mem produced nothing
    if (!hasKey || (asc && k.ts < key.ts) || (!asc && k.ts > key.ts)) {
      key = k;
    }
  }

  return key;
}

H
Haojun Liao 已提交
1730 1731
// Advance the fileset iterator until a fileset is found in which doLoadFileBlock() reports at least
// one valid table, or until no fileset is left. On error the code of the failing loader is returned.
static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) {
  SReaderStatus* pStatus = &pReader->status;
  int32_t        code = TSDB_CODE_SUCCESS;
  SArray*        pBlockIdxList = taosArrayInit(taosHashGetSize(pReader->status.pTableMap), sizeof(SBlockIdx));

  // the loop ends when there are no more data files on disk
  while (filesetIteratorNext(&pStatus->fileIter, pReader)) {
    taosArrayClear(pBlockIdxList);

    code = doLoadBlockIndex(pReader, pReader->pFileReader, pBlockIdxList);
    if (code != TSDB_CODE_SUCCESS) {
      break;
    }

    if (taosArrayGetSize(pBlockIdxList) == 0) {
      continue;  // no blocks in current file, try next files
    }

    uint32_t validTables = 0;
    code = doLoadFileBlock(pReader, pBlockIdxList, &validTables, numOfBlocks);
    if (code != TSDB_CODE_SUCCESS || validTables > 0) {
      break;
    }
  }

  taosArrayDestroy(pBlockIdxList);
  return code;
}

1768 1769 1770
// Produce the next result block for the file block the block iterator points at. Three outcomes:
// load the file block and compose it with buffered rows, emit buffered rows that precede the file
// block, or hand back the file block as a whole without loading its data.
static int32_t doBuildDataBlock(STsdbReader* pReader) {
  SReaderStatus*       pStatus = &pReader->status;
  SDataBlockIter*      pIter = &pStatus->blockIter;
  SFileDataBlockInfo*  pBlockInfo = getCurrentBlockInfo(pIter);
  STableBlockScanInfo* pTableInfo = taosHashGet(pStatus->pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid));
  SBlock*              pFileBlock = getCurrentBlock(pIter);
  TSDBKEY              bufKey = getCurrentKeyInBuf(pIter, pReader);

  if (fileBlockShouldLoad(pReader, pBlockInfo, pFileBlock, pTableInfo, bufKey)) {
    tBlockDataReset(&pStatus->fileBlockData);
    tBlockDataClearData(&pStatus->fileBlockData);

    int32_t loadCode = doLoadFileBlockData(pReader, pIter, pTableInfo, &pStatus->fileBlockData);
    if (loadCode != TSDB_CODE_SUCCESS) {
      return loadCode;
    }

    // build composed data block
    return buildComposedDataBlock(pReader, pTableInfo);
  }

  if (bufferDataInFileBlockGap(pReader->order, bufKey, pFileBlock)) {
    // data in memory that are earlier than current file block
    // todo rows in buffer should be less than the file block in asc, greater than file block in desc
    int64_t endKey = pFileBlock->maxKey.ts;
    if (ASCENDING_TRAVERSE(pReader->order)) {
      endKey = pFileBlock->minKey.ts;
    }
    return buildDataBlockFromBuf(pReader, pTableInfo, endKey);
  }

  // whole block is required, return it directly
  SDataBlockInfo* pResInfo = &pReader->pResBlock->info;
  pResInfo->rows = pFileBlock->nRow;
  pResInfo->uid = pTableInfo->uid;
  pResInfo->window = (STimeWindow){.skey = pFileBlock->minKey.ts, .ekey = pFileBlock->maxKey.ts};
  setComposedBlockFlag(pReader, false);
  setBlockAllDumped(&pStatus->fBlockDumpInfo, pFileBlock, pReader->order);

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
1807
// Walk the table map, resuming at pStatus->pTableIter, and build a result block from the in-memory
// buffers of the first table that yields rows. Returns TSDB_CODE_SUCCESS both when a block was built
// and when every table is exhausted; the caller tells them apart through pResBlock->info.rows.
static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) {
  SReaderStatus* pStatus = &pReader->status;

  // first call: start from the beginning of the table map
  if (pStatus->pTableIter == NULL) {
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);
  }

  while (pStatus->pTableIter != NULL) {
    STableBlockScanInfo* pTable = pStatus->pTableIter;
    initMemDataIterator(pTable, pReader);

    int64_t endKey = INT64_MIN;
    if (ASCENDING_TRAVERSE(pReader->order)) {
      endKey = INT64_MAX;
    }

    int32_t code = buildDataBlockFromBuf(pReader, pTable, endKey);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pReader->pResBlock->info.rows > 0) {
      return TSDB_CODE_SUCCESS;
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
  }

  return TSDB_CODE_SUCCESS;
}

1839
// set the correct start position in case of the first/last file block, according to the query time window
1840
static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
1841
  SBlock* pBlock = getCurrentBlock(pBlockIter);
1842

1843 1844 1845
  SReaderStatus* pStatus = &pReader->status;

  SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo;
1846 1847 1848

  pDumpInfo->totalRows = pBlock->nRow;
  pDumpInfo->allDumped = false;
1849
  pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->order) ? 0 : pBlock->nRow - 1;
1850 1851
}

1852
// Move to the next fileset that has blocks to read and prepare the block iterator and the dump
// cursor for its first block. When no fileset is left, pReader->status.loadFromFile is cleared so
// the caller switches to the in-memory buffers.
//
// Returns TSDB_CODE_SUCCESS or the error from moveToNextFile() / initBlockIterator().
static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
  int32_t numOfBlocks = 0;
  int32_t code = moveToNextFile(pReader, &numOfBlocks);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // all data files are consumed, try data in buffer
  if (numOfBlocks == 0) {
    pReader->status.loadFromFile = false;
    return code;
  }

  // initialize the block iterator for a new fileset
  code = initBlockIterator(pReader, pBlockIter, numOfBlocks);
  if (code != TSDB_CODE_SUCCESS) {
    // the iterator is not usable, so do not touch its current block
    return code;
  }

  // place the dump cursor on the first row to read, according to the scan order
  initBlockDumpInfo(pReader, pBlockIter);
  return code;
}

1873
// True when the current file block has been started but not finished: not flagged as fully dumped,
// and the row cursor has moved off its initial position for the given scan direction.
static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) {
  if (pDumpInfo->allDumped) {
    return false;
  }

  if (asc) {
    return pDumpInfo->rowIndex > 0;
  }

  return pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1);
}

1878
// Keep building result blocks from data files until one contains rows, an error occurs, or all data
// files have been checked (pReader->status.loadFromFile becomes false).
static int32_t buildBlockFromFiles(STsdbReader* pReader) {
  const bool      asc = ASCENDING_TRAVERSE(pReader->order);
  SDataBlockIter* pIter = &pReader->status.blockIter;

  for (;;) {
    SFileDataBlockInfo*  pInfo = getCurrentBlockInfo(&pReader->status.blockIter);
    STableBlockScanInfo* pTable = taosHashGet(pReader->status.pTableMap, &pInfo->uid, sizeof(pInfo->uid));
    SFileBlockDumpInfo*  pDump = &pReader->status.fBlockDumpInfo;
    int32_t              code = TSDB_CODE_SUCCESS;

    if (fileBlockPartiallyRead(pDump, asc)) {
      // file data block is partially loaded: continue composing from it
      code = buildComposedDataBlock(pReader, pTable);
    } else {
      if (pDump->allDumped) {
        // current block is exhausted, try the next data block in the current file
        if (blockIteratorNext(&pReader->status.blockIter)) {
          initBlockDumpInfo(pReader, pIter);
        } else {
          // data blocks in current file are exhausted, let's try the next file now
          code = initForFirstBlockInFile(pReader, pIter);

          // error happens or all the data files are completely checked
          if ((code != TSDB_CODE_SUCCESS) || !pReader->status.loadFromFile) {
            return code;
          }
        }
      }

      // current block is not loaded yet, or data in buffer may overlap with the file block.
      code = doBuildDataBlock(pReader);
    }

    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pReader->pResBlock->info.rows > 0) {
      return TSDB_CODE_SUCCESS;
    }
  }
}
H
refact  
Hongze Cheng 已提交
1922

1923 1924
// Pick the tsdb instance to query. For a non-rsma vnode this is always VND_TSDB(pVnode) and *pLevel
// is left untouched. For an rsma vnode the retention levels are walked from level 0 upwards until
// one whose keep span still covers winSKey is found; the chosen level is stored in *pLevel.
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr,
                                  int8_t* pLevel) {
  if (!VND_IS_RSMA(pVnode)) {
    return VND_TSDB(pVnode);
  }

  int8_t  level = 0;
  int64_t now = taosGetTimestamp(pVnode->config.tsdbCfg.precision);

  for (int8_t round = 0; round < TSDB_RETENTION_MAX; ++round) {
    SRetention* pCur = &retentions[level];

    if (pCur->keep <= 0) {
      // this level is not configured: fall back to the previous one, if any
      if (level > 0) {
        --level;
      }
      break;
    }

    if ((now - pCur->keep) <= winSKey) {
      break;
    }

    ++level;
  }

  const char* str = "";
  if (idStr != NULL) {
    str = idStr;
  }

  // any level other than L0/L1 is served by level 2
  int8_t selected = TSDB_RETENTION_L2;
  STsdb* pTsdb = NULL;
  if (level == TSDB_RETENTION_L0) {
    selected = TSDB_RETENTION_L0;
    pTsdb = VND_RSMA0(pVnode);
  } else if (level == TSDB_RETENTION_L1) {
    selected = TSDB_RETENTION_L1;
    pTsdb = VND_RSMA1(pVnode);
  } else {
    pTsdb = VND_RSMA2(pVnode);
  }

  *pLevel = selected;
  tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), selected, str);
  return pTsdb;
}

H
Haojun Liao 已提交
1963
// Compute the version range of a query. A start version of -1 means "from version 0"; an end
// version of -1 (not specified by the user) or one beyond the vnode's applied version is replaced
// by the applied version. The level argument is not used.
SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) {
  int64_t startVer = pCond->startVersion;
  if (pCond->startVersion == -1) {
    startVer = 0;
  }

  bool    useApplied = (pCond->endVersion == -1) || (pCond->endVersion > pVnode->state.applied);
  int64_t endVer = useApplied ? pVnode->state.applied : pCond->endVersion;

  return (SVersionRange){.minVer = startVer, .maxVer = endVer};
}

H
Hongze Cheng 已提交
1977 1978 1979 1980
// // todo not unref yet, since it is not support multi-group interpolation query
// static UNUSED_FUNC void changeQueryHandleForInterpQuery(STsdbReader* pHandle) {
//   // filter the queried time stamp in the first place
//   STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle;
H
refact  
Hongze Cheng 已提交
1981

H
Hongze Cheng 已提交
1982 1983
//   // starts from the buffer in case of descending timestamp order check data blocks
//   size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
refact  
Hongze Cheng 已提交
1984

H
Hongze Cheng 已提交
1985 1986
//   int32_t i = 0;
//   while (i < numOfTables) {
H
Haojun Liao 已提交
1987
//     STableBlockScanInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
refact  
Hongze Cheng 已提交
1988

H
Hongze Cheng 已提交
1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
//     // the first qualified table for interpolation query
//     //    if ((pTsdbReadHandle->window.skey <= pCheckInfo->pTableObj->lastKey) &&
//     //        (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) {
//     //      break;
//     //    }

//     i++;
//   }

//   // there are no data in all the tables
//   if (i == numOfTables) {
//     return;
//   }

H
Haojun Liao 已提交
2003
//   STableBlockScanInfo info = *(STableBlockScanInfo*)taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Hongze Cheng 已提交
2004 2005 2006 2007 2008 2009
//   taosArrayClear(pTsdbReadHandle->pTableCheckInfo);

//   info.lastKey = pTsdbReadHandle->window.skey;
//   taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info);
// }

2010
// Decide whether the row identified by pKey (timestamp + version) is covered by a delete record.
//
// pDelList  array of TSDBKEY entries (the table's delete skyline); NULL or empty means nothing is
//           deleted.
// index     in/out cursor into pDelList; it is advanced in scan direction as keys move on, so it
//           must be kept between calls of the same scan.
// pKey      key of the row to check, must not be NULL.
// order     scan order; ASCENDING_TRAVERSE(order) selects the forward walk.
//
// A row counts as deleted when its timestamp falls between two neighbouring entries and the version
// of the earlier entry is >= the row's version.
bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order) {
  ASSERT(pKey != NULL);
  if (pDelList == NULL) {
    return false;
  }

  size_t num = taosArrayGetSize(pDelList);
  if (num == 0) {
    // nothing to compare against; also keeps "num - 1" below from wrapping around
    return false;
  }

  bool    asc = ASCENDING_TRAVERSE(order);
  int32_t step = asc ? 1 : -1;

  if (asc) {
    if (*index >= num - 1) {
      // cursor is on the last entry
      TSDBKEY* last = taosArrayGetLast(pDelList);
      ASSERT(pKey->ts >= last->ts);

      if (pKey->ts > last->ts) {
        return false;
      } else if (pKey->ts == last->ts) {
        if (num < 2) {
          // a single entry has no predecessor to take the version from
          return false;
        }

        TSDBKEY* prev = taosArrayGet(pDelList, num - 2);
        return (prev->version >= pKey->version);
      }
    } else {
      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
      TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1);

      if (pKey->ts < pCurrent->ts) {
        return false;
      }

      if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
        return true;
      }

      // the key is past the current range: move the cursor forward until a range covers it
      while (pNext->ts <= pKey->ts && (*index) < num - 1) {
        (*index) += 1;

        if ((*index) < num - 1) {
          pCurrent = taosArrayGet(pDelList, *index);
          pNext = taosArrayGet(pDelList, (*index) + 1);

          // it is not a consecutive deletion range, ignore it
          if (pCurrent->version == 0 && pNext->version > 0) {
            continue;
          }

          if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
            return true;
          }
        }
      }

      return false;
    }
  } else {
    if (*index <= 0) {
      // cursor is on (or before) the first entry
      TSDBKEY* pFirst = taosArrayGet(pDelList, 0);

      if (pKey->ts < pFirst->ts) {
        return false;
      } else if (pKey->ts == pFirst->ts) {
        return pFirst->version >= pKey->version;
      } else {
        ASSERT(0);
      }
    } else {
      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
      TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1);

      if (pKey->ts > pCurrent->ts) {
        return false;
      }

      if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
        return true;
      }

      // the key is before the current range: move the cursor backward until a range covers it
      while (pPrev->ts >= pKey->ts && (*index) > 1) {
        (*index) += step;

        if ((*index) >= 1) {
          pCurrent = taosArrayGet(pDelList, *index);
          pPrev = taosArrayGet(pDelList, (*index) - 1);

          // it is not a consecutive deletion range, ignore it
          if (pCurrent->version > 0 && pPrev->version == 0) {
            continue;
          }

          if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
            return true;
          }
        }
      }

      return false;
    }
  }

  return false;
}

// Return the row at or after the iterator's current position that lies inside the reader's time
// window, inside its version range and is not covered by pDelList. The iterator is advanced past
// rows that fail the version/delete checks. Returns NULL, with pIter->hasVal cleared, when the
// iterator runs out of rows or reaches a row outside the time window.
TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) {
  if (!pIter->hasVal) {
    return NULL;
  }

  TSDBROW* pCandidate = tsdbTbDataIterGet(pIter->iter);
  TSDBKEY  rowKey = {.ts = pCandidate->pTSRow->ts, .version = pCandidate->version};

  for (;;) {
    if (outOfTimeWindow(rowKey.ts, &pReader->window)) {
      pIter->hasVal = false;
      return NULL;
    }

    // it is a valid data version
    bool versionOk = (rowKey.version <= pReader->verRange.maxVer) && (rowKey.version >= pReader->verRange.minVer);
    if (versionOk && !hasBeenDropped(pDelList, &pIter->index, &rowKey, pReader->order)) {
      return pCandidate;
    }

    // rejected: step to the next row in the buffer
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
      return NULL;
    }

    pCandidate = tsdbTbDataIterGet(pIter->iter);
    rowKey = TSDBROW_KEY(pCandidate);
  }
}
H
Hongze Cheng 已提交
2148

2149 2150
// Advance the buffer iterator and add every following valid row whose timestamp equals ts to
// pMerger. Stops at the first row with a different timestamp, or when no valid row is left.
// Always returns TSDB_CODE_SUCCESS.
int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, SRowMerger* pMerger,
                         STsdbReader* pReader) {
  for (;;) {
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);
    if (!pIter->hasVal) {
      break;
    }

    // data exists but not valid
    TSDBROW* pNext = getValidRow(pIter, pDelList, pReader);
    if (pNext == NULL) {
      break;
    }

    // ts is not identical, quit
    TSDBKEY nextKey = TSDBROW_KEY(pNext);
    if (nextKey.ts != ts) {
      break;
    }

    int32_t   rowSver = TSDBROW_SVERSION(pNext);
    bool      readerSchemaFits = (pReader->pSchema != NULL) && (rowSver == pReader->pSchema->version);
    STSchema* pRowSchema = NULL;

    if (readerSchemaFits) {
      pRowSchema = pReader->pSchema;
    } else {
      metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, rowSver, &pRowSchema);
      if (pReader->pSchema == NULL) {
        // the reader had no schema yet: it takes ownership of the one just fetched
        pReader->pSchema = pRowSchema;
      }
    }

    tRowMergerAdd(pMerger, pNext, pRowSchema);

    // a schema fetched only for this row is released right away
    if (pRowSchema != pReader->pSchema) {
      taosMemoryFree(pRowSchema);
    }
  }

  return TSDB_CODE_SUCCESS;
}

2190
static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger,
2191
                                          SVersionRange* pVerRange, int32_t step) {
2192 2193
  while (pBlockData->aTSKEY[rowIndex] == key && rowIndex < pBlockData->nRow && rowIndex >= 0) {
    if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) {
2194
      rowIndex += step;
2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211
      continue;
    }

    TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex);
    tRowMerge(pMerger, &fRow);
    rowIndex += step;
  }

  return rowIndex;
}

// Result reported by checkForNeighborFileBlock() through its state argument.
typedef enum {
  CHECK_FILEBLOCK_CONT = 0x1,  // the neighbor block was consumed completely; check the next neighbor too
  CHECK_FILEBLOCK_QUIT = 0x2,  // stop checking neighbor blocks
} CHECK_FILEBLOCK_STATE;

static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SBlock* pBlock,
2212 2213
                                         SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key,
                                         CHECK_FILEBLOCK_STATE* state) {
2214
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
2215
  SBlockData*         pBlockData = &pReader->status.fileBlockData;
2216

2217
  *state = CHECK_FILEBLOCK_QUIT;
2218
  int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1;
2219 2220 2221

  int32_t nextIndex = -1;
  SBlock* pNeighborBlock = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &nextIndex, pReader->order);
2222
  if (pNeighborBlock == NULL) {  // do nothing
2223 2224 2225 2226
    return 0;
  }

  bool overlap = overlapWithNeighborBlock(pBlock, pNeighborBlock, pReader->order);
2227 2228
  taosMemoryFree(pNeighborBlock);

2229
  if (overlap) {  // load next block
2230
    SReaderStatus*  pStatus = &pReader->status;
2231 2232
    SDataBlockIter* pBlockIter = &pStatus->blockIter;

2233
    // 1. find the next neighbor block in the scan block list
2234
    SFileDataBlockInfo fb = {.uid = pFBlock->uid, .tbBlockIdx = nextIndex};
2235
    int32_t            neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb);
2236

2237
    // 2. remove it from the scan block list
2238
    setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step);
2239

2240
    // 3. load the neighbor block, and set it to be the currently accessed file data block
H
Haojun Liao 已提交
2241 2242
    tBlockDataReset(&pStatus->fileBlockData);
    tBlockDataClearData(&pStatus->fileBlockData);
2243 2244 2245 2246 2247
    int32_t code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

2248
    // 4. check the data values
2249 2250 2251 2252
    initBlockDumpInfo(pReader, pBlockIter);

    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);
H
Haojun Liao 已提交
2253
    if (pDumpInfo->rowIndex >= pDumpInfo->totalRows) {
2254 2255 2256 2257 2258 2259 2260
      *state = CHECK_FILEBLOCK_CONT;
    }
  }

  return TSDB_CODE_SUCCESS;
}

2261 2262
// Merge into pMerger all file rows that share the timestamp of the row at pDumpInfo->rowIndex:
// first the following rows of the current block, then, if that block is used up, the matching rows
// of overlapping neighbour blocks of the same table. The caller is expected to have initialised
// pMerger with the row at the current index; this function starts at the row after it.
//
// Returns TSDB_CODE_SUCCESS or the error from checkForNeighborFileBlock().
int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader,
                                SRowMerger* pMerger) {
  SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;

  bool    asc = ASCENDING_TRAVERSE(pReader->order);
  int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
  int32_t step = asc ? 1 : -1;

  pDumpInfo->rowIndex += step;
  if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) {
    pDumpInfo->rowIndex =
        doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step);
  }

  // all rows are consumed, let's try next file block
  if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) {
    while (1) {
      CHECK_FILEBLOCK_STATE st = CHECK_FILEBLOCK_QUIT;

      SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter);
      SBlock*             pCurrentBlock = getCurrentBlock(&pReader->status.blockIter);

      // a failed block load must not be mistaken for "no more neighbours"
      int32_t code = checkForNeighborFileBlock(pReader, pScanInfo, pCurrentBlock, pFileBlockInfo, pMerger, key, &st);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }

      if (st == CHECK_FILEBLOCK_QUIT) {
        break;
      }
    }
  }

  return TSDB_CODE_SUCCESS;
}

2292
// Make pReader->pSchema match the schema version of pRow: fetch it when the reader has none, or
// replace it when the cached version differs. A matching cached schema is left alone.
void updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader) {
  int32_t rowSver = TSDBROW_SVERSION(pRow);

  if (pReader->pSchema != NULL) {
    if (pReader->pSchema->version == rowSver) {
      return;
    }

    // stale schema: drop it before fetching the right version
    taosMemoryFreeClear(pReader->pSchema);
  }

  metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, rowSver, &pReader->pSchema);
}

dengyihao's avatar
dengyihao 已提交
2303
void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow,
2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322
                      STsdbReader* pReader, bool* freeTSRow) {
  {  // if the timestamp of the next valid row has a different ts, return current row directly
    TSDBROW current = *pRow;
    pIter->hasVal = tsdbTbDataIterNext(pIter->iter);

    if (!pIter->hasVal) {
      *pTSRow = current.pTSRow;
      *freeTSRow = false;
      return;
    } else {  // has next point in mem/imem
      TSDBROW* pNextRow = getValidRow(pIter, pDelList, pReader);
      if (TSDBROW_KEY(&current).ts != TSDBROW_KEY(pNextRow).ts) {
        *pTSRow = current.pTSRow;
        *freeTSRow = false;
        return;
      }
    }
  }

2323
  SRowMerger merge = {0};
2324
  TSDBKEY    k = TSDBROW_KEY(pRow);
2325

2326
  // get the correct schema for data in memory
2327 2328
  int32_t   sversion = TSDBROW_SVERSION(pRow);
  STSchema* pTSchema = NULL;
2329
  if (pReader->pSchema == NULL || sversion != pReader->pSchema->version) {
2330
    metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &pTSchema);
2331 2332 2333
    if (pReader->pSchema == NULL) {
      pReader->pSchema = pTSchema;
    }
2334 2335 2336
  } else {
    pTSchema = pReader->pSchema;
  }
H
Haojun Liao 已提交
2337

2338 2339
  tRowMergerInit2(&merge, pReader->pSchema, pRow, pTSchema);
  doMergeRowsInBuf(pIter, uid, k.ts, pDelList, &merge, pReader);
2340
  tRowMergerGetRow(&merge, pTSRow);
2341
  tRowMergerClear(&merge);
M
Minglei Jin 已提交
2342

2343 2344
  *freeTSRow = true;

M
Minglei Jin 已提交
2345 2346 2347
  if (sversion != pReader->pSchema->version) {
    taosMemoryFree(pTSchema);
  }
2348 2349
}

2350 2351
/**
 * @brief Merge a mem row and an imem row (plus the rows that follow them in both buffers) into one row.
 *
 * For ascending scans the merger is seeded with the imem row and the mem row is merged afterwards; for
 * descending scans the order is reversed. tsdbGetNextRowInMem() calls this only when both rows have the same
 * timestamp.
 *
 * @param pRow            current row of the mem iterator
 * @param piRow           current row of the imem iterator
 * @param pBlockScanInfo  per-table scan state holding both iterators and the delete skyline
 * @param pReader         reader (scan order, cached schema)
 * @param pTSRow          [out] merged row produced by tRowMergerGetRow()
 */
void doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader,
                        STSRow** pTSRow) {
  SRowMerger merge = {0};

  TSDBKEY k = TSDBROW_KEY(pRow);
  TSDBKEY ik = TSDBROW_KEY(piRow);

  if (ASCENDING_TRAVERSE(pReader->order)) {  // ascending order imem --> mem
    updateSchema(piRow, pBlockScanInfo->uid, pReader);

    tRowMergerInit(&merge, piRow, pReader->pSchema);
    doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, &merge, pReader);

    tRowMerge(&merge, pRow);
    doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
  } else {  // descending order mem --> imem
    updateSchema(pRow, pBlockScanInfo->uid, pReader);

    tRowMergerInit(&merge, pRow, pReader->pSchema);
    doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);

    tRowMerge(&merge, piRow);
    doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader);
  }

  tRowMergerGetRow(&merge, pTSRow);

  // release the merger's internal state, as doMergeMultiRows() does after fetching the merged row
  tRowMergerClear(&merge);
}

2378
/**
 * @brief Fetch the next row from the in-memory buffers (mem and imem) of one table.
 *
 * A buffer row is ignored when its timestamp has reached endKey in scan direction. When both buffers offer a
 * row, the one with the smaller timestamp is emitted; equal timestamps are merged across both buffers.
 * *pTSRow is left untouched when neither buffer offers a usable row.
 *
 * @param pBlockScanInfo  per-table scan state (iterators, delete skyline)
 * @param pReader         reader
 * @param pTSRow          [out] next row, if any
 * @param endKey          exclusive boundary in scan direction
 * @param freeTSRow       [out] whether *pTSRow was built by a merger
 * @return always TSDB_CODE_SUCCESS
 */
int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STSRow** pTSRow,
                            int64_t endKey, bool* freeTSRow) {
  SIterInfo* pMemIter = &pBlockScanInfo->iter;
  SIterInfo* pIMemIter = &pBlockScanInfo->iiter;

  TSDBROW* pRow = getValidRow(pMemIter, pBlockScanInfo->delSkyline, pReader);
  TSDBROW* piRow = getValidRow(pIMemIter, pBlockScanInfo->delSkyline, pReader);
  SArray*  pDelList = pBlockScanInfo->delSkyline;

  const bool asc = ASCENDING_TRAVERSE(pReader->order);

  // discard candidates that already reached the boundary key
  if (pMemIter->hasVal) {
    TSDBKEY key = TSDBROW_KEY(pRow);
    bool    beyond = asc ? (key.ts >= endKey) : (key.ts <= endKey);
    if (beyond) {
      pRow = NULL;
    }
  }

  if (pIMemIter->hasVal) {
    TSDBKEY key = TSDBROW_KEY(piRow);
    bool    beyond = asc ? (key.ts >= endKey) : (key.ts <= endKey);
    if (beyond) {
      piRow = NULL;
    }
  }

  const bool memUsable = pMemIter->hasVal && (pRow != NULL);
  const bool imemUsable = pIMemIter->hasVal && (piRow != NULL);

  if (memUsable && imemUsable) {
    TSDBKEY k = TSDBROW_KEY(pRow);
    TSDBKEY ik = TSDBROW_KEY(piRow);

    if (ik.ts == k.ts) {
      // same timestamp in both buffers: merge across mem and imem
      doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, pTSRow);
      *freeTSRow = true;
    } else if (ik.ts < k.ts) {
      doMergeMultiRows(piRow, pBlockScanInfo->uid, pIMemIter, pDelList, pTSRow, pReader, freeTSRow);
    } else {
      doMergeMultiRows(pRow, pBlockScanInfo->uid, pMemIter, pDelList, pTSRow, pReader, freeTSRow);
    }
  } else if (memUsable) {
    doMergeMultiRows(pRow, pBlockScanInfo->uid, pMemIter, pDelList, pTSRow, pReader, freeTSRow);
  } else if (imemUsable) {
    doMergeMultiRows(piRow, pBlockScanInfo->uid, pIMemIter, pDelList, pTSRow, pReader, freeTSRow);
  }

  return TSDB_CODE_SUCCESS;
}

2429
/**
 * @brief Append one STSRow to the result block as a new row.
 *
 * Result columns and schema columns are walked in parallel by column id: matching ids copy the value, result
 * columns without a schema counterpart receive NULL, schema columns that were not requested are skipped.
 *
 * @param pBlock   result block; info.rows is incremented
 * @param pReader  reader providing the schema and the copy support info
 * @param pTSRow   row to append
 * @return always TSDB_CODE_SUCCESS
 */
int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow) {
  const int32_t rowIdx = pBlock->info.rows;
  const int32_t numOfOutCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);

  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
  STSchema*           pSchema = pReader->pSchema;

  SColVal value = {0};
  int32_t outIdx = 0;
  int32_t schemaIdx = 0;

  // the primary timestamp column, when it is the first output column, is filled from the row key
  SColumnInfoData* pOutCol = taosArrayGet(pBlock->pDataBlock, outIdx);
  if (pOutCol->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
    colDataAppend(pOutCol, rowIdx, (const char*)&pTSRow->ts, false);
    outIdx += 1;
  }

  while (outIdx < numOfOutCols && schemaIdx < pSchema->numOfCols) {
    pOutCol = taosArrayGet(pBlock->pDataBlock, outIdx);
    col_id_t outColId = pOutCol->info.colId;

    if (outColId > pSchema->columns[schemaIdx].colId) {
      // schema column was not requested
      schemaIdx += 1;
    } else if (outColId < pSchema->columns[schemaIdx].colId) {
      // requested column is missing in this schema
      colDataAppendNULL(pOutCol, rowIdx);
      outIdx += 1;
    } else {
      tTSRowGetVal(pTSRow, pReader->pSchema, schemaIdx, &value);
      doCopyColVal(pOutCol, rowIdx, outIdx, &value, pSupInfo);
      outIdx += 1;
      schemaIdx += 1;
    }
  }

  // remaining output columns do not exist in the schema: fill with NULL
  for (; outIdx < numOfOutCols; outIdx += 1) {
    pOutCol = taosArrayGet(pBlock->pDataBlock, outIdx);
    colDataAppendNULL(pOutCol, rowIdx);
  }

  pBlock->info.rows += 1;
  return TSDB_CODE_SUCCESS;
}

2473 2474 2475 2476 2477 2478 2479 2480 2481
/**
 * @brief Append row `rowIndex` of a loaded file block to the result block.
 *
 * Output columns are visited in order; an output column whose id equals the id of the current input column
 * gets that column's value and the input cursor advances, any other output column is filled with NULL.
 *
 * @param pResBlock   result block; info.rows is incremented
 * @param pReader     reader providing the copy support info
 * @param pBlockData  loaded file block data
 * @param rowIndex    row inside pBlockData to copy
 * @return always TSDB_CODE_SUCCESS
 */
int32_t doAppendRowFromBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, int32_t rowIndex) {
  int32_t       outIdx = 0;
  int32_t       inIdx = 0;
  const int32_t dstRow = pResBlock->info.rows;

  SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;

  // the primary timestamp column, when it is the first output column, comes from the key array
  SColumnInfoData* pTsCol = taosArrayGet(pResBlock->pDataBlock, outIdx);
  if (pTsCol->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
    colDataAppendInt64(pTsCol, dstRow, &pBlockData->aTSKEY[rowIndex]);
    outIdx += 1;
  }

  SColVal value = {0};
  int32_t numOfInputCols = taosArrayGetSize(pBlockData->aIdx);
  int32_t numOfOutputCols = blockDataGetNumOfCols(pResBlock);

  for (; outIdx < numOfOutputCols && inIdx < numOfInputCols; outIdx += 1) {
    SColumnInfoData* pOutCol = taosArrayGet(pResBlock->pDataBlock, outIdx);
    SColData*        pInCol = tBlockDataGetColDataByIdx(pBlockData, inIdx);

    if (pInCol->cid != pOutCol->info.colId) {
      // the specified column does not exist in file block, fill with null data
      colDataAppendNULL(pOutCol, dstRow);
      continue;
    }

    tColDataGetValue(pInCol, rowIndex, &value);
    doCopyColVal(pOutCol, dstRow, outIdx, &value, pSupInfo);
    inIdx += 1;
  }

  // input columns are used up: the rest of the output columns are NULL
  for (; outIdx < numOfOutputCols; outIdx += 1) {
    SColumnInfoData* pOutCol = taosArrayGet(pResBlock->pDataBlock, outIdx);
    colDataAppendNULL(pOutCol, dstRow);
  }

  pResBlock->info.rows += 1;
  return TSDB_CODE_SUCCESS;
}

2514 2515
/**
 * @brief Fill the reader's result block with rows taken from the in-memory buffers of one table.
 *
 * Rows are fetched with tsdbGetNextRowInMem() until no row is returned, both buffer iterators are drained,
 * or the block holds `capacity` rows.
 *
 * @param pBlockScanInfo  per-table scan state
 * @param endKey          boundary key forwarded to tsdbGetNextRowInMem()
 * @param capacity        maximum number of rows in the result block
 * @param pReader         reader owning the result block
 * @return always TSDB_CODE_SUCCESS
 */
int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity,
                                  STsdbReader* pReader) {
  SSDataBlock* pResBlock = pReader->pResBlock;

  for (;;) {
    STSRow* pNext = NULL;
    bool    ownsRow = false;

    tsdbGetNextRowInMem(pBlockScanInfo, pReader, &pNext, endKey, &ownsRow);
    if (pNext == NULL) {
      break;
    }

    doAppendRowFromTSRow(pResBlock, pReader, pNext);

    // merged rows are allocated for us and must be released after the copy
    if (ownsRow) {
      taosMemoryFree(pNext);
    }

    // stop when the buffers hold no more data or the block is full
    bool buffersDrained = !pBlockScanInfo->iter.hasVal && !pBlockScanInfo->iiter.hasVal;
    if (buffersDrained || pResBlock->info.rows >= capacity) {
      break;
    }
  }

  ASSERT(pResBlock->info.rows <= capacity);
  return TSDB_CODE_SUCCESS;
}
H
Hongze Cheng 已提交
2544

2545
// todo refactor, use arraylist instead
H
Hongze Cheng 已提交
2546
/**
 * @brief Replace the reader's table map with a single entry for the given table uid.
 *
 * @param pReader  reader whose status.pTableMap is cleared and refilled
 * @param uid      table uid to scan
 * @return TDB_CODE_SUCCESS
 */
int32_t tsdbSetTableId(STsdbReader* pReader, int64_t uid) {
  ASSERT(pReader != NULL);

  // drop every previously registered table
  taosHashClear(pReader->status.pTableMap);

  STableBlockScanInfo scanInfo = {.uid = uid, .lastKey = 0};
  taosHashPut(pReader->status.pTableMap, &scanInfo.uid, sizeof(uint64_t), &scanInfo, sizeof(scanInfo));

  return TDB_CODE_SUCCESS;
}

dengyihao's avatar
dengyihao 已提交
2555 2556 2557 2558 2559 2560
/**
 * @brief NULL-tolerant wrapper around metaGetIdx().
 *
 * @param pMeta  meta handle, may be NULL
 * @return NULL when pMeta is NULL, otherwise the result of metaGetIdx(pMeta)
 */
void* tsdbGetIdx(SMeta* pMeta) {
  if (pMeta != NULL) {
    return metaGetIdx(pMeta);
  }

  return NULL;
}
dengyihao's avatar
dengyihao 已提交
2561

dengyihao's avatar
dengyihao 已提交
2562 2563 2564 2565 2566 2567
/**
 * @brief NULL-tolerant wrapper around metaGetIvtIdx().
 *
 * @param pMeta  meta handle, may be NULL
 * @return NULL when pMeta is NULL, otherwise the result of metaGetIvtIdx(pMeta)
 */
void* tsdbGetIvtIdx(SMeta* pMeta) {
  if (pMeta != NULL) {
    return metaGetIvtIdx(pMeta);
  }

  return NULL;
}
L
Liu Jicong 已提交
2568

2569 2570 2571 2572
/**
 * @brief Return the upper bound (verRange.maxVer) of the reader's version range.
 *
 * @param pReader  reader to query
 */
uint64_t getReaderMaxVersion(STsdbReader* pReader) {
  const uint64_t maxVersion = pReader->verRange.maxVer;
  return maxVersion;
}

C
Cary Xu 已提交
2573 2574 2575 2576 2577 2578 2579 2580 2581 2582
/**
 * @brief Collect the super table ids delivered by a meta cursor opened at `suid`.
 *
 * Ids are appended to `list` until the cursor yields 0.
 *
 * @param pMeta  meta handle
 * @param suid   start suid handed to metaOpenStbCursor(); 0 returns all suids in one vnode
 * @param list   output array receiving the tb_uid_t values
 * @return TSDB_CODE_FAILED when the cursor cannot be opened, otherwise TSDB_CODE_SUCCESS
 */
int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) {
  SMStbCursor* pCursor = metaOpenStbCursor(pMeta, suid);
  if (pCursor == NULL) {
    return TSDB_CODE_FAILED;
  }

  // an id of 0 marks the end of the cursor
  for (tb_uid_t id = metaStbCursorNext(pCursor); id != 0; id = metaStbCursorNext(pCursor)) {
    taosArrayPush(list, &id);
  }

  metaCloseStbCursor(pCursor);
  return TSDB_CODE_SUCCESS;
}

H
refact  
Hongze Cheng 已提交
2600
// ====================================== EXPOSED APIs ======================================
2601 2602
/**
 * @brief Create a reader for the given tables and position it on the first data block.
 *
 * For TIMEWINDOW_RANGE_EXTERNAL conditions two additional inner readers (capacity 1) are created for the
 * ranges before and after the query window; pCond->twindows and pCond->order are modified in that case.
 *
 * @param pVnode      vnode to read from
 * @param pCond       query condition
 * @param pTableList  array of STableKeyInfo describing the tables to scan
 * @param ppReader    [out] created reader; set to NULL when the table map cannot be built
 * @param idstr       query id string used in log output
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader,
                       const char* idstr) {
  // Initialised up front: the error path below may be reached before the reader exists.
  STsdbReader* pReader = NULL;

  int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, 4096, idstr);
  if (code != TSDB_CODE_SUCCESS) {
    goto _err;
  }

  // check for query time window
  pReader = *ppReader;
  if (isEmptyQueryTimeWindow(&pReader->window)) {
    tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr);
    return TSDB_CODE_SUCCESS;
  }

  if (pCond->type == TIMEWINDOW_RANGE_EXTERNAL) {
    // update the SQueryTableDataCond to create inner reader
    STimeWindow w = pCond->twindows;
    int32_t     order = pCond->order;
    if (order == TSDB_ORDER_ASC) {
      pCond->twindows.ekey = pCond->twindows.skey;
      pCond->twindows.skey = INT64_MIN;
      pCond->order = TSDB_ORDER_DESC;
    } else {
      pCond->twindows.skey = pCond->twindows.ekey;
      pCond->twindows.ekey = INT64_MAX;
      pCond->order = TSDB_ORDER_ASC;
    }

    code = tsdbReaderCreate(pVnode, pCond, &pReader->innerReader[0], 1, idstr);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
    }

    if (order == TSDB_ORDER_ASC) {
      pCond->twindows.skey = w.ekey;
      pCond->twindows.ekey = INT64_MAX;
    } else {
      pCond->twindows.skey = INT64_MIN;
      pCond->twindows.ekey = w.ekey;
    }
    code = tsdbReaderCreate(pVnode, pCond, &pReader->innerReader[1], 1, idstr);
    if (code != TSDB_CODE_SUCCESS) {
      goto _err;
    }
  }

  if (pCond->suid != 0) {
    pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->suid, -1);
  } else if (taosArrayGetSize(pTableList) > 0) {
    STableKeyInfo* pKey = taosArrayGet(pTableList, 0);
    pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, -1);
  }

  int32_t numOfTables = taosArrayGetSize(pTableList);
  pReader->status.pTableMap = createDataBlockScanInfo(pReader, pTableList->pData, numOfTables);
  if (pReader->status.pTableMap == NULL) {
    // the reader is released here, so nothing below may touch pReader any more
    tsdbReaderClose(pReader);
    pReader = NULL;
    *ppReader = NULL;

    code = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _err;
  }

  code = tsdbTakeReadSnap(pReader->pTsdb, &pReader->pReadSnap);
  if (code != TSDB_CODE_SUCCESS) {
    goto _err;
  }

  if (pReader->type == TIMEWINDOW_RANGE_CONTAINED) {
    SDataBlockIter* pBlockIter = &pReader->status.blockIter;

    initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr);
    resetDataBlockIterator(&pReader->status.blockIter, pReader->order, pReader->status.pTableMap);

    // no data in files, let's try buffer in memory
    if (pReader->status.fileIter.numOfFiles == 0) {
      pReader->status.loadFromFile = false;
    } else {
      code = initForFirstBlockInFile(pReader, pBlockIter);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else {
    // NOTE(review): the inner reader's pReadSnap is dereferenced here, but this function only takes a read
    // snapshot for the outer reader -- confirm tsdbReaderCreate() provides one for inner readers.
    STsdbReader*    pPrevReader = pReader->innerReader[0];
    SDataBlockIter* pBlockIter = &pPrevReader->status.blockIter;

    initFilesetIterator(&pPrevReader->status.fileIter, pPrevReader->pReadSnap->fs.aDFileSet, pPrevReader->order,
                        pPrevReader->idStr);
    resetDataBlockIterator(&pPrevReader->status.blockIter, pPrevReader->order, pReader->status.pTableMap);

    // no data in files, let's try buffer in memory
    if (pPrevReader->status.fileIter.numOfFiles == 0) {
      pPrevReader->status.loadFromFile = false;
    } else {
      code = initForFirstBlockInFile(pPrevReader, pBlockIter);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  }

  tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr);
  return code;

_err:
  // Log the caller-supplied id string: on this path the reader may never have been created or may already
  // have been closed, so pReader->idStr must not be read.
  tsdbError("failed to create data reader, code:%s %s", tstrerror(code), idstr);
  return code;
}

/**
 * @brief Release a reader and the resources it holds.
 *
 * Releases the read snapshot, the column support buffers, the file block data, the block iterator, the
 * table map, the result block, the open file reader, the id string and the cached schema, and logs the
 * I/O cost summary. A NULL reader is ignored.
 *
 * NOTE(review): innerReader[0]/[1] are not released here; tsdbNextDataBlock() closes them only once they
 * are exhausted -- confirm they cannot outlive the outer reader.
 *
 * @param pReader  reader to destroy, may be NULL
 */
void tsdbReaderClose(STsdbReader* pReader) {
  if (pReader == NULL) {
    return;
  }

  SBlockLoadSuppInfo* pSupp = &pReader->suppInfo;
  tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap);

  // column support info
  taosMemoryFreeClear(pSupp->plist);
  taosMemoryFree(pSupp->colIds);
  taosArrayDestroy(pSupp->pColAgg);

  // per-column build buffers, one slot per result column
  for (int32_t col = 0; col < blockDataGetNumOfCols(pReader->pResBlock); ++col) {
    if (pSupp->buildBuf[col] != NULL) {
      taosMemoryFreeClear(pSupp->buildBuf[col]);
    }
  }
  taosMemoryFree(pSupp->buildBuf);

  tBlockDataClear(&pReader->status.fileBlockData, true);
  cleanupDataBlockIterator(&pReader->status.blockIter);

  // remember the table count for the summary before the map is destroyed
  size_t tableCount = taosHashGetSize(pReader->status.pTableMap);
  destroyBlockScanInfo(pReader->status.pTableMap);
  blockDataDestroy(pReader->pResBlock);

  if (pReader->pFileReader != NULL) {
    tsdbDataFReaderClose(&pReader->pFileReader);
  }

  SIOCostSummary* pSummary = &pReader->cost;

  tsdbDebug("%p :io-cost summary: head-file:%" PRIu64 ", head-file time:%.2f ms, SMA:%" PRId64
            " SMA-time:%.2f ms, fileBlocks:%" PRId64 ", fileBlocks-time:%.2f ms, "
            "build in-memory-block-time:%.2f ms, STableBlockScanInfo size:%.2f Kb %s",
            pReader, pSummary->headFileLoad, pSummary->headFileLoadTime, pSummary->smaData, pSummary->smaLoadTime,
            pSummary->numOfBlocks, pSummary->blockLoadTime, pSummary->buildmemBlock,
            tableCount * sizeof(STableBlockScanInfo) / 1000.0, pReader->idStr);

  taosMemoryFree(pReader->idStr);
  taosMemoryFree(pReader->pSchema);
  taosMemoryFreeClear(pReader);
}

2755
/**
 * @brief Build the next result block of one reader.
 *
 * While the reader is still loading from files, a block is built from files first; when that yields no rows
 * (or the reader is already past the files), the in-memory buffers are used.
 *
 * @param pReader  reader to advance
 * @return true when the result block contains at least one row, false otherwise or on file-read failure
 */
static bool doTsdbNextDataBlock(STsdbReader* pReader) {
  // cleanup the data that belongs to the previous data block
  SSDataBlock* pResBlock = pReader->pResBlock;
  blockDataCleanup(pResBlock);

  if (pReader->status.loadFromFile) {
    if (buildBlockFromFiles(pReader) != TSDB_CODE_SUCCESS) {
      return false;
    }

    if (pResBlock->info.rows > 0) {
      return true;
    }
  }

  // no (more) data in files, let's try the buffer
  buildBlockFromBufferSequentially(pReader);
  return pResBlock->info.rows > 0;
}

2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818
/**
 * @brief Advance the reader to its next data block.
 *
 * Blocks are served in this order: innerReader[0] (step EXTERNAL_ROWS_PREV), the reader itself
 * (EXTERNAL_ROWS_MAIN), innerReader[1] (EXTERNAL_ROWS_NEXT). An inner reader that has no more data is closed
 * and its slot is cleared.
 *
 * @param pReader  reader to advance
 * @return true when a block is available, false when the query window is empty or all data is consumed
 */
bool tsdbNextDataBlock(STsdbReader* pReader) {
  if (isEmptyQueryTimeWindow(&pReader->window)) {
    return false;
  }

  STsdbReader* pPrev = pReader->innerReader[0];
  if (pPrev != NULL) {
    if (doTsdbNextDataBlock(pPrev)) {
      pReader->step = EXTERNAL_ROWS_PREV;
      return true;
    }

    // the preceding range is exhausted
    tsdbReaderClose(pPrev);
    pReader->innerReader[0] = NULL;
  }

  pReader->step = EXTERNAL_ROWS_MAIN;
  if (doTsdbNextDataBlock(pReader)) {
    return true;
  }

  STsdbReader* pNext = pReader->innerReader[1];
  if (pNext != NULL) {
    if (doTsdbNextDataBlock(pNext)) {
      pReader->step = EXTERNAL_ROWS_NEXT;
      return true;
    }

    // the following range is exhausted
    tsdbReaderClose(pNext);
    pReader->innerReader[1] = NULL;
  }

  return false;
}

/**
 * @brief Copy rows, uid and time window of the reader's result block into pDataBlockInfo.
 *
 * @param pReader         reader whose result block is described
 * @param pDataBlockInfo  [out] receives the block description
 */
static void setBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) {
  ASSERT(pDataBlockInfo != NULL && pReader != NULL);

  SSDataBlock* pRes = pReader->pResBlock;

  pDataBlockInfo->rows = pRes->info.rows;
  pDataBlockInfo->uid = pRes->info.uid;
  pDataBlockInfo->window = pRes->info.window;
}

2825 2826
/**
 * @brief Describe the block last produced by tsdbNextDataBlock().
 *
 * For TIMEWINDOW_RANGE_EXTERNAL readers the description is taken from the reader selected by pReader->step
 * (main reader, innerReader[0] for EXTERNAL_ROWS_PREV, innerReader[1] otherwise).
 *
 * @param pReader         reader
 * @param pDataBlockInfo  [out] receives rows, uid and time window
 */
void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) {
  STsdbReader* pSource = pReader;

  if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL && pReader->step != EXTERNAL_ROWS_MAIN) {
    pSource = (pReader->step == EXTERNAL_ROWS_PREV) ? pReader->innerReader[0] : pReader->innerReader[1];
  }

  setBlockInfo(pSource, pDataBlockInfo);
}

2839
/**
 * @brief Load the block-level SMA (pre-aggregated statistics) of the current file block.
 *
 * No statistics are returned (*pBlockStatis == NULL, *allHave == false) for TIMEWINDOW_RANGE_EXTERNAL
 * readers, for composed data blocks, and for blocks without SMA. Otherwise the statistics are read with
 * tsdbReadBlockSma() and matched against the requested column ids.
 *
 * @param pReader       reader positioned on a file block
 * @param pBlockStatis  [out] per-column statistics list (pReader->suppInfo.plist) or NULL
 * @param allHave       [out] false when statistics are unavailable or a matched column has BSMA switched off
 * @return TSDB_CODE_SUCCESS or the error returned by tsdbReadBlockSma()
 */
int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) {
  int32_t code = 0;
  *allHave = false;

  // external-range readers and composed blocks carry no statistics data
  if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL || pReader->status.composedDataBlock) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }

  SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter);
  SBlock*             pBlock = getCurrentBlock(&pReader->status.blockIter);
  int64_t             startUs = taosGetTimestampUs();

  SBlockLoadSuppInfo* pSup = &pReader->suppInfo;

  if (!tBlockHasSma(pBlock)) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }

  code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg, NULL);
  if (code != TSDB_CODE_SUCCESS) {
    tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code),
              pReader->idStr);
    return code;
  }

  *allHave = true;

  // always load the first primary timestamp column data
  SColumnDataAgg* pTsAgg = &pSup->tsColAgg;
  pTsAgg->numOfNull = 0;
  pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID;
  pTsAgg->min = pReader->pResBlock->info.window.skey;
  pTsAgg->max = pReader->pResBlock->info.window.ekey;
  pSup->plist[0] = pTsAgg;

  // match the loaded aggregates against the requested column ids
  size_t  numOfCols = blockDataGetNumOfCols(pReader->pResBlock);
  int32_t aggIdx = 0;
  int32_t colIdx = 0;

  while (colIdx < numOfCols && aggIdx < taosArrayGetSize(pSup->pColAgg)) {
    SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, aggIdx);

    if (pAgg->colId < pSup->colIds[colIdx]) {
      aggIdx += 1;
    } else if (pSup->colIds[colIdx] < pAgg->colId) {
      colIdx += 1;
    } else {
      if (IS_BSMA_ON(&(pReader->pSchema->columns[aggIdx]))) {
        pSup->plist[colIdx] = pAgg;
      } else {
        *allHave = false;
      }
      aggIdx += 1;
      colIdx += 1;
    }
  }

  double elapsed = (taosGetTimestampUs() - startUs) / 1000.0;
  pReader->cost.smaLoadTime += elapsed;
  pReader->cost.smaData += 1;

  *pBlockStatis = pSup->plist;

  tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", elapsed time:%.2f ms, %s", 0, pFBlock->uid,
            elapsed, pReader->idStr);

  return code;
}

2917
/**
 * @brief Return the column array of the reader's current result block, loading the file block if needed.
 *
 * A composed block is already materialised in pResBlock. Otherwise the current file block is loaded with
 * doLoadFileBlockData() and copied into the result block.
 *
 * @param pReader  reader
 * @return pResBlock->pDataBlock, or NULL (with terrno set) when loading the file block fails
 */
static SArray* doRetrieveDataBlock(STsdbReader* pReader) {
  SReaderStatus* pStatus = &pReader->status;

  if (!pStatus->composedDataBlock) {
    SFileDataBlockInfo*  pFBlock = getCurrentBlockInfo(&pStatus->blockIter);
    STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid));

    tBlockDataReset(&pStatus->fileBlockData);
    tBlockDataClearData(&pStatus->fileBlockData);

    int32_t code = doLoadFileBlockData(pReader, &pStatus->blockIter, pScanInfo, &pStatus->fileBlockData);
    if (code != TSDB_CODE_SUCCESS) {
      tBlockDataClear(&pStatus->fileBlockData, 1);

      terrno = code;
      return NULL;
    }

    copyBlockDataToSDataBlock(pReader, pScanInfo);
  }

  return pReader->pResBlock->pDataBlock;
}

2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952
/**
 * @brief Return the column data of the block last produced by tsdbNextDataBlock().
 *
 * For TIMEWINDOW_RANGE_EXTERNAL readers the block comes from innerReader[0] (EXTERNAL_ROWS_PREV) or
 * innerReader[1] (EXTERNAL_ROWS_NEXT) when pReader->step says so; in every other case from the reader itself.
 *
 * @param pReader  reader
 * @param pIdList  not used by this function
 * @return column array of the result block, or NULL on failure
 */
SArray* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) {
  STsdbReader* pSource = pReader;

  if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) {
    if (pReader->step == EXTERNAL_ROWS_PREV) {
      pSource = pReader->innerReader[0];
    } else if (pReader->step == EXTERNAL_ROWS_NEXT) {
      pSource = pReader->innerReader[1];
    }
  }

  return doRetrieveDataBlock(pSource);
}

H
Haojun Liao 已提交
2953
/**
 * @brief Rewind a reader so that it can be scanned again with a new condition.
 *
 * Order and time window are taken from pCond, the reader type is forced to TIMEWINDOW_RANGE_CONTAINED, the
 * open file reader is closed, and the fileset/block iterators and per-table scan info are reset. Nothing is
 * done when the reader's current query window is empty.
 *
 * @param pReader  reader to reset
 * @param pCond    new query condition (order and twindows are used)
 * @return TSDB_CODE_SUCCESS, or the error returned by initForFirstBlockInFile()
 */
int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) {
  if (isEmptyQueryTimeWindow(&pReader->window)) {
    return TSDB_CODE_SUCCESS;
  }

  pReader->order = pCond->order;
  pReader->type = TIMEWINDOW_RANGE_CONTAINED;
  pReader->status.loadFromFile = true;
  pReader->status.pTableIter = NULL;
  pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);

  // allocate buffer in order to load data blocks from file
  memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg));
  memset(pReader->suppInfo.plist, 0, POINTER_BYTES);

  pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID;

  // Close the file reader exactly once, and only when one is open (same guard as tsdbReaderClose()).
  if (pReader->pFileReader != NULL) {
    tsdbDataFReaderClose(&pReader->pFileReader);
  }

  int32_t numOfTables = taosHashGetSize(pReader->status.pTableMap);

  initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr);
  resetDataBlockIterator(&pReader->status.blockIter, pReader->order, pReader->status.pTableMap);
  resetDataBlockScanInfo(pReader->status.pTableMap);

  int32_t         code = 0;
  SDataBlockIter* pBlockIter = &pReader->status.blockIter;

  // no data in files, let's try buffer in memory
  if (pReader->status.fileIter.numOfFiles == 0) {
    pReader->status.loadFromFile = false;
  } else {
    code = initForFirstBlockInFile(pReader, pBlockIter);
    if (code != TSDB_CODE_SUCCESS) {
      tsdbError("%p reset reader failed, numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s", pReader,
                numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr);
      return code;
    }
  }

  tsdbDebug("%p reset reader, suid:%" PRIu64 ", numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s",
            pReader, pReader->suid, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr);

  return code;
}
H
Hongze Cheng 已提交
2998

2999 3000 3001
/**
 * @brief Map a block's row count to a histogram bucket index.
 *
 * @param startRow     row count at which bucket 0 starts
 * @param bucketRange  number of rows covered by one bucket
 * @param numOfRows    row count of the block
 * @return (numOfRows - startRow) / bucketRange; 0 when numOfRows is below startRow (which would otherwise
 *         give a negative index) or when bucketRange is not positive (which would otherwise divide by zero)
 */
static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows) {
  if (bucketRange <= 0 || numOfRows < startRow) {
    return 0;
  }

  return (numOfRows - startRow) / bucketRange;
}
H
Hongze Cheng 已提交
3002

3003 3004 3005 3006
/**
 * @brief Walk all file blocks visible to the reader and accumulate block distribution statistics.
 *
 * For every block the total/min/max row counters, the small-block counter (fewer than 4096 rows) and the
 * row-count histogram are updated. When the block iterator is used up, initForFirstBlockInFile() is called
 * to move on; the walk ends when that call fails or the reader stops loading from files.
 *
 * NOTE(review): the histogram slot comes straight from getBucketIndex(); confirm it always stays within the
 * bounds of blockRowsHisto.
 *
 * @param pReader          reader positioned on the first file block
 * @param pTableBlockInfo  [in,out] statistics accumulator
 * @return TSDB_CODE_SUCCESS or the error returned by initForFirstBlockInFile()
 */
int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) {
  int32_t         code = TSDB_CODE_SUCCESS;
  SReaderStatus*  pStatus = &pReader->status;
  SDataBlockIter* pBlockIter = &pStatus->blockIter;
  STsdbCfg*       pCfg = &pReader->pTsdb->pVnode->config.tsdbCfg;

  pTableBlockInfo->totalSize = 0;
  pTableBlockInfo->totalRows = 0;
  pTableBlockInfo->defMinRows = pCfg->minRows;
  pTableBlockInfo->defMaxRows = pCfg->maxRows;

  // the configured row range is split into 20 buckets
  int32_t bucketRange = ceil((pCfg->maxRows - pCfg->minRows) / 20.0);

  pTableBlockInfo->numOfFiles += 1;

  int32_t numOfTables = (int32_t)taosHashGetSize(pStatus->pTableMap);
  int     smallBlockRows = 4096;

  pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles;

  if (pBlockIter->numOfBlocks > 0) {
    pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
  }

  pTableBlockInfo->numOfTables = numOfTables;
  bool hasNext = (pBlockIter->numOfBlocks > 0);

  for (;;) {
    if (!hasNext) {
      // current block list is used up, move on
      code = initForFirstBlockInFile(pReader, pBlockIter);
      if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) {
        break;
      }

      pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks;
      hasNext = (pBlockIter->numOfBlocks > 0);
      continue;
    }

    SBlock* pBlock = getCurrentBlock(pBlockIter);
    int32_t rowsInBlock = pBlock->nRow;

    pTableBlockInfo->totalRows += rowsInBlock;

    if (rowsInBlock > pTableBlockInfo->maxRows) {
      pTableBlockInfo->maxRows = rowsInBlock;
    }

    if (rowsInBlock < pTableBlockInfo->minRows) {
      pTableBlockInfo->minRows = rowsInBlock;
    }

    if (rowsInBlock < smallBlockRows) {
      pTableBlockInfo->numOfSmallBlocks += 1;
    }

    int32_t slot = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, rowsInBlock);
    pTableBlockInfo->blockRowsHisto[slot]++;

    hasNext = blockIteratorNext(&pStatus->blockIter);
  }

  return code;
}
H
Hongze Cheng 已提交
3071

H
refact  
Hongze Cheng 已提交
3072
/**
 * @brief Count the rows held in the snapshot's mem and imem tables for all tables of the reader.
 *
 * The memtables consulted are the ones captured in pReader->pReadSnap, so the NULL checks are made on those
 * snapshot pointers rather than on the live pTsdb->mem / pTsdb->imem, which may differ from the snapshot.
 *
 * @param pReader  reader whose table map is iterated (status.pTableIter is used as the cursor)
 * @return total number of rows reported by tsdbGetNRowsInTbData() for the matching table data
 */
int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) {
  int64_t rows = 0;

  SReaderStatus* pStatus = &pReader->status;
  pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL);

  while (pStatus->pTableIter != NULL) {
    STableBlockScanInfo* pBlockScanInfo = pStatus->pTableIter;

    STbData* d = NULL;
    if (pReader->pReadSnap->pMem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d);
      if (d != NULL) {
        rows += tsdbGetNRowsInTbData(d);
      }
    }

    STbData* di = NULL;
    if (pReader->pReadSnap->pIMem != NULL) {
      tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di);
      if (di != NULL) {
        rows += tsdbGetNRowsInTbData(di);
      }
    }

    // current table is exhausted, let's try the next table
    pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter);
  }

  return rows;
}
D
dapan1121 已提交
3103

L
Liu Jicong 已提交
3104
/**
 * @brief Look up the schema of a table together with the uid of its super table.
 *
 * For a child table the schema version is taken from the super table entry and *suid is set to the super
 * table uid; for a normal table the table's own schema version is used and *suid is 0.
 *
 * @param pVnode   vnode owning the meta store
 * @param uid      table uid
 * @param pSchema  [out] schema returned by metaGetTbTSchema()
 * @param suid     [out] super table uid, 0 for a normal table
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_TDB_INVALID_TABLE_ID (also stored in terrno) when an entry lookup
 *         fails
 */
int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) {
  int32_t     schemaVer = 1;
  SMetaReader reader = {0};

  metaReaderInit(&reader, pVnode->pMeta, 0);

  if (metaGetTableEntryByUid(&reader, uid) != TSDB_CODE_SUCCESS) {
    goto _invalid_table;
  }

  *suid = 0;

  if (reader.me.type == TSDB_CHILD_TABLE) {
    // switch to the super table entry, which carries the schema version
    tDecoderClear(&reader.coder);
    *suid = reader.me.ctbEntry.suid;

    if (metaGetTableEntryByUid(&reader, *suid) != TSDB_CODE_SUCCESS) {
      goto _invalid_table;
    }

    schemaVer = reader.me.stbEntry.schemaRow.version;
  } else {
    ASSERT(reader.me.type == TSDB_NORMAL_TABLE);
    schemaVer = reader.me.ntbEntry.schemaRow.version;
  }

  metaReaderClear(&reader);
  *pSchema = metaGetTbTSchema(pVnode->pMeta, uid, schemaVer);

  return TSDB_CODE_SUCCESS;

_invalid_table:
  terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
  metaReaderClear(&reader);
  return terrno;
}
H
Hongze Cheng 已提交
3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167

/**
 * @brief Capture a read snapshot of the tsdb: current mem/imem tables plus a file-system reference.
 *
 * The snapshot object is allocated, then mem, imem and the file system are referenced while the tsdb rwlock
 * is held for reading.
 *
 * NOTE(review): on the failure paths after the allocation, *ppSnap stays set to the partially filled
 * snapshot -- confirm callers release it.
 *
 * @param pTsdb   tsdb instance
 * @param ppSnap  [out] allocated snapshot (NULL when the allocation fails)
 * @return 0 on success, otherwise an error code
 */
int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) {
  int32_t code = 0;

  // alloc
  STsdbReadSnap* pSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap));
  *ppSnap = pSnap;
  if (pSnap == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // lock
  code = taosThreadRwlockRdlock(&pTsdb->rwLock);
  if (code) {
    return TAOS_SYSTEM_ERROR(code);
  }

  // take snapshot of the memtables and pin them
  pSnap->pMem = pTsdb->mem;
  pSnap->pIMem = pTsdb->imem;

  if (pSnap->pMem) {
    tsdbRefMemTable(pSnap->pMem);
  }

  if (pSnap->pIMem) {
    tsdbRefMemTable(pSnap->pIMem);
  }

  // fs
  code = tsdbFSRef(pTsdb, &pSnap->fs);
  if (code) {
    taosThreadRwlockUnlock(&pTsdb->rwLock);
    return code;
  }

  // unlock
  code = taosThreadRwlockUnlock(&pTsdb->rwLock);
  if (code) {
    return TAOS_SYSTEM_ERROR(code);
  }

  tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode));
  return code;
}

/**
 * @brief Release a snapshot obtained from tsdbTakeReadSnap().
 *
 * Drops the references on the mem/imem tables and the file system and frees the snapshot object. A NULL
 * snapshot is accepted; the trace line is written in either case.
 *
 * @param pTsdb  tsdb instance the snapshot belongs to
 * @param pSnap  snapshot to release, may be NULL
 */
void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) {
  if (pSnap != NULL) {
    if (pSnap->pMem != NULL) {
      tsdbUnrefMemTable(pSnap->pMem);
    }

    if (pSnap->pIMem != NULL) {
      tsdbUnrefMemTable(pSnap->pIMem);
    }

    tsdbFSUnref(pTsdb, &pSnap->fs);
    taosMemoryFree(pSnap);
  }

  tsdbTrace("vgId:%d, untake read snapshot", TD_VID(pTsdb->pVnode));
}