tsdbRead.c 141.4 KB
Newer Older
H
hjxilinx 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

16 17 18 19 20 21
#include "tsdb.h"
#include "tsdbDef.h"
#include "tsdbFS.h"
#include "tsdbLog.h"
#include "tsdbReadImpl.h"
#include "ttime.h"
S
Shengliang Guan 已提交
22
#include "texception.h"
H
hjxilinx 已提交
23
#include "os.h"
24
#include "talgo.h"
25
#include "tcompare.h"
26 27
#include "tdataformat.h"
#include "tskiplist.h"
28

29
#include "taosdef.h"
30
#include "tlosertree.h"
H
Hongze Cheng 已提交
31
#include "tsdbDef.h"
32
#include "tmsg.h"
33

34
#define EXTRA_BYTES 2
35
#define ASCENDING_TRAVERSE(o)   (o == TSDB_ORDER_ASC)
36
#define QH_GET_NUM_OF_COLS(handle) ((size_t)(taosArrayGetSize((handle)->pColumns)))
H
hjxilinx 已提交
37

H
Haojun Liao 已提交
38 39 40 41
#define GET_FILE_DATA_BLOCK_INFO(_checkInfo, _block)                                   \
  ((SDataBlockInfo){.window = {.skey = (_block)->keyFirst, .ekey = (_block)->keyLast}, \
                    .numOfCols = (_block)->numOfCols,                                  \
                    .rows = (_block)->numOfRows,                                       \
42
                    .uid = (_checkInfo)->tableId})
H
Haojun Liao 已提交
43

H
hjxilinx 已提交
44
enum {
45 46
  TSDB_QUERY_TYPE_ALL      = 1,
  TSDB_QUERY_TYPE_LAST     = 2,
H
hjxilinx 已提交
47 48
};

49 50 51 52 53 54
enum {
  TSDB_CACHED_TYPE_NONE    = 0,
  TSDB_CACHED_TYPE_LASTROW = 1,
  TSDB_CACHED_TYPE_LAST    = 2,
};

55 56
typedef struct SQueryFilePos {
  int32_t fid;
57 58
  int32_t slot;
  int32_t pos;
59
  int64_t lastKey;
60 61
  int32_t rows;
  bool    mixBlock;
62
  bool    blockCompleted;
63
  STimeWindow win;
64
} SQueryFilePos;
H
hjxilinx 已提交
65

66
typedef struct SDataBlockLoadInfo {
H
Hongze Cheng 已提交
67
  SDFileSet*  fileGroup;
68
  int32_t     slot;
69
  uint64_t    uid;
70
  SArray*     pLoadedCols;
71
} SDataBlockLoadInfo;
H
hjxilinx 已提交
72

73
typedef struct SLoadCompBlockInfo {
H
hjLiao 已提交
74
  int32_t tid; /* table tid */
75 76
  int32_t fileId;
} SLoadCompBlockInfo;
H
hjxilinx 已提交
77

78 79 80 81 82 83
enum {
  CHECKINFO_CHOSEN_MEM  = 0,
  CHECKINFO_CHOSEN_IMEM = 1,
  CHECKINFO_CHOSEN_BOTH = 2    //for update=2(merge case)
};

84
typedef struct STableCheckInfo {
85
  uint64_t      tableId;
H
Haojun Liao 已提交
86
  TSKEY         lastKey;
H
Haojun Liao 已提交
87
  SBlockInfo*   pCompInfo;
H
Haojun Liao 已提交
88
  int32_t       compSize;
89
  int32_t       numOfBlocks:29; // number of qualified data blocks not the original blocks
90
  uint8_t       chosen:2;       // indicate which iterator should move forward
H
Haojun Liao 已提交
91
  bool          initBuf:1;        // whether to initialize the in-memory skip list iterator or not
H
Haojun Liao 已提交
92 93
  SSkipListIterator* iter;      // mem buffer skip list iterator
  SSkipListIterator* iiter;     // imem buffer skip list iterator
94
} STableCheckInfo;
95

96
typedef struct STableBlockInfo {
H
Haojun Liao 已提交
97 98
  SBlock          *compBlock;
  STableCheckInfo *pTableCheckInfo;
99
} STableBlockInfo;
100

101 102
typedef struct SBlockOrderSupporter {
  int32_t             numOfTables;
H
Haojun Liao 已提交
103
  STableBlockInfo**   pDataBlockInfo;
104
  int32_t*            blockIndexArray;
105
  int32_t*            numOfBlocksPerTable;
106 107
} SBlockOrderSupporter;

H
Haojun Liao 已提交
108 109 110
typedef struct SIOCostSummary {
  int64_t blockLoadTime;
  int64_t statisInfoLoadTime;
H
Haojun Liao 已提交
111
  int64_t checkForNextTime;
112 113
  int64_t headFileLoad;
  int64_t headFileLoadTime;
H
Haojun Liao 已提交
114 115
} SIOCostSummary;

116 117
typedef struct STsdbReadHandle {
  STsdb*     pTsdb;
H
Haojun Liao 已提交
118 119 120 121 122 123 124 125 126
  SQueryFilePos  cur;              // current position
  int16_t        order;
  STimeWindow    window;           // the primary query time window that applies to all queries
  SDataStatis*   statis;           // query level statistics, only one table block statistics info exists at any time
  int32_t        numOfBlocks;
  SArray*        pColumns;         // column list, SColumnInfoData array list
  bool           locateStart;
  int32_t        outputCapacity;
  int32_t        realNumOfRows;
H
Haojun Liao 已提交
127
  SArray*        pTableCheckInfo;  // SArray<STableCheckInfo>
H
Haojun Liao 已提交
128 129
  int32_t        activeIndex;
  bool           checkFiles;       // check file stage
D
init  
dapan1121 已提交
130
  int8_t         cachelastrow;     // check if last row cached
131
  bool           loadExternalRow;  // load time window external data rows
H
Haojun Liao 已提交
132 133
  bool           currentLoadExternalRows; // current load external rows
  int32_t        loadType;         // block load type
H
Haojun Liao 已提交
134
  char          *idStr;            // query info handle, for debug purpose
H
Haojun Liao 已提交
135
  int32_t        type;             // query type: retrieve all data blocks, 2. retrieve only last row, 3. retrieve direct prev|next rows
H
Hongze Cheng 已提交
136 137 138
  SDFileSet*     pFileGroup;
  SFSIter        fileIter;
  SReadH         rhelper;
H
Haojun Liao 已提交
139
  STableBlockInfo* pDataBlockInfo;
H
Haojun Liao 已提交
140
  SDataCols     *pDataCols;        // in order to hold current file data block
H
Haojun Liao 已提交
141
  int32_t        allocSize;        // allocated data block size
H
Haojun Liao 已提交
142
  SArray        *defaultLoadColumn;// default load column
H
Haojun Liao 已提交
143
  SDataBlockLoadInfo dataBlockLoadInfo; /* record current block load information */
H
Haojun Liao 已提交
144
  SLoadCompBlockInfo compBlockLoadInfo; /* record current compblock information in SQueryAttr */
H
Haojun Liao 已提交
145

146 147
  SArray        *prev;             // previous row which is before than time window
  SArray        *next;             // next row which is after the query time window
H
Haojun Liao 已提交
148
  SIOCostSummary cost;
149
} STsdbReadHandle;
150

H
Haojun Liao 已提交
151 152 153
typedef struct STableGroupSupporter {
  int32_t    numOfCols;
  SColIndex* pCols;
154
  SSchema*   pTagSchema;
H
Haojun Liao 已提交
155 156
} STableGroupSupporter;

157
static STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList);
158 159
static int32_t checkForCachedLastRow(STsdbReadHandle* pTsdbReadHandle, STableGroupInfo *groupList);
static int32_t checkForCachedLast(STsdbReadHandle* pTsdbReadHandle);
H
Haojun Liao 已提交
160
// static int32_t tsdbGetCachedLastRow(STable* pTable, STSRow** pRes, TSKEY* lastKey);
H
Haojun Liao 已提交
161

H
Haojun Liao 已提交
162
static void    changeQueryHandleForInterpQuery(tsdbReaderT pHandle);
163
static void    doMergeTwoLevelData(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock);
164
static int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order);
165
static int32_t tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* win, STsdbReadHandle* pTsdbReadHandle);
166
static int32_t tsdbCheckInfoCompar(const void* key1, const void* key2);
167 168 169
//static int32_t doGetExternalRow(STsdbReadHandle* pTsdbReadHandle, int16_t type, void* pMemRef);
//static void*   doFreeColumnInfoData(SArray* pColumnInfoData);
//static void*   destroyTableCheckInfo(SArray* pTableCheckInfo);
H
Haojun Liao 已提交
170
static bool    tsdbGetExternalRow(tsdbReaderT pHandle);
Y
TD-1733  
yihaoDeng 已提交
171

172
static void tsdbInitDataBlockLoadInfo(SDataBlockLoadInfo* pBlockLoadInfo) {
H
hjxilinx 已提交
173
  pBlockLoadInfo->slot = -1;
174
  pBlockLoadInfo->uid  = 0;
H
hjxilinx 已提交
175
  pBlockLoadInfo->fileGroup = NULL;
H
hjxilinx 已提交
176 177
}

178
static void tsdbInitCompBlockLoadInfo(SLoadCompBlockInfo* pCompBlockLoadInfo) {
H
hjLiao 已提交
179
  pCompBlockLoadInfo->tid = -1;
180 181
  pCompBlockLoadInfo->fileId = -1;
}
H
hjxilinx 已提交
182

183 184
static SArray* getColumnIdList(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfCols = QH_GET_NUM_OF_COLS(pTsdbReadHandle);
H
Haojun Liao 已提交
185 186 187 188
  assert(numOfCols <= TSDB_MAX_COLUMNS);

  SArray* pIdList = taosArrayInit(numOfCols, sizeof(int16_t));
  for (int32_t i = 0; i < numOfCols; ++i) {
189
    SColumnInfoData* pCol = taosArrayGet(pTsdbReadHandle->pColumns, i);
H
Haojun Liao 已提交
190 191 192 193 194 195
    taosArrayPush(pIdList, &pCol->info.colId);
  }

  return pIdList;
}

196 197
static SArray* getDefaultLoadColumns(STsdbReadHandle* pTsdbReadHandle, bool loadTS) {
  SArray* pLocalIdList = getColumnIdList(pTsdbReadHandle);
H
Haojun Liao 已提交
198 199 200 201 202

  // check if the primary time stamp column needs to load
  int16_t colId = *(int16_t*)taosArrayGet(pLocalIdList, 0);

  // the primary timestamp column does not be included in the the specified load column list, add it
H
Haojun Liao 已提交
203 204
  if (loadTS && colId != PRIMARYKEY_TIMESTAMP_COL_ID) {
    int16_t columnId = PRIMARYKEY_TIMESTAMP_COL_ID;
H
Haojun Liao 已提交
205 206 207 208 209 210
    taosArrayInsert(pLocalIdList, 0, &columnId);
  }

  return pLocalIdList;
}

H
Haojun Liao 已提交
211
//int64_t tsdbGetNumOfRowsInMemTable(tsdbReaderT* pHandle) {
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
//  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
//
//  int64_t rows = 0;
//  STsdbMemTable* pMemTable = pTsdbReadHandle->pMemTable;
//  if (pMemTable == NULL) { return rows; }
//
////  STableData* pMem  = NULL;
////  STableData* pIMem = NULL;
//
////  SMemTable* pMemT = pMemRef->snapshot.mem;
////  SMemTable* pIMemT = pMemRef->snapshot.imem;
//
//  size_t size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//  for (int32_t i = 0; i < size; ++i) {
//    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
//
////    if (pMemT && pCheckInfo->tableId < pMemT->maxTables) {
////      pMem = pMemT->tData[pCheckInfo->tableId];
////      rows += (pMem && pMem->uid == pCheckInfo->tableId) ? pMem->numOfRows : 0;
////    }
////    if (pIMemT && pCheckInfo->tableId < pIMemT->maxTables) {
////      pIMem = pIMemT->tData[pCheckInfo->tableId];
////      rows += (pIMem && pIMem->uid == pCheckInfo->tableId) ? pIMem->numOfRows : 0;
////    }
//  }
//  return rows;
//}

240 241 242
static SArray* createCheckInfoFromTableGroup(STsdbReadHandle* pTsdbReadHandle, STableGroupInfo* pGroupList) {
  size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList);
  assert(numOfGroup >= 1);
H
Haojun Liao 已提交
243 244 245 246 247 248 249 250

  // allocate buffer in order to load data blocks from file
  SArray* pTableCheckInfo = taosArrayInit(pGroupList->numOfTables, sizeof(STableCheckInfo));
  if (pTableCheckInfo == NULL) {
    return NULL;
  }

  // todo apply the lastkey of table check to avoid to load header file
251
  for (int32_t i = 0; i < numOfGroup; ++i) {
H
Haojun Liao 已提交
252 253 254 255 256 257 258 259
    SArray* group = *(SArray**) taosArrayGet(pGroupList->pGroupList, i);

    size_t gsize = taosArrayGetSize(group);
    assert(gsize > 0);

    for (int32_t j = 0; j < gsize; ++j) {
      STableKeyInfo* pKeyInfo = (STableKeyInfo*) taosArrayGet(group, j);

260
      STableCheckInfo info = { .lastKey = pKeyInfo->lastKey, .tableId = pKeyInfo->uid};
261 262 263
      if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
        if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReadHandle->window.skey) {
          info.lastKey = pTsdbReadHandle->window.skey;
264 265
        }

266
        assert(info.lastKey >= pTsdbReadHandle->window.skey && info.lastKey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
267
      } else {
268
        assert(info.lastKey >= pTsdbReadHandle->window.ekey && info.lastKey <= pTsdbReadHandle->window.skey);
H
Haojun Liao 已提交
269 270 271
      }

      taosArrayPush(pTableCheckInfo, &info);
H
Haojun Liao 已提交
272
      tsdbDebug("%p check table uid:%"PRId64" from lastKey:%"PRId64" %s", pTsdbReadHandle, info.tableId, info.lastKey, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
273 274 275
    }
  }

276
  // TODO  group table according to the tag value.
277
  taosArraySort(pTableCheckInfo, tsdbCheckInfoCompar);
H
Haojun Liao 已提交
278 279 280
  return pTableCheckInfo;
}

281 282
static void resetCheckInfo(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
283 284 285 286
  assert(numOfTables >= 1);

  // todo apply the lastkey of table check to avoid to load header file
  for (int32_t i = 0; i < numOfTables; ++i) {
287 288
    STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
    pCheckInfo->lastKey = pTsdbReadHandle->window.skey;
H
Haojun Liao 已提交
289 290
    pCheckInfo->iter    = tSkipListDestroyIter(pCheckInfo->iter);
    pCheckInfo->iiter   = tSkipListDestroyIter(pCheckInfo->iiter);
291
    pCheckInfo->initBuf = false;
H
Haojun Liao 已提交
292

293 294
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
      assert(pCheckInfo->lastKey >= pTsdbReadHandle->window.skey);
295
    } else {
296
      assert(pCheckInfo->lastKey <= pTsdbReadHandle->window.skey);
297
    }
H
Haojun Liao 已提交
298 299 300
  }
}

H
Haojun Liao 已提交
301 302 303
// only one table, not need to sort again
static SArray* createCheckInfoFromCheckInfo(STableCheckInfo* pCheckInfo, TSKEY skey, SArray** psTable) {
  SArray* pNew = taosArrayInit(1, sizeof(STableCheckInfo));
D
fix bug  
dapan1121 已提交
304

H
Haojun Liao 已提交
305
  STableCheckInfo info = { .lastKey = skey};
H
Haojun Liao 已提交
306

H
Haojun Liao 已提交
307 308
  info.tableId = pCheckInfo->tableId;
  taosArrayPush(pNew, &info);
H
Haojun Liao 已提交
309 310 311
  return pNew;
}

312 313
static bool emptyQueryTimewindow(STsdbReadHandle* pTsdbReadHandle) {
  assert(pTsdbReadHandle != NULL);
314

315 316
  STimeWindow* w = &pTsdbReadHandle->window;
  bool asc = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
317 318 319 320

  return ((asc && w->skey > w->ekey) || (!asc && w->ekey > w->skey));
}

321 322
// Update the query time window according to the data time to live(TTL) information, in order to avoid to return
// the expired data to client, even it is queried already.
323
static int64_t getEarliestValidTimestamp(STsdb* pTsdb) {
324 325 326
  STsdbCfg* pCfg = &pTsdb->config;

  int64_t now = taosGetTimestamp(pCfg->precision);
327
  return now - (tsTickPerDay[pCfg->precision] * pCfg->keep) + 1;  // needs to add one tick
328 329
}

330 331
static void setQueryTimewindow(STsdbReadHandle* pTsdbReadHandle, STsdbQueryCond* pCond) {
  pTsdbReadHandle->window = pCond->twindow;
332

333
  bool    updateTs = false;
334 335 336 337
  int64_t startTs = getEarliestValidTimestamp(pTsdbReadHandle->pTsdb);
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
    if (startTs > pTsdbReadHandle->window.skey) {
      pTsdbReadHandle->window.skey = startTs;
338 339
      pCond->twindow.skey = startTs;
      updateTs = true;
340 341
    }
  } else {
342 343
    if (startTs > pTsdbReadHandle->window.ekey) {
      pTsdbReadHandle->window.ekey = startTs;
344 345
      pCond->twindow.ekey = startTs;
      updateTs = true;
346 347 348
    }
  }

349
  if (updateTs) {
H
Haojun Liao 已提交
350 351 352
    tsdbDebug("%p update the query time window, old:%" PRId64 " - %" PRId64 ", new:%" PRId64 " - %" PRId64 ", %s",
              pTsdbReadHandle, pCond->twindow.skey, pCond->twindow.ekey, pTsdbReadHandle->window.skey,
              pTsdbReadHandle->window.ekey, pTsdbReadHandle->idStr);
353
  }
354 355
}

H
Haojun Liao 已提交
356
static STsdbReadHandle* tsdbQueryTablesImpl(STsdb* tsdb, STsdbQueryCond* pCond, uint64_t qId, uint64_t taskId) {
357 358
  STsdbReadHandle* pReadHandle = calloc(1, sizeof(STsdbReadHandle));
  if (pReadHandle == NULL) {
359
    goto _end;
360
  }
H
Haojun Liao 已提交
361

362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
  pReadHandle->order       = pCond->order;
  pReadHandle->pTsdb       = tsdb;
  pReadHandle->type        = TSDB_QUERY_TYPE_ALL;
  pReadHandle->cur.fid     = INT32_MIN;
  pReadHandle->cur.win     = TSWINDOW_INITIALIZER;
  pReadHandle->checkFiles  = true;
  pReadHandle->activeIndex = 0;   // current active table index
  pReadHandle->allocSize   = 0;
  pReadHandle->locateStart = false;
  pReadHandle->loadType    = pCond->type;

  pReadHandle->outputCapacity  = 4096;//((STsdb*)tsdb)->config.maxRowsPerFileBlock;
  pReadHandle->loadExternalRow = pCond->loadExternalRows;
  pReadHandle->currentLoadExternalRows = pCond->loadExternalRows;

H
Haojun Liao 已提交
377
  char buf[128] = {0};
H
Haojun Liao 已提交
378
  snprintf(buf, tListLen(buf), "TID:0x%"PRIx64" QID:0x%"PRIx64, taskId, qId);
H
Haojun Liao 已提交
379 380
  pReadHandle->idStr = strdup(buf);

381
  if (tsdbInitReadH(&pReadHandle->rhelper, (STsdb*)tsdb) != 0) {
382
    goto _end;
B
Bomin Zhang 已提交
383
  }
H
Haojun Liao 已提交
384

385 386
  assert(pCond != NULL);
  setQueryTimewindow(pReadHandle, pCond);
387

388 389
  if (pCond->numOfCols > 0) {
    // allocate buffer in order to load data blocks from file
390 391
    pReadHandle->statis = calloc(pCond->numOfCols, sizeof(SDataStatis));
    if (pReadHandle->statis == NULL) {
392
      goto _end;
393
    }
H
Haojun Liao 已提交
394

395
    // todo: use list instead of array?
396 397
    pReadHandle->pColumns = taosArrayInit(pCond->numOfCols, sizeof(SColumnInfoData));
    if (pReadHandle->pColumns == NULL) {
398
      goto _end;
399
    }
H
Haojun Liao 已提交
400

401 402
    for (int32_t i = 0; i < pCond->numOfCols; ++i) {
      SColumnInfoData colInfo = {{0}, 0};
H
Haojun Liao 已提交
403

404
      colInfo.info = pCond->colList[i];
405
      colInfo.pData = calloc(1, EXTRA_BYTES + pReadHandle->outputCapacity * pCond->colList[i].bytes);
406
      if (colInfo.pData == NULL) {
407
        goto _end;
408
      }
409

410 411
      taosArrayPush(pReadHandle->pColumns, &colInfo);
      pReadHandle->statis[i].colId = colInfo.info.colId;
B
Bomin Zhang 已提交
412
    }
H
Haojun Liao 已提交
413

414
    pReadHandle->defaultLoadColumn = getDefaultLoadColumns(pReadHandle, true);
H
Haojun Liao 已提交
415
  }
416

417 418
  pReadHandle->pDataCols = tdNewDataCols(1000, pReadHandle->pTsdb->config.maxRowsPerFileBlock);
  if (pReadHandle->pDataCols == NULL) {
H
Haojun Liao 已提交
419
    tsdbError("%p failed to malloc buf for pDataCols, %s", pReadHandle, pReadHandle->idStr);
H
Haojun Liao 已提交
420
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
421
    goto _end;
H
hjxilinx 已提交
422
  }
423

424 425
  tsdbInitDataBlockLoadInfo(&pReadHandle->dataBlockLoadInfo);
  tsdbInitCompBlockLoadInfo(&pReadHandle->compBlockLoadInfo);
426

H
Haojun Liao 已提交
427
  return (tsdbReaderT)pReadHandle;
428

429
  _end:
430
  tsdbCleanupReadHandle(pReadHandle);
431
  terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
432
  return NULL;
H
hjxilinx 已提交
433 434
}

H
Haojun Liao 已提交
435
tsdbReaderT* tsdbQueryTables(STsdb* tsdb, STsdbQueryCond* pCond, STableGroupInfo* groupList, uint64_t qId, uint64_t taskId) {
H
Haojun Liao 已提交
436
  STsdbReadHandle* pTsdbReadHandle = tsdbQueryTablesImpl(tsdb, pCond, qId, taskId);
437
  if (pTsdbReadHandle == NULL) {
438 439 440
    return NULL;
  }

441
  if (emptyQueryTimewindow(pTsdbReadHandle)) {
H
Haojun Liao 已提交
442
    return (tsdbReaderT*) pTsdbReadHandle;
443
  }
H
Haojun Liao 已提交
444 445

  // todo apply the lastkey of table check to avoid to load header file
446
  pTsdbReadHandle->pTableCheckInfo = createCheckInfoFromTableGroup(pTsdbReadHandle, groupList);
447
  if (pTsdbReadHandle->pTableCheckInfo == NULL) {
448
//    tsdbCleanupReadHandle(pTsdbReadHandle);
H
Haojun Liao 已提交
449 450 451 452
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    return NULL;
  }

453 454 455
  tsdbDebug("%p total numOfTable:%" PRIzu " in this query, group %"PRIzu" %s", pTsdbReadHandle, taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo),
      taosArrayGetSize(groupList->pGroupList), pTsdbReadHandle->idStr);

H
Haojun Liao 已提交
456
  return (tsdbReaderT) pTsdbReadHandle;
H
Haojun Liao 已提交
457 458
}

H
Haojun Liao 已提交
459
void tsdbResetQueryHandle(tsdbReaderT queryHandle, STsdbQueryCond *pCond) {
460
  STsdbReadHandle* pTsdbReadHandle = queryHandle;
H
Haojun Liao 已提交
461

462 463 464
  if (emptyQueryTimewindow(pTsdbReadHandle)) {
    if (pCond->order != pTsdbReadHandle->order) {
      pTsdbReadHandle->order = pCond->order;
dengyihao's avatar
dengyihao 已提交
465
      TSWAP(pTsdbReadHandle->window.skey, pTsdbReadHandle->window.ekey, int64_t);
466 467 468 469 470
    }

    return;
  }

471 472 473 474 475 476 477 478 479
  pTsdbReadHandle->order       = pCond->order;
  pTsdbReadHandle->window      = pCond->twindow;
  pTsdbReadHandle->type        = TSDB_QUERY_TYPE_ALL;
  pTsdbReadHandle->cur.fid     = -1;
  pTsdbReadHandle->cur.win     = TSWINDOW_INITIALIZER;
  pTsdbReadHandle->checkFiles  = true;
  pTsdbReadHandle->activeIndex = 0;   // current active table index
  pTsdbReadHandle->locateStart = false;
  pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;
H
Haojun Liao 已提交
480 481

  if (ASCENDING_TRAVERSE(pCond->order)) {
482
    assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
483
  } else {
484
    assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
485 486 487
  }

  // allocate buffer in order to load data blocks from file
488
  memset(pTsdbReadHandle->statis, 0, sizeof(SDataStatis));
H
Haojun Liao 已提交
489

490 491
  tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
  tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);
H
Haojun Liao 已提交
492

493
  resetCheckInfo(pTsdbReadHandle);
H
Haojun Liao 已提交
494 495
}

H
Haojun Liao 已提交
496
void tsdbResetQueryHandleForNewTable(tsdbReaderT queryHandle, STsdbQueryCond *pCond, STableGroupInfo* groupList) {
497
  STsdbReadHandle* pTsdbReadHandle = queryHandle;
H
Haojun Liao 已提交
498

499 500 501 502 503 504 505 506 507
  pTsdbReadHandle->order       = pCond->order;
  pTsdbReadHandle->window      = pCond->twindow;
  pTsdbReadHandle->type        = TSDB_QUERY_TYPE_ALL;
  pTsdbReadHandle->cur.fid     = -1;
  pTsdbReadHandle->cur.win     = TSWINDOW_INITIALIZER;
  pTsdbReadHandle->checkFiles  = true;
  pTsdbReadHandle->activeIndex = 0;   // current active table index
  pTsdbReadHandle->locateStart = false;
  pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;
H
Haojun Liao 已提交
508 509

  if (ASCENDING_TRAVERSE(pCond->order)) {
510
    assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
511
  } else {
512
    assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
513 514 515
  }

  // allocate buffer in order to load data blocks from file
516
  memset(pTsdbReadHandle->statis, 0, sizeof(SDataStatis));
H
Haojun Liao 已提交
517

518 519
  tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
  tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);
H
Haojun Liao 已提交
520

H
Haojun Liao 已提交
521
  SArray* pTable = NULL;
522
//  STsdbMeta* pMeta = tsdbGetMeta(pTsdbReadHandle->pTsdb);
H
Haojun Liao 已提交
523

524
//  pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
525

526 527
  pTsdbReadHandle->pTableCheckInfo = NULL;//createCheckInfoFromTableGroup(pTsdbReadHandle, groupList, pMeta, &pTable);
  if (pTsdbReadHandle->pTableCheckInfo == NULL) {
528
//    tsdbCleanupReadHandle(pTsdbReadHandle);
H
Haojun Liao 已提交
529 530
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
  }
H
Haojun Liao 已提交
531

532 533
//  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
H
Haojun Liao 已提交
534 535
}

H
Haojun Liao 已提交
536
tsdbReaderT tsdbQueryLastRow(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, uint64_t taskId) {
537
  pCond->twindow = updateLastrowForEachGroup(groupList);
H
Haojun Liao 已提交
538 539 540 541 542 543

  // no qualified table
  if (groupList->numOfTables == 0) {
    return NULL;
  }

H
Haojun Liao 已提交
544
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, taskId);
545
  if (pTsdbReadHandle == NULL) {
546 547 548
    return NULL;
  }

549
  int32_t code = checkForCachedLastRow(pTsdbReadHandle, groupList);
H
Haojun Liao 已提交
550 551 552 553
  if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0
    terrno = code;
    return NULL;
  }
H
Haojun Liao 已提交
554 555

  assert(pCond->order == TSDB_ORDER_ASC && pCond->twindow.skey <= pCond->twindow.ekey);
556 557
  if (pTsdbReadHandle->cachelastrow) {
    pTsdbReadHandle->type = TSDB_QUERY_TYPE_LAST;
D
init  
dapan1121 已提交
558 559
  }
  
560
  return pTsdbReadHandle;
D
init  
dapan1121 已提交
561 562
}

563
#if 0
H
Haojun Liao 已提交
564
tsdbReaderT tsdbQueryCacheLast(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, STsdbMemTable* pMemRef) {
565 566
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pMemRef);
  if (pTsdbReadHandle == NULL) {
567 568 569
    return NULL;
  }

570
  int32_t code = checkForCachedLast(pTsdbReadHandle);
D
init  
dapan1121 已提交
571 572 573 574 575
  if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0
    terrno = code;
    return NULL;
  }

576 577
  if (pTsdbReadHandle->cachelastrow) {
    pTsdbReadHandle->type = TSDB_QUERY_TYPE_LAST;
D
fix bug  
dapan1121 已提交
578
  }
D
init  
dapan1121 已提交
579
  
580
  return pTsdbReadHandle;
H
hjxilinx 已提交
581 582
}

583
#endif
H
Haojun Liao 已提交
584
SArray* tsdbGetQueriedTableList(tsdbReaderT *pHandle) {
585
  assert(pHandle != NULL);
H
Haojun Liao 已提交
586

587
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) pHandle;
H
Haojun Liao 已提交
588

589
  size_t size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
590
  SArray* res = taosArrayInit(size, POINTER_BYTES);
591 592 593
  return res;
}

H
Haojun Liao 已提交
594 595 596 597 598 599
// leave only one table for each group
static STableGroupInfo* trimTableGroup(STimeWindow* window, STableGroupInfo* pGroupList) {
  assert(pGroupList);
  size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList);

  STableGroupInfo* pNew = calloc(1, sizeof(STableGroupInfo));
Y
yihaoDeng 已提交
600
  pNew->pGroupList = taosArrayInit(numOfGroup, POINTER_BYTES);
H
Haojun Liao 已提交
601 602 603 604 605 606 607 608

  for(int32_t i = 0; i < numOfGroup; ++i) {
    SArray* oneGroup = taosArrayGetP(pGroupList->pGroupList, i);
    size_t numOfTables = taosArrayGetSize(oneGroup);

    SArray* px = taosArrayInit(4, sizeof(STableKeyInfo));
    for (int32_t j = 0; j < numOfTables; ++j) {
      STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(oneGroup, j);
609 610 611 612 613
//      if (window->skey <= pInfo->lastKey && ((STable*)pInfo->pTable)->lastKey != TSKEY_INITIAL_VAL) {
//        taosArrayPush(px, pInfo);
//        pNew->numOfTables += 1;
//        break;
//      }
H
Haojun Liao 已提交
614 615 616 617 618 619 620 621 622 623 624 625 626
    }

    // there are no data in this group
    if (taosArrayGetSize(px) == 0) {
      taosArrayDestroy(px);
    } else {
      taosArrayPush(pNew->pGroupList, &px);
    }
  }

  return pNew;
}

H
Haojun Liao 已提交
627
tsdbReaderT tsdbQueryRowsInExternalWindow(STsdb *tsdb, STsdbQueryCond* pCond, STableGroupInfo *groupList, uint64_t qId, uint64_t taskId) {
H
Haojun Liao 已提交
628 629
  STableGroupInfo* pNew = trimTableGroup(&pCond->twindow, groupList);

630 631 632 633 634 635 636 637 638 639 640 641
  if (pNew->numOfTables == 0) {
    tsdbDebug("update query time range to invalidate time window");

    assert(taosArrayGetSize(pNew->pGroupList) == 0);
    bool asc = ASCENDING_TRAVERSE(pCond->order);
    if (asc) {
      pCond->twindow.ekey = pCond->twindow.skey - 1;
    } else {
      pCond->twindow.skey = pCond->twindow.ekey - 1;
    }
  }

H
Haojun Liao 已提交
642
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) tsdbQueryTables(tsdb, pCond, pNew, qId, taskId);
643 644
  pTsdbReadHandle->loadExternalRow = true;
  pTsdbReadHandle->currentLoadExternalRows = true;
645

646
  return pTsdbReadHandle;
647 648
}

649
static bool initTableMemIterator(STsdbReadHandle* pHandle, STableCheckInfo* pCheckInfo) {
650
  if (pCheckInfo->initBuf) {
651 652
    return true;
  }
H
Haojun Liao 已提交
653

654
  pCheckInfo->initBuf = true;
655
  int32_t order = pHandle->order;
H
Haojun Liao 已提交
656

657 658 659
  STbData** pMem = NULL;
  STbData** pIMem = NULL;

H
Haojun Liao 已提交
660
  TSKEY tLastKey = 0;  /// keyToTkey(pCheckInfo->lastKey);
661 662 663
  if (pHandle->pTsdb->mem != NULL) {
    pMem = taosHashGet(pHandle->pTsdb->mem->pHashIdx, &pCheckInfo->tableId, sizeof(pCheckInfo->tableId));
    if (pMem != NULL) {
H
Haojun Liao 已提交
664
      pCheckInfo->iter =
665
          tSkipListCreateIterFromVal((*pMem)->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order);
H
Haojun Liao 已提交
666
    }
667
  }
H
Haojun Liao 已提交
668

669 670 671
  if (pHandle->pTsdb->imem != NULL) {
    pIMem = taosHashGet(pHandle->pTsdb->imem->pHashIdx, &pCheckInfo->tableId, sizeof(pCheckInfo->tableId));
    if (pIMem != NULL) {
H
Haojun Liao 已提交
672
      pCheckInfo->iiter =
673
          tSkipListCreateIterFromVal((*pIMem)->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order);
H
Haojun Liao 已提交
674
    }
675
  }
H
Haojun Liao 已提交
676

677 678 679 680
  // both iterators are NULL, no data in buffer right now
  if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) {
    return false;
  }
H
Haojun Liao 已提交
681

682 683 684 685 686
  bool memEmpty  = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter));
  bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter));
  if (memEmpty && imemEmpty) { // buffer is empty
    return false;
  }
H
Haojun Liao 已提交
687

688 689 690
  if (!memEmpty) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
    assert(node != NULL);
H
Haojun Liao 已提交
691

H
Haojun Liao 已提交
692 693
    STSRow* row = (STSRow*)SL_GET_NODE_DATA(node);
    TSKEY   key = TD_ROW_KEY(row);  // first timestamp in buffer
694
    tsdbDebug("%p uid:%" PRId64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
H
Haojun Liao 已提交
695 696
              "-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", %s",
              pHandle, pCheckInfo->tableId, key, order, (*pMem)->keyMin, (*pMem)->keyMax, pCheckInfo->lastKey, (*pMem)->nrows, pHandle->idStr);
H
Haojun Liao 已提交
697 698 699 700 701 702 703

    if (ASCENDING_TRAVERSE(order)) {
      assert(pCheckInfo->lastKey <= key);
    } else {
      assert(pCheckInfo->lastKey >= key);
    }

704
  } else {
H
Haojun Liao 已提交
705
    tsdbDebug("%p uid:%"PRId64", no data in mem, %s", pHandle, pCheckInfo->tableId, pHandle->idStr);
706
  }
H
Haojun Liao 已提交
707

708 709 710
  if (!imemEmpty) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
    assert(node != NULL);
H
Haojun Liao 已提交
711

H
Haojun Liao 已提交
712 713
    STSRow* row = (STSRow*)SL_GET_NODE_DATA(node);
    TSKEY   key = TD_ROW_KEY(row);  // first timestamp in buffer
714
    tsdbDebug("%p uid:%" PRId64 ", check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
H
Haojun Liao 已提交
715 716
              "-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", %s",
              pHandle, pCheckInfo->tableId, key, order, (*pIMem)->keyMin, (*pIMem)->keyMax, pCheckInfo->lastKey, (*pIMem)->nrows, pHandle->idStr);
H
Haojun Liao 已提交
717 718 719 720 721 722

    if (ASCENDING_TRAVERSE(order)) {
      assert(pCheckInfo->lastKey <= key);
    } else {
      assert(pCheckInfo->lastKey >= key);
    }
723
  } else {
H
Haojun Liao 已提交
724
    tsdbDebug("%p uid:%"PRId64", no data in imem, %s", pHandle, pCheckInfo->tableId, pHandle->idStr);
725
  }
H
Haojun Liao 已提交
726

727 728 729
  return true;
}

H
Haojun Liao 已提交
730 731 732 733 734
static void destroyTableMemIterator(STableCheckInfo* pCheckInfo) {
  tSkipListDestroyIter(pCheckInfo->iter);
  tSkipListDestroyIter(pCheckInfo->iiter);
}

735
static TSKEY extractFirstTraverseKey(STableCheckInfo* pCheckInfo, int32_t order, int32_t update) {
H
Haojun Liao 已提交
736
  STSRow *rmem = NULL, *rimem = NULL;
737 738 739
  if (pCheckInfo->iter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
    if (node != NULL) {
H
Haojun Liao 已提交
740
      rmem = (STSRow*)SL_GET_NODE_DATA(node);
741 742 743 744 745 746
    }
  }

  if (pCheckInfo->iiter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
    if (node != NULL) {
H
Haojun Liao 已提交
747
      rimem = (STSRow*)SL_GET_NODE_DATA(node);
748 749 750 751 752 753 754 755 756
    }
  }

  if (rmem == NULL && rimem == NULL) {
    return TSKEY_INITIAL_VAL;
  }

  if (rmem != NULL && rimem == NULL) {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
H
Haojun Liao 已提交
757
    return TD_ROW_KEY(rmem);
758 759 760 761
  }

  if (rmem == NULL && rimem != NULL) {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
Haojun Liao 已提交
762
    return TD_ROW_KEY(rimem);
763 764
  }

H
Haojun Liao 已提交
765 766
  TSKEY r1 = TD_ROW_KEY(rmem);
  TSKEY r2 = TD_ROW_KEY(rimem);
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789

  if (r1 == r2) {
    if(update == TD_ROW_DISCARD_UPDATE){
      pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
      tSkipListIterNext(pCheckInfo->iter);
    }
    else if(update == TD_ROW_OVERWRITE_UPDATE) {
      pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
      tSkipListIterNext(pCheckInfo->iiter);
    } else {
      pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
    }
    return r1;
  } else if (r1 < r2 && ASCENDING_TRAVERSE(order)) {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
    return r1;
  }
  else {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
    return r2;
  }
}

H
Haojun Liao 已提交
790 791
static STSRow* getSRowInTableMem(STableCheckInfo* pCheckInfo, int32_t order, int32_t update, STSRow** extraRow) {
  STSRow *rmem = NULL, *rimem = NULL;
H
Haojun Liao 已提交
792 793 794
  if (pCheckInfo->iter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
    if (node != NULL) {
H
Haojun Liao 已提交
795
      rmem = (STSRow*)SL_GET_NODE_DATA(node);
H
Haojun Liao 已提交
796 797
    }
  }
798

H
Haojun Liao 已提交
799 800 801
  if (pCheckInfo->iiter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
    if (node != NULL) {
H
Haojun Liao 已提交
802
      rimem = (STSRow*)SL_GET_NODE_DATA(node);
H
Haojun Liao 已提交
803 804
    }
  }
805

H
Haojun Liao 已提交
806 807
  if (rmem == NULL && rimem == NULL) {
    return NULL;
H
Haojun Liao 已提交
808
  }
809

H
Haojun Liao 已提交
810
  if (rmem != NULL && rimem == NULL) {
H
Haojun Liao 已提交
811 812 813
    pCheckInfo->chosen = 0;
    return rmem;
  }
814

H
Haojun Liao 已提交
815
  if (rmem == NULL && rimem != NULL) {
H
Haojun Liao 已提交
816 817 818
    pCheckInfo->chosen = 1;
    return rimem;
  }
819

H
Haojun Liao 已提交
820 821
  TSKEY r1 = TD_ROW_KEY(rmem);
  TSKEY r2 = TD_ROW_KEY(rimem);
H
Haojun Liao 已提交
822

823 824
  if (r1 == r2) {
    if (update == TD_ROW_DISCARD_UPDATE) {
H
TD-1439  
Hongze Cheng 已提交
825
      tSkipListIterNext(pCheckInfo->iter);
826
      pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
TD-1439  
Hongze Cheng 已提交
827
      return rimem;
828
    } else if(update == TD_ROW_OVERWRITE_UPDATE){
H
TD-1439  
Hongze Cheng 已提交
829
      tSkipListIterNext(pCheckInfo->iiter);
830 831 832 833
      pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
      return rmem;
    } else {
      pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
H
Haojun Liao 已提交
834
      *extraRow = rimem;
H
TD-1439  
Hongze Cheng 已提交
835 836
      return rmem;
    }
H
Haojun Liao 已提交
837 838 839
  } else {
    if (ASCENDING_TRAVERSE(order)) {
      if (r1 < r2) {
840
        pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
H
Haojun Liao 已提交
841 842
        return rmem;
      } else {
843
        pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
Haojun Liao 已提交
844 845 846 847
        return rimem;
      }
    } else {
      if (r1 < r2) {
848
        pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
Haojun Liao 已提交
849 850
        return rimem;
      } else {
851
        pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
Haojun Liao 已提交
852 853 854 855
        return rmem;
      }
    }
  }
H
Haojun Liao 已提交
856 857
}

858
static bool moveToNextRowInMem(STableCheckInfo* pCheckInfo) {
H
Haojun Liao 已提交
859
  bool hasNext = false;
860
  if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) {
H
Haojun Liao 已提交
861 862 863
    if (pCheckInfo->iter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iter);
    }
864

H
Haojun Liao 已提交
865 866 867
    if (hasNext) {
      return hasNext;
    }
868

H
Haojun Liao 已提交
869 870 871
    if (pCheckInfo->iiter != NULL) {
      return tSkipListIterGet(pCheckInfo->iiter) != NULL;
    }
872
  } else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM){
873 874 875
    if (pCheckInfo->iiter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iiter);
    }
876

877 878 879
    if (hasNext) {
      return hasNext;
    }
880

881 882
    if (pCheckInfo->iter != NULL) {
      return tSkipListIterGet(pCheckInfo->iter) != NULL;
H
Haojun Liao 已提交
883
    }
884 885 886 887 888 889 890
  } else {
    if (pCheckInfo->iter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iter);
    }
    if (pCheckInfo->iiter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iiter) || hasNext;
    }
H
Haojun Liao 已提交
891
  }
892

H
Haojun Liao 已提交
893 894 895
  return hasNext;
}

896
static bool hasMoreDataInCache(STsdbReadHandle* pHandle) {
H
TD-1439  
Hongze Cheng 已提交
897
  STsdbCfg *pCfg = &pHandle->pTsdb->config;
898 899
  size_t size = taosArrayGetSize(pHandle->pTableCheckInfo);
  assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1);
D
dapan1121 已提交
900
  pHandle->cur.fid = INT32_MIN;
H
Haojun Liao 已提交
901

902
  STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
H
Haojun Liao 已提交
903 904 905 906
  if (!pCheckInfo->initBuf) {
    initTableMemIterator(pHandle, pCheckInfo);
  }

H
Haojun Liao 已提交
907
  STSRow* row = getSRowInTableMem(pCheckInfo, pHandle->order, pCfg->update, NULL);
H
Haojun Liao 已提交
908
  if (row == NULL) {
909 910
    return false;
  }
911

H
Haojun Liao 已提交
912
  pCheckInfo->lastKey = TD_ROW_KEY(row);  // first timestamp in buffer
H
Haojun Liao 已提交
913 914
  tsdbDebug("%p uid:%" PRId64", check data in buffer from skey:%" PRId64 ", order:%d, %s", pHandle,
      pCheckInfo->tableId,  pCheckInfo->lastKey, pHandle->order, pHandle->idStr);
H
Haojun Liao 已提交
915

916
  // all data in mem are checked already.
917 918
  if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) ||
      (pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) {
919 920
    return false;
  }
H
Haojun Liao 已提交
921

922 923
  int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1;
  STimeWindow* win = &pHandle->cur.win;
H
Haojun Liao 已提交
924
  pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle);
H
Haojun Liao 已提交
925

926 927 928 929
  // update the last key value
  pCheckInfo->lastKey = win->ekey + step;
  pHandle->cur.lastKey = win->ekey + step;
  pHandle->cur.mixBlock = true;
930

931
  if (!ASCENDING_TRAVERSE(pHandle->order)) {
dengyihao's avatar
dengyihao 已提交
932
    TSWAP(win->skey, win->ekey, TSKEY);
933
  }
H
Haojun Liao 已提交
934

935
  return true;
936
}
H
hjxilinx 已提交
937

938 939
static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precision) {
  assert(precision >= TSDB_TIME_PRECISION_MICRO || precision <= TSDB_TIME_PRECISION_NANO);
940 941 942
  if (key == TSKEY_INITIAL_VAL) {
    return INT32_MIN;
  }
H
Haojun Liao 已提交
943

D
dapan1121 已提交
944
  if (key < 0) {
945
    key -= (daysPerFile * tsTickPerDay[precision]);
D
dapan1121 已提交
946 947
  }
  
948
  int64_t fid = (int64_t)(key / (daysPerFile * tsTickPerDay[precision]));  // set the starting fileId
949 950 951
  if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32
    fid = INT32_MIN;
  }
H
Haojun Liao 已提交
952

953
  if (fid > 0L && fid > INT32_MAX) {
954 955
    fid = INT32_MAX;
  }
H
Haojun Liao 已提交
956

S
TD-1057  
Shengliang Guan 已提交
957
  return (int32_t)fid;
958 959
}

H
refact  
Hongze Cheng 已提交
960
static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
961 962
  int32_t firstSlot = 0;
  int32_t lastSlot = numOfBlocks - 1;
H
Haojun Liao 已提交
963

964
  int32_t midSlot = firstSlot;
H
Haojun Liao 已提交
965

966 967 968
  while (1) {
    numOfBlocks = lastSlot - firstSlot + 1;
    midSlot = (firstSlot + (numOfBlocks >> 1));
H
Haojun Liao 已提交
969

970
    if (numOfBlocks == 1) break;
H
Haojun Liao 已提交
971

972 973 974 975 976 977 978 979 980 981 982
    if (skey > pBlock[midSlot].keyLast) {
      if (numOfBlocks == 2) break;
      if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break;
      firstSlot = midSlot + 1;
    } else if (skey < pBlock[midSlot].keyFirst) {
      if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].keyLast)) break;
      lastSlot = midSlot - 1;
    } else {
      break;  // got the slot
    }
  }
H
Haojun Liao 已提交
983

984 985
  return midSlot;
}
986

987
static int32_t loadBlockInfo(STsdbReadHandle * pTsdbReadHandle, int32_t index, int32_t* numOfBlocks) {
H
Haojun Liao 已提交
988
  int32_t code = 0;
H
Haojun Liao 已提交
989

990
  STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, index);
H
Haojun Liao 已提交
991
  pCheckInfo->numOfBlocks = 0;
992

H
Haojun Liao 已提交
993 994 995 996
  STable table = {.uid = pCheckInfo->tableId, .tid = pCheckInfo->tableId};
  table.pSchema = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, pCheckInfo->tableId, 0);

  if (tsdbSetReadTable(&pTsdbReadHandle->rhelper, &table) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
997 998 999
    code = terrno;
    return code;
  }
1000

1001
  SBlockIdx* compIndex = pTsdbReadHandle->rhelper.pBlkIdx;
H
Hongze Cheng 已提交
1002

H
Haojun Liao 已提交
1003
  // no data block in this file, try next file
1004
  if (compIndex == NULL || compIndex->uid != pCheckInfo->tableId) {
H
Haojun Liao 已提交
1005 1006
    return 0;  // no data blocks in the file belongs to pCheckInfo->pTable
  }
1007

H
Haojun Liao 已提交
1008 1009 1010 1011 1012 1013 1014 1015
  if (pCheckInfo->compSize < (int32_t)compIndex->len) {
    assert(compIndex->len > 0);

    char* t = realloc(pCheckInfo->pCompInfo, compIndex->len);
    if (t == NULL) {
      terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
      code = TSDB_CODE_TDB_OUT_OF_MEMORY;
      return code;
1016 1017
    }

H
Haojun Liao 已提交
1018 1019 1020
    pCheckInfo->pCompInfo = (SBlockInfo*)t;
    pCheckInfo->compSize = compIndex->len;
  }
1021

1022
  if (tsdbLoadBlockInfo(&(pTsdbReadHandle->rhelper), (void*)(pCheckInfo->pCompInfo)) < 0) {
H
Hongze Cheng 已提交
1023 1024
    return terrno;
  }
H
Haojun Liao 已提交
1025
  SBlockInfo* pCompInfo = pCheckInfo->pCompInfo;
1026

H
Haojun Liao 已提交
1027
  TSKEY s = TSKEY_INITIAL_VAL, e = TSKEY_INITIAL_VAL;
1028

1029 1030
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
    assert(pCheckInfo->lastKey <= pTsdbReadHandle->window.ekey && pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
1031
  } else {
1032
    assert(pCheckInfo->lastKey >= pTsdbReadHandle->window.ekey && pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
1033
  }
1034

dengyihao's avatar
dengyihao 已提交
1035 1036
  s = TMIN(pCheckInfo->lastKey, pTsdbReadHandle->window.ekey);
  e = TMAX(pCheckInfo->lastKey, pTsdbReadHandle->window.ekey);
1037

H
Haojun Liao 已提交
1038 1039 1040
  // discard the unqualified data block based on the query time window
  int32_t start = binarySearchForBlock(pCompInfo->blocks, compIndex->numOfBlocks, s, TSDB_ORDER_ASC);
  int32_t end = start;
H
TD-100  
hzcheng 已提交
1041

H
Haojun Liao 已提交
1042 1043 1044
  if (s > pCompInfo->blocks[start].keyLast) {
    return 0;
  }
1045

H
Haojun Liao 已提交
1046 1047 1048 1049
  // todo speedup the procedure of located end block
  while (end < (int32_t)compIndex->numOfBlocks && (pCompInfo->blocks[end].keyFirst <= e)) {
    end += 1;
  }
1050

H
Haojun Liao 已提交
1051
  pCheckInfo->numOfBlocks = (end - start);
1052

H
Haojun Liao 已提交
1053 1054 1055
  if (start > 0) {
    memmove(pCompInfo->blocks, &pCompInfo->blocks[start], pCheckInfo->numOfBlocks * sizeof(SBlock));
  }
1056

H
Haojun Liao 已提交
1057 1058 1059
  (*numOfBlocks) += pCheckInfo->numOfBlocks;
  return 0;
}
1060

1061
static int32_t getFileCompInfo(STsdbReadHandle* pTsdbReadHandle, int32_t* numOfBlocks) {
H
Haojun Liao 已提交
1062 1063 1064 1065
  // load all the comp offset value for all tables in this file
  int32_t code = TSDB_CODE_SUCCESS;
  *numOfBlocks = 0;

1066
  pTsdbReadHandle->cost.headFileLoad += 1;
1067 1068
  int64_t s = taosGetTimestampUs();

H
Haojun Liao 已提交
1069
  size_t numOfTables = 0;
1070 1071 1072 1073
  if (pTsdbReadHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) {
    code = loadBlockInfo(pTsdbReadHandle, pTsdbReadHandle->activeIndex, numOfBlocks);
  } else if (pTsdbReadHandle->loadType == BLOCK_LOAD_OFFSET_SEQ_ORDER) {
    numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
1074

H
Haojun Liao 已提交
1075
    for (int32_t i = 0; i < numOfTables; ++i) {
1076
      code = loadBlockInfo(pTsdbReadHandle, i, numOfBlocks);
H
Haojun Liao 已提交
1077
      if (code != TSDB_CODE_SUCCESS) {
1078 1079
        int64_t e = taosGetTimestampUs();

1080
        pTsdbReadHandle->cost.headFileLoadTime += (e - s);
H
Haojun Liao 已提交
1081 1082 1083 1084 1085
        return code;
      }
    }
  } else {
    assert(0);
1086
  }
1087

1088
  int64_t e = taosGetTimestampUs();
1089
  pTsdbReadHandle->cost.headFileLoadTime += (e - s);
H
Haojun Liao 已提交
1090
  return code;
1091 1092
}

1093
static int32_t doLoadFileDataBlock(STsdbReadHandle* pTsdbReadHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) {
H
Haojun Liao 已提交
1094
  int64_t st = taosGetTimestampUs();
1095

H
Haojun Liao 已提交
1096
  STSchema *pSchema = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, pCheckInfo->tableId, 0);
1097
  int32_t   code = tdInitDataCols(pTsdbReadHandle->pDataCols, pSchema);
H
Haojun Liao 已提交
1098
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1099
    tsdbError("%p failed to malloc buf for pDataCols, %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1100 1101 1102 1103
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _error;
  }

1104
  code = tdInitDataCols(pTsdbReadHandle->rhelper.pDCols[0], pSchema);
H
Haojun Liao 已提交
1105
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1106
    tsdbError("%p failed to malloc buf for rhelper.pDataCols[0], %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1107 1108 1109 1110
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _error;
  }

1111
  code = tdInitDataCols(pTsdbReadHandle->rhelper.pDCols[1], pSchema);
H
Haojun Liao 已提交
1112
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1113
    tsdbError("%p failed to malloc buf for rhelper.pDataCols[1], %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1114 1115 1116
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _error;
  }
1117

1118
  int16_t* colIds = pTsdbReadHandle->defaultLoadColumn->pData;
H
Haojun Liao 已提交
1119

1120
  int32_t ret = tsdbLoadBlockDataCols(&(pTsdbReadHandle->rhelper), pBlock, pCheckInfo->pCompInfo, colIds, (int)(QH_GET_NUM_OF_COLS(pTsdbReadHandle)));
H
Haojun Liao 已提交
1121
  if (ret != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1122 1123 1124
    int32_t c = terrno;
    assert(c != TSDB_CODE_SUCCESS);
    goto _error;
H
Haojun Liao 已提交
1125
  }
1126

1127
  SDataBlockLoadInfo* pBlockLoadInfo = &pTsdbReadHandle->dataBlockLoadInfo;
1128

1129 1130
  pBlockLoadInfo->fileGroup = pTsdbReadHandle->pFileGroup;
  pBlockLoadInfo->slot = pTsdbReadHandle->cur.slot;
H
Haojun Liao 已提交
1131
  pBlockLoadInfo->uid = pCheckInfo->tableId;
1132

1133
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
1134
  assert(pCols->numOfRows != 0 && pCols->numOfRows <= pBlock->numOfRows);
1135

1136
  pBlock->numOfRows = pCols->numOfRows;
H
Haojun Liao 已提交
1137

1138
  // Convert from TKEY to TSKEY for primary timestamp column if current block has timestamp before 1970-01-01T00:00:00Z
1139
  if(pBlock->keyFirst < 0 && colIds[0] == PRIMARYKEY_TIMESTAMP_COL_ID) {
1140 1141 1142 1143 1144 1145
    int64_t* src = pCols->cols[0].pData;
    for(int32_t i = 0; i < pBlock->numOfRows; ++i) {
      src[i] = tdGetKey(src[i]);
    }
  }

H
Haojun Liao 已提交
1146
  int64_t elapsedTime = (taosGetTimestampUs() - st);
1147
  pTsdbReadHandle->cost.blockLoadTime += elapsedTime;
1148

H
Haojun Liao 已提交
1149 1150
  tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, elapsed time:%"PRId64 " us, %s",
      pTsdbReadHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1151
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1152 1153 1154 1155

_error:
  pBlock->numOfRows = 0;

H
Haojun Liao 已提交
1156 1157
  tsdbError("%p error occurs in loading file block, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, %s",
            pTsdbReadHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1158
  return terrno;
H
hjxilinx 已提交
1159 1160
}

1161 1162 1163 1164 1165
static int32_t getEndPosInDataBlock(STsdbReadHandle* pTsdbReadHandle, SDataBlockInfo* pBlockInfo);
static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end);
static void moveDataToFront(STsdbReadHandle* pTsdbReadHandle, int32_t numOfRows, int32_t numOfCols);
static void doCheckGeneratedBlockRange(STsdbReadHandle* pTsdbReadHandle);
static void copyAllRemainRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos);
1166

1167 1168 1169
static int32_t handleDataMergeIfNeeded(STsdbReadHandle* pTsdbReadHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo){
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  STsdbCfg*      pCfg = &pTsdbReadHandle->pTsdb->config;
H
Haojun Liao 已提交
1170
  SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
1171
  TSKEY          key;
H
Haojun Liao 已提交
1172
  int32_t code = TSDB_CODE_SUCCESS;
1173

1174
  /*bool hasData = */ initTableMemIterator(pTsdbReadHandle, pCheckInfo);
H
Haojun Liao 已提交
1175 1176
  assert(cur->pos >= 0 && cur->pos <= binfo.rows);

1177
  key = extractFirstTraverseKey(pCheckInfo, pTsdbReadHandle->order, pCfg->update);
1178

H
Haojun Liao 已提交
1179
  if (key != TSKEY_INITIAL_VAL) {
H
Haojun Liao 已提交
1180
    tsdbDebug("%p key in mem:%"PRId64", %s", pTsdbReadHandle, key, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1181
  } else {
H
Haojun Liao 已提交
1182
    tsdbDebug("%p no data in mem, %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1183
  }
H
Haojun Liao 已提交
1184

1185 1186
  if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) ||
      (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) {
H
Haojun Liao 已提交
1187

1188 1189
    if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) ||
        (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) {
1190

H
Haojun Liao 已提交
1191
      // do not load file block into buffer
1192
      int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1;
H
Haojun Liao 已提交
1193

1194 1195 1196
      TSKEY maxKey = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? (binfo.window.skey - step):(binfo.window.ekey - step);
      cur->rows = tsdbReadRowsFromCache(pCheckInfo, maxKey, pTsdbReadHandle->outputCapacity, &cur->win, pTsdbReadHandle);
      pTsdbReadHandle->realNumOfRows = cur->rows;
H
Haojun Liao 已提交
1197 1198 1199

      // update the last key value
      pCheckInfo->lastKey = cur->win.ekey + step;
1200
      if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
dengyihao's avatar
dengyihao 已提交
1201
        TSWAP(cur->win.skey, cur->win.ekey, TSKEY);
H
Haojun Liao 已提交
1202
      }
H
Haojun Liao 已提交
1203

H
Haojun Liao 已提交
1204 1205
      cur->mixBlock = true;
      cur->blockCompleted = false;
H
Haojun Liao 已提交
1206
      return code;
H
Haojun Liao 已提交
1207
    }
H
Haojun Liao 已提交
1208

1209

1210
    // return error, add test cases
1211
    if ((code = doLoadFileDataBlock(pTsdbReadHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1212
      return code;
1213 1214
    }

1215
    doMergeTwoLevelData(pTsdbReadHandle, pCheckInfo, pBlock);
1216
  } else {
1217 1218 1219 1220 1221 1222
    /*
     * no data in cache, only load data from file
     * during the query processing, data in cache will not be checked anymore.
     *
     * Here the buffer is not enough, so only part of file block can be loaded into memory buffer
     */
1223 1224
    assert(pTsdbReadHandle->outputCapacity >= binfo.rows);
    int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &binfo);
1225

1226 1227 1228
    if ((cur->pos == 0 && endPos == binfo.rows -1 && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
        (cur->pos == (binfo.rows - 1) && endPos == 0 && (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)))) {
      pTsdbReadHandle->realNumOfRows = binfo.rows;
1229 1230 1231 1232

      cur->rows = binfo.rows;
      cur->win  = binfo.window;
      cur->mixBlock = false;
H
Haojun Liao 已提交
1233 1234
      cur->blockCompleted = true;

1235
      if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
H
Haojun Liao 已提交
1236 1237 1238 1239 1240 1241
        cur->lastKey = binfo.window.ekey + 1;
        cur->pos = binfo.rows;
      } else {
        cur->lastKey = binfo.window.skey - 1;
        cur->pos = -1;
      }
H
Haojun Liao 已提交
1242
    } else { // partially copy to dest buffer
1243
      copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &binfo, endPos);
1244 1245
      cur->mixBlock = true;
    }
1246

H
Haojun Liao 已提交
1247
    assert(cur->blockCompleted);
H
Haojun Liao 已提交
1248
    if (cur->rows == binfo.rows) {
H
Haojun Liao 已提交
1249 1250
      tsdbDebug("%p whole file block qualified, brange:%"PRId64"-%"PRId64", rows:%d, lastKey:%"PRId64", %s",
                pTsdbReadHandle, cur->win.skey, cur->win.ekey, cur->rows, cur->lastKey, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1251
    } else {
H
Haojun Liao 已提交
1252 1253
      tsdbDebug("%p create data block from remain file block, brange:%"PRId64"-%"PRId64", rows:%d, total:%d, lastKey:%"PRId64", %s",
                pTsdbReadHandle, cur->win.skey, cur->win.ekey, cur->rows, binfo.rows, cur->lastKey, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1254 1255
    }

1256
  }
H
Haojun Liao 已提交
1257 1258

  return code;
1259 1260
}

1261 1262
static int32_t loadFileDataBlock(STsdbReadHandle* pTsdbReadHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, bool* exists) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
1263
  int32_t code = TSDB_CODE_SUCCESS;
1264
  bool asc = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
1265

1266
  if (asc) {
H
Haojun Liao 已提交
1267
    // query ended in/started from current block
1268 1269
    if (pTsdbReadHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) {
      if ((code = doLoadFileDataBlock(pTsdbReadHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1270 1271
        *exists = false;
        return code;
1272
      }
1273

1274
      SDataCols* pTSCol = pTsdbReadHandle->rhelper.pDCols[0];
H
Haojun Liao 已提交
1275
      assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows);
H
Haojun Liao 已提交
1276

1277 1278
      if (pCheckInfo->lastKey > pBlock->keyFirst) {
        cur->pos =
1279
            binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pTsdbReadHandle->order);
1280 1281 1282
      } else {
        cur->pos = 0;
      }
H
Haojun Liao 已提交
1283

H
Haojun Liao 已提交
1284
      assert(pCheckInfo->lastKey <= pBlock->keyLast);
1285
      doMergeTwoLevelData(pTsdbReadHandle, pCheckInfo, pBlock);
1286
    } else {  // the whole block is loaded in to buffer
1287
      cur->pos = asc? 0:(pBlock->numOfRows - 1);
1288
      code = handleDataMergeIfNeeded(pTsdbReadHandle, pBlock, pCheckInfo);
H
[td-32]  
hjxilinx 已提交
1289
    }
1290
  } else {  //desc order, query ended in current block
1291 1292
    if (pTsdbReadHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) {
      if ((code = doLoadFileDataBlock(pTsdbReadHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1293 1294
        *exists = false;
        return code;
1295
      }
H
Haojun Liao 已提交
1296

1297
      SDataCols* pTsCol = pTsdbReadHandle->rhelper.pDCols[0];
1298
      if (pCheckInfo->lastKey < pBlock->keyLast) {
1299
        cur->pos = binarySearchForKey(pTsCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pTsdbReadHandle->order);
1300
      } else {
H
Haojun Liao 已提交
1301
        cur->pos = pBlock->numOfRows - 1;
1302
      }
H
Haojun Liao 已提交
1303

H
Haojun Liao 已提交
1304
      assert(pCheckInfo->lastKey >= pBlock->keyFirst);
1305
      doMergeTwoLevelData(pTsdbReadHandle, pCheckInfo, pBlock);
1306
    } else {
1307
      cur->pos = asc? 0:(pBlock->numOfRows-1);
1308
      code = handleDataMergeIfNeeded(pTsdbReadHandle, pBlock, pCheckInfo);
H
[td-32]  
hjxilinx 已提交
1309
    }
1310
  }
1311

1312
  *exists = pTsdbReadHandle->realNumOfRows > 0;
H
Haojun Liao 已提交
1313
  return code;
H
[td-32]  
hjxilinx 已提交
1314 1315
}

1316
static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
1317
  int    firstPos, lastPos, midPos = -1;
H
Haojun Liao 已提交
1318
  int    numOfRows;
1319 1320
  TSKEY* keyList;

1321
  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);
H
Haojun Liao 已提交
1322

1323
  if (num <= 0) return -1;
1324 1325

  keyList = (TSKEY*)pValue;
1326 1327
  firstPos = 0;
  lastPos = num - 1;
1328

1329
  if (order == TSDB_ORDER_DESC) {
1330 1331 1332 1333 1334
    // find the first position which is smaller than the key
    while (1) {
      if (key >= keyList[lastPos]) return lastPos;
      if (key == keyList[firstPos]) return firstPos;
      if (key < keyList[firstPos]) return firstPos - 1;
1335

H
Haojun Liao 已提交
1336 1337
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
1338

1339 1340 1341 1342 1343 1344 1345 1346
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
1347

1348 1349 1350 1351 1352
  } else {
    // find the first position which is bigger than the key
    while (1) {
      if (key <= keyList[firstPos]) return firstPos;
      if (key == keyList[lastPos]) return lastPos;
1353

1354 1355 1356 1357 1358 1359 1360
      if (key > keyList[lastPos]) {
        lastPos = lastPos + 1;
        if (lastPos >= num)
          return -1;
        else
          return lastPos;
      }
1361

H
Haojun Liao 已提交
1362 1363
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
1364

1365 1366 1367 1368 1369 1370 1371 1372 1373
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
  }
1374

1375 1376 1377
  return midPos;
}

1378
static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) {
1379
  char* pData = NULL;
1380
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1 : -1;
H
Haojun Liao 已提交
1381

1382
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
1383
  TSKEY* tsArray = pCols->cols[0].pData;
H
Haojun Liao 已提交
1384

1385
  int32_t num = end - start + 1;
H
Haojun Liao 已提交
1386 1387 1388 1389 1390 1391
  assert(num >= 0);

  if (num == 0) {
    return numOfRows;
  }

1392
  int32_t requiredNumOfCols = (int32_t)taosArrayGetSize(pTsdbReadHandle->pColumns);
H
Haojun Liao 已提交
1393

1394
  //data in buffer has greater timestamp, copy data in file block
1395 1396
  int32_t i = 0, j = 0;
  while(i < requiredNumOfCols && j < pCols->numOfCols) {
1397
    SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
1398 1399 1400 1401 1402 1403 1404 1405 1406

    SDataCol* src = &pCols->cols[j];
    if (src->colId < pColInfo->info.colId) {
      j++;
      continue;
    }

    int32_t bytes = pColInfo->info.bytes;

1407
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
S
TD-1057  
Shengliang Guan 已提交
1408
      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
1409
    } else {
S
TD-1057  
Shengliang Guan 已提交
1410
      pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes;
1411
    }
1412

L
Liu Jicong 已提交
1413
    if (!isAllRowsNull(src) && pColInfo->info.colId == src->colId) {
1414
      if (pColInfo->info.type != TSDB_DATA_TYPE_BINARY && pColInfo->info.type != TSDB_DATA_TYPE_NCHAR) {
S
TD-1057  
Shengliang Guan 已提交
1415
        memmove(pData, (char*)src->pData + bytes * start, bytes * num);
1416 1417 1418 1419 1420
      } else {  // handle the var-string
        char* dst = pData;

        // todo refactor, only copy one-by-one
        for (int32_t k = start; k < num + start; ++k) {
H
Haojun Liao 已提交
1421 1422 1423 1424 1425
          SCellVal    sVal = {0};
          if(tdGetColDataOfRow(&sVal, src, k) < 0){
            TASSERT(0);
          }
          memcpy(dst, sVal.val, varDataTLen(sVal.val));
1426
          dst += bytes;
1427 1428
        }
      }
1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443

      j++;
      i++;
    } else { // pColInfo->info.colId < src->colId, it is a NULL data
      if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
        char* dst = pData;

        for(int32_t k = start; k < num + start; ++k) {
          setVardataNull(dst, pColInfo->info.type);
          dst += bytes;
        }
      } else {
        setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num);
      }
      i++;
1444 1445
    }
  }
1446 1447

  while (i < requiredNumOfCols) { // the remain columns are all null data
1448 1449
    SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
S
TD-1057  
Shengliang Guan 已提交
1450
      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
1451
    } else {
S
TD-1057  
Shengliang Guan 已提交
1452
      pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes;
1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463
    }

    if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
      char* dst = pData;

      for(int32_t k = start; k < num + start; ++k) {
        setVardataNull(dst, pColInfo->info.type);
        dst += pColInfo->info.bytes;
      }
    } else {
      setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num);
1464
    }
1465 1466

    i++;
1467
  }
H
Haojun Liao 已提交
1468

1469 1470
  pTsdbReadHandle->cur.win.ekey = tsArray[end];
  pTsdbReadHandle->cur.lastKey = tsArray[end] + step;
1471

1472
  return numOfRows + num;
1473 1474
}

1475
// Note: row1 always has high priority
H
Haojun Liao 已提交
1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486
static void mergeTwoRowFromMem(STsdbReadHandle* pTsdbReadHandle, int32_t capacity, int32_t numOfRows, STSRow* row1,
                               STSRow* row2, int32_t numOfCols, uint64_t uid, STSchema* pSchema1, STSchema* pSchema2,
                               bool forceSetNull) {
#if 1
  char*       pData = NULL;
  STSchema*   pSchema;
  STSRow*     row;
  int16_t     colId;
  int16_t     offset;

  bool isRow1DataRow = TD_IS_TP_ROW(row1);
1487 1488 1489
  bool isRow2DataRow;
  bool isChosenRowDataRow;
  int32_t chosen_itr;
H
Haojun Liao 已提交
1490
  SCellVal sVal = {0};
1491

H
Haojun Liao 已提交
1492
  // the schema version info is embeded in STSRow
1493 1494 1495
  int32_t numOfColsOfRow1 = 0;

  if (pSchema1 == NULL) {
H
Haojun Liao 已提交
1496
    pSchema1 = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, uid, TD_ROW_SVER(row1));
1497
  }
1498

1499 1500
  if(isRow1DataRow) {
    numOfColsOfRow1 = schemaNCols(pSchema1);
H
Haojun Liao 已提交
1501
  } else {
H
Haojun Liao 已提交
1502
    numOfColsOfRow1 = tdRowGetNCols(row1);
D
fix bug  
dapan1121 已提交
1503
  }
1504

1505 1506
  int32_t numOfColsOfRow2 = 0;
  if(row2) {
H
Haojun Liao 已提交
1507
    isRow2DataRow = TD_IS_TP_ROW(row2);
1508
    if (pSchema2 == NULL) {
H
Haojun Liao 已提交
1509
      pSchema2 = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, uid, TD_ROW_SVER(row2));
1510 1511 1512 1513
    }
    if(isRow2DataRow) {
      numOfColsOfRow2 = schemaNCols(pSchema2);
    } else {
H
Haojun Liao 已提交
1514
      numOfColsOfRow2 = tdRowGetNCols(row2);
1515 1516
    }
  }
C
Cary Xu 已提交
1517

1518 1519 1520

  int32_t i = 0, j = 0, k = 0;
  while(i < numOfCols && (j < numOfColsOfRow1 || k < numOfColsOfRow2)) {
1521
    SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
1522

1523
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534
      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
    } else {
      pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes;
    }

    int32_t colIdOfRow1;
    if(j >= numOfColsOfRow1) {
      colIdOfRow1 = INT32_MAX;
    } else if(isRow1DataRow) {
      colIdOfRow1 = pSchema1->columns[j].colId;
    } else {
H
Haojun Liao 已提交
1535
      SKvRowIdx *pColIdx = tdKvRowColIdxAt(row1, j);
1536 1537 1538 1539 1540 1541 1542 1543 1544
      colIdOfRow1 = pColIdx->colId;
    }

    int32_t colIdOfRow2;
    if(k >= numOfColsOfRow2) {
      colIdOfRow2 = INT32_MAX;
    } else if(isRow2DataRow) {
      colIdOfRow2 = pSchema2->columns[k].colId;
    } else {
H
Haojun Liao 已提交
1545
      SKvRowIdx *pColIdx = tdKvRowColIdxAt(row2, k);
1546 1547 1548 1549 1550
      colIdOfRow2 = pColIdx->colId;
    }

    if(colIdOfRow1 == colIdOfRow2) {
      if(colIdOfRow1 < pColInfo->info.colId) {
C
Cary Xu 已提交
1551
        j++;
1552
        k++;
C
Cary Xu 已提交
1553 1554
        continue;
      }
1555 1556 1557 1558 1559 1560 1561 1562
      row = row1;
      pSchema = pSchema1;
      isChosenRowDataRow = isRow1DataRow;
      chosen_itr = j;
    } else if(colIdOfRow1 < colIdOfRow2) {
      if(colIdOfRow1 < pColInfo->info.colId) {
        j++;
        continue;
C
Cary Xu 已提交
1563
      }
1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580
      row = row1;
      pSchema = pSchema1;
      isChosenRowDataRow = isRow1DataRow;
      chosen_itr = j;
    } else {
      if(colIdOfRow2 < pColInfo->info.colId) {
        k++;
        continue;
      }
      row = row2;
      pSchema = pSchema2;
      chosen_itr = k;
      isChosenRowDataRow = isRow2DataRow;
    }
    if(isChosenRowDataRow) {
      colId = pSchema->columns[chosen_itr].colId;
      offset = pSchema->columns[chosen_itr].offset;
H
Haojun Liao 已提交
1581
      tdSTpRowGetVal(row, colId, pSchema->columns[chosen_itr].type, pSchema->flen, offset, chosen_itr, &sVal);
1582
    } else {
H
Haojun Liao 已提交
1583
      SKvRowIdx *pColIdx = tdKvRowColIdxAt(row, chosen_itr);
1584 1585
      colId = pColIdx->colId;
      offset = pColIdx->offset;
H
Haojun Liao 已提交
1586
      tdSKvRowGetVal(row, colId, offset, chosen_itr, &sVal);
1587 1588
    }

C
Cary Xu 已提交
1589

1590
    if (colId == pColInfo->info.colId) {
H
Haojun Liao 已提交
1591
      if (tdValTypeIsNorm(sVal.valType)) {
C
Cary Xu 已提交
1592 1593 1594
        switch (pColInfo->info.type) {
          case TSDB_DATA_TYPE_BINARY:
          case TSDB_DATA_TYPE_NCHAR:
H
Haojun Liao 已提交
1595
            memcpy(pData, sVal.val, varDataTLen(sVal.val));
C
Cary Xu 已提交
1596 1597 1598 1599 1600
            break;
          case TSDB_DATA_TYPE_NULL:
          case TSDB_DATA_TYPE_BOOL:
          case TSDB_DATA_TYPE_TINYINT:
          case TSDB_DATA_TYPE_UTINYINT:
H
Haojun Liao 已提交
1601
            *(uint8_t *)pData = *(uint8_t *)sVal.val;
C
Cary Xu 已提交
1602 1603 1604
            break;
          case TSDB_DATA_TYPE_SMALLINT:
          case TSDB_DATA_TYPE_USMALLINT:
H
Haojun Liao 已提交
1605
            *(uint16_t *)pData = *(uint16_t *)sVal.val;
C
Cary Xu 已提交
1606 1607 1608
            break;
          case TSDB_DATA_TYPE_INT:
          case TSDB_DATA_TYPE_UINT:
H
Haojun Liao 已提交
1609
            *(uint32_t *)pData = *(uint32_t *)sVal.val;
C
Cary Xu 已提交
1610 1611 1612
            break;
          case TSDB_DATA_TYPE_BIGINT:
          case TSDB_DATA_TYPE_UBIGINT:
H
Haojun Liao 已提交
1613
            *(uint64_t *)pData = *(uint64_t *)sVal.val;
C
Cary Xu 已提交
1614 1615
            break;
          case TSDB_DATA_TYPE_FLOAT:
H
Haojun Liao 已提交
1616
            SET_FLOAT_PTR(pData, sVal.val);
C
Cary Xu 已提交
1617 1618
            break;
          case TSDB_DATA_TYPE_DOUBLE:
H
Haojun Liao 已提交
1619
            SET_DOUBLE_PTR(pData, sVal.val);
C
Cary Xu 已提交
1620 1621
            break;
          case TSDB_DATA_TYPE_TIMESTAMP:
H
Haojun Liao 已提交
1622
            *(TSKEY*)pData = *(TSKEY*)sVal.val;
C
Cary Xu 已提交
1623 1624
            break;
          default:
H
Haojun Liao 已提交
1625 1626 1627 1628 1629 1630 1631
            memcpy(pData, sVal.val, pColInfo->info.bytes);
        }
      } else if (forceSetNull) {
        if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
          setVardataNull(pData, pColInfo->info.type);
        } else {
          setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
C
Cary Xu 已提交
1632
        }
1633 1634
      }
      i++;
C
Cary Xu 已提交
1635

1636
      if(row == row1) {
C
Cary Xu 已提交
1637
        j++;
1638 1639 1640 1641 1642
      } else {
        k++;
      }
    } else {
      if(forceSetNull) {
C
Cary Xu 已提交
1643 1644 1645 1646 1647 1648
        if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
          setVardataNull(pData, pColInfo->info.type);
        } else {
          setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
        }
      }
1649
      i++;
1650
    }
1651
  }
1652

1653 1654
  if(forceSetNull) {
    while (i < numOfCols) { // the remain columns are all null data
1655 1656
      SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
      if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
C
Cary Xu 已提交
1657 1658 1659 1660 1661
        pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
      } else {
        pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes;
      }

1662 1663 1664 1665
      if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
        setVardataNull(pData, pColInfo->info.type);
      } else {
        setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
1666
      }
1667

1668
      i++;
1669 1670
    }
  }
H
Haojun Liao 已提交
1671
#endif
1672
}
1673

1674 1675
static void moveDataToFront(STsdbReadHandle* pTsdbReadHandle, int32_t numOfRows, int32_t numOfCols) {
  if (numOfRows == 0 || ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
1676 1677 1678 1679
    return;
  }

  // if the buffer is not full in case of descending order query, move the data in the front of the buffer
1680 1681
  if (numOfRows < pTsdbReadHandle->outputCapacity) {
    int32_t emptySize = pTsdbReadHandle->outputCapacity - numOfRows;
1682
    for(int32_t i = 0; i < numOfCols; ++i) {
1683
      SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
S
TD-1057  
Shengliang Guan 已提交
1684
      memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
1685 1686 1687 1688
    }
  }
}

1689
static void getQualifiedRowsPos(STsdbReadHandle* pTsdbReadHandle, int32_t startPos, int32_t endPos, int32_t numOfExisted,
1690
                                int32_t* start, int32_t* end) {
1691 1692
  *start = -1;

1693
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
1694
    int32_t remain = endPos - startPos + 1;
1695 1696
    if (remain + numOfExisted > pTsdbReadHandle->outputCapacity) {
      *end = (pTsdbReadHandle->outputCapacity - numOfExisted) + startPos - 1;
H
Haojun Liao 已提交
1697 1698
    } else {
      *end = endPos;
1699 1700 1701 1702 1703
    }

    *start = startPos;
  } else {
    int32_t remain = (startPos - endPos) + 1;
1704 1705
    if (remain + numOfExisted > pTsdbReadHandle->outputCapacity) {
      *end = startPos + 1 - (pTsdbReadHandle->outputCapacity - numOfExisted);
H
Haojun Liao 已提交
1706 1707
    } else {
      *end = endPos;
1708 1709 1710 1711 1712 1713 1714
    }

    *start = *end;
    *end = startPos;
  }
}

1715 1716
static void updateInfoAfterMerge(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
1717 1718

  pCheckInfo->lastKey = cur->lastKey;
1719
  pTsdbReadHandle->realNumOfRows = numOfRows;
1720 1721 1722 1723
  cur->rows = numOfRows;
  cur->pos = endPos;
}

1724 1725
static void doCheckGeneratedBlockRange(STsdbReadHandle* pTsdbReadHandle) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
1726 1727

  if (cur->rows > 0) {
1728 1729
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
      assert(cur->win.skey >= pTsdbReadHandle->window.skey && cur->win.ekey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
1730
    } else {
1731
      assert(cur->win.skey >= pTsdbReadHandle->window.ekey && cur->win.ekey <= pTsdbReadHandle->window.skey);
H
Haojun Liao 已提交
1732 1733
    }

1734
    SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, 0);
H
Haojun Liao 已提交
1735 1736
    assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]);
  } else {
1737
    cur->win = pTsdbReadHandle->window;
H
Haojun Liao 已提交
1738

1739 1740
    int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;
    cur->lastKey = pTsdbReadHandle->window.ekey + step;
H
Haojun Liao 已提交
1741 1742 1743
  }
}

1744 1745
static void copyAllRemainRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
1746

1747
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
H
Haojun Liao 已提交
1748 1749
  TSKEY* tsArray = pCols->cols[0].pData;

1750 1751
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;
  int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
H
Haojun Liao 已提交
1752 1753 1754 1755 1756 1757

  int32_t pos = cur->pos;

  int32_t start = cur->pos;
  int32_t end = endPos;

1758
  if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
dengyihao's avatar
dengyihao 已提交
1759
    TSWAP(start, end, int32_t);
H
Haojun Liao 已提交
1760 1761
  }

1762 1763
  assert(pTsdbReadHandle->outputCapacity >= (end - start + 1));
  int32_t numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, start, end);
H
Haojun Liao 已提交
1764 1765 1766

  // the time window should always be ascending order: skey <= ekey
  cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]};
H
Haojun Liao 已提交
1767
  cur->mixBlock = (numOfRows != pBlockInfo->rows);
H
Haojun Liao 已提交
1768
  cur->lastKey = tsArray[endPos] + step;
H
Haojun Liao 已提交
1769
  cur->blockCompleted = true;
H
Haojun Liao 已提交
1770 1771

  // if the buffer is not full in case of descending order query, move the data in the front of the buffer
1772
  moveDataToFront(pTsdbReadHandle, numOfRows, numOfCols);
H
Haojun Liao 已提交
1773 1774 1775

  // The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases.
  pos = endPos + step;
1776 1777
  updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
  doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Haojun Liao 已提交
1778

H
Haojun Liao 已提交
1779 1780
  tsdbDebug("%p uid:%" PRIu64", data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, %s",
            pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1781 1782
}

1783
int32_t getEndPosInDataBlock(STsdbReadHandle* pTsdbReadHandle, SDataBlockInfo* pBlockInfo) {
H
Haojun Liao 已提交
1784 1785
  // NOTE: reverse the order to find the end position in data block
  int32_t endPos = -1;
1786
  int32_t order = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? TSDB_ORDER_DESC : TSDB_ORDER_ASC;
H
Haojun Liao 已提交
1787

1788 1789
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
H
Haojun Liao 已提交
1790

1791
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order) && pTsdbReadHandle->window.ekey >= pBlockInfo->window.ekey) {
H
Haojun Liao 已提交
1792 1793
    endPos = pBlockInfo->rows - 1;
    cur->mixBlock = (cur->pos != 0);
1794
  } else if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && pTsdbReadHandle->window.ekey <= pBlockInfo->window.skey) {
H
Haojun Liao 已提交
1795 1796 1797 1798
    endPos = 0;
    cur->mixBlock = (cur->pos != pBlockInfo->rows - 1);
  } else {
    assert(pCols->numOfRows > 0);
1799
    endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pTsdbReadHandle->window.ekey, order);
H
Haojun Liao 已提交
1800 1801 1802 1803 1804 1805
    cur->mixBlock = true;
  }

  return endPos;
}

H
[td-32]  
hjxilinx 已提交
1806 1807
// only return the qualified data to client in terms of query time window, data rows in the same block but do not
// be included in the query time window will be discarded
1808 1809 1810 1811
static void doMergeTwoLevelData(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  SDataBlockInfo blockInfo = {0};//GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
  STsdbCfg*      pCfg = &pTsdbReadHandle->pTsdb->config;
H
Haojun Liao 已提交
1812

1813
  initTableMemIterator(pTsdbReadHandle, pCheckInfo);
1814

1815 1816
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
  assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_ID &&
H
Haojun Liao 已提交
1817 1818
      cur->pos >= 0 && cur->pos < pBlock->numOfRows);

1819
  TSKEY* tsArray = pCols->cols[0].pData;
H
Haojun Liao 已提交
1820
  assert(pCols->numOfRows == pBlock->numOfRows && tsArray[0] == pBlock->keyFirst && tsArray[pBlock->numOfRows-1] == pBlock->keyLast);
1821 1822

  // for search the endPos, so the order needs to reverse
1823
  int32_t order = (pTsdbReadHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC;
1824

1825 1826
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;
  int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
1827

H
Haojun Liao 已提交
1828
  STable* pTable = NULL;
1829
  int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &blockInfo);
H
Haojun Liao 已提交
1830

1831
  tsdbDebug("%p uid:%" PRIu64" start merge data block, file block range:%"PRIu64"-%"PRIu64" rows:%d, start:%d,"
H
Haojun Liao 已提交
1832
            "end:%d, %s",
1833
            pTsdbReadHandle, pCheckInfo->tableId, blockInfo.window.skey, blockInfo.window.ekey,
H
Haojun Liao 已提交
1834
            blockInfo.rows, cur->pos, endPos, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1835

1836 1837
  // compared with the data from in-memory buffer, to generate the correct timestamp array list
  int32_t numOfRows = 0;
H
Haojun Liao 已提交
1838

1839 1840 1841 1842
  int16_t rv1 = -1;
  int16_t rv2 = -1;
  STSchema* pSchema1 = NULL;
  STSchema* pSchema2 = NULL;
D
fix bug  
dapan1121 已提交
1843

H
Haojun Liao 已提交
1844 1845
  int32_t pos = cur->pos;
  cur->win = TSWINDOW_INITIALIZER;
1846

1847 1848
  // no data in buffer, load data from file directly
  if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
1849
    copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &blockInfo, endPos);
1850
    return;
1851
  } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
1852 1853
    SSkipListNode* node = NULL;
    do {
H
Haojun Liao 已提交
1854 1855
      STSRow* row2 = NULL;
      STSRow* row1 = getSRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, &row2);
1856
      if (row1 == NULL) {
H
[td-32]  
hjxilinx 已提交
1857
        break;
1858
      }
1859

H
Haojun Liao 已提交
1860
      TSKEY key = TD_ROW_KEY(row1);
1861 1862
      if ((key > pTsdbReadHandle->window.ekey && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
          (key < pTsdbReadHandle->window.ekey && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1863 1864 1865
        break;
      }

1866 1867
      if (((pos > endPos || tsArray[pos] > pTsdbReadHandle->window.ekey) && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
          ((pos < endPos || tsArray[pos] < pTsdbReadHandle->window.ekey) && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1868 1869 1870
        break;
      }

1871 1872
      if ((key < tsArray[pos] && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
          (key > tsArray[pos] && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
H
Haojun Liao 已提交
1873
        if (rv1 != TD_ROW_SVER(row1)) {
1874
//          pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1));
H
Haojun Liao 已提交
1875
          rv1 = TD_ROW_SVER(row1);
C
Cary Xu 已提交
1876
        }
H
Haojun Liao 已提交
1877
        if(row2 && rv2 != TD_ROW_SVER(row2)) {
1878
//          pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2));
H
Haojun Liao 已提交
1879
          rv2 = TD_ROW_SVER(row2);
1880 1881
        }
        
H
Haojun Liao 已提交
1882
        mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pCheckInfo->tableId, pSchema1, pSchema2, true);
1883 1884 1885 1886
        numOfRows += 1;
        if (cur->win.skey == TSKEY_INITIAL_VAL) {
          cur->win.skey = key;
        }
1887

1888
        cur->win.ekey = key;
1889 1890 1891
        cur->lastKey  = key + step;
        cur->mixBlock = true;

1892
        moveToNextRowInMem(pCheckInfo);
1893
      } else if (key == tsArray[pos]) {  // data in buffer has the same timestamp of data in file block, ignore it
H
TD-1439  
Hongze Cheng 已提交
1894
        if (pCfg->update) {
1895
          if(pCfg->update == TD_ROW_PARTIAL_UPDATE) {
1896
            doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, pos, pos);
D
fix bug  
dapan1121 已提交
1897
          }
H
Haojun Liao 已提交
1898
          if (rv1 != TD_ROW_SVER(row1)) {
1899
//            pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1));
H
Haojun Liao 已提交
1900
            rv1 = TD_ROW_SVER(row1);
1901
          }
H
Haojun Liao 已提交
1902
          if(row2 && rv2 != TD_ROW_SVER(row2)) {
1903
//            pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2));
H
Haojun Liao 已提交
1904
            rv2 = TD_ROW_SVER(row2);
1905 1906 1907
          }
          
          bool forceSetNull = pCfg->update != TD_ROW_PARTIAL_UPDATE;
H
Haojun Liao 已提交
1908
          mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pCheckInfo->tableId, pSchema1, pSchema2, forceSetNull);
H
TD-1439  
Hongze Cheng 已提交
1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922
          numOfRows += 1;
          if (cur->win.skey == TSKEY_INITIAL_VAL) {
            cur->win.skey = key;
          }

          cur->win.ekey = key;
          cur->lastKey = key + step;
          cur->mixBlock = true;

          moveToNextRowInMem(pCheckInfo);
          pos += step;
        } else {
          moveToNextRowInMem(pCheckInfo);
        }
1923 1924
      } else if ((key > tsArray[pos] && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
                  (key < tsArray[pos] && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1925 1926 1927
        if (cur->win.skey == TSKEY_INITIAL_VAL) {
          cur->win.skey = tsArray[pos];
        }
1928

1929
        int32_t end = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, key, order);
1930 1931
        assert(end != -1);

H
Haojun Liao 已提交
1932
        if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it
1933
          if (pCfg->update == TD_ROW_DISCARD_UPDATE) {
H
Hongze Cheng 已提交
1934 1935 1936 1937
            moveToNextRowInMem(pCheckInfo);
          } else {
            end -= step;
          }
H
Haojun Liao 已提交
1938
        }
1939

1940
        int32_t qstart = 0, qend = 0;
1941
        getQualifiedRowsPos(pTsdbReadHandle, pos, end, numOfRows, &qstart, &qend);
1942

1943
        numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, qstart, qend);
1944 1945
        pos += (qend - qstart + 1) * step;

1946
        cur->win.ekey = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? tsArray[qend]:tsArray[qstart];
1947
        cur->lastKey  = cur->win.ekey + step;
1948
      }
1949
    } while (numOfRows < pTsdbReadHandle->outputCapacity);
H
Haojun Liao 已提交
1950

1951
    if (numOfRows < pTsdbReadHandle->outputCapacity) {
H
Haojun Liao 已提交
1952 1953 1954 1955
      /**
       * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
       * copy them all to result buffer, since it may be overlapped with file data block.
       */
1956
      if (node == NULL ||
H
Haojun Liao 已提交
1957
          ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) > pTsdbReadHandle->window.ekey) &&
1958
           ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
H
Haojun Liao 已提交
1959
          ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) < pTsdbReadHandle->window.ekey) &&
1960
           !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1961 1962 1963 1964 1965
        // no data in cache or data in cache is greater than the ekey of time window, load data from file block
        if (cur->win.skey == TSKEY_INITIAL_VAL) {
          cur->win.skey = tsArray[pos];
        }

1966
        int32_t start = -1, end = -1;
1967
        getQualifiedRowsPos(pTsdbReadHandle, pos, endPos, numOfRows, &start, &end);
1968

1969
        numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, start, end);
1970
        pos += (end - start + 1) * step;
1971

1972
        cur->win.ekey = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? tsArray[end]:tsArray[start];
1973
        cur->lastKey  = cur->win.ekey + step;
H
Haojun Liao 已提交
1974
        cur->mixBlock = true;
1975
      }
1976 1977
    }
  }
H
Haojun Liao 已提交
1978 1979

  cur->blockCompleted =
1980 1981
      (((pos > endPos || cur->lastKey > pTsdbReadHandle->window.ekey) && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
       ((pos < endPos || cur->lastKey < pTsdbReadHandle->window.ekey) && !ASCENDING_TRAVERSE(pTsdbReadHandle->order)));
1982

1983
  if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
dengyihao's avatar
dengyihao 已提交
1984
    TSWAP(cur->win.skey, cur->win.ekey, TSKEY);
1985
  }
1986

1987 1988 1989
  moveDataToFront(pTsdbReadHandle, numOfRows, numOfCols);
  updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
  doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Haojun Liao 已提交
1990

H
Haojun Liao 已提交
1991 1992
  tsdbDebug("%p uid:%" PRIu64", data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, %s",
      pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows, pTsdbReadHandle->idStr);
1993 1994
}

1995
int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order) {
H
[td-32]  
hjxilinx 已提交
1996
  int    firstPos, lastPos, midPos = -1;
H
Haojun Liao 已提交
1997
  int    numOfRows;
1998 1999
  TSKEY* keyList;

H
[td-32]  
hjxilinx 已提交
2000
  if (num <= 0) return -1;
2001 2002

  keyList = (TSKEY*)pValue;
H
[td-32]  
hjxilinx 已提交
2003 2004
  firstPos = 0;
  lastPos = num - 1;
2005

2006
  if (order == TSDB_ORDER_DESC) {
H
[td-32]  
hjxilinx 已提交
2007 2008 2009 2010 2011
    // find the first position which is smaller than the key
    while (1) {
      if (key >= keyList[lastPos]) return lastPos;
      if (key == keyList[firstPos]) return firstPos;
      if (key < keyList[firstPos]) return firstPos - 1;
2012

H
Haojun Liao 已提交
2013 2014
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
2015

H
[td-32]  
hjxilinx 已提交
2016 2017 2018 2019 2020 2021 2022 2023
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
2024

H
[td-32]  
hjxilinx 已提交
2025 2026 2027 2028 2029
  } else {
    // find the first position which is bigger than the key
    while (1) {
      if (key <= keyList[firstPos]) return firstPos;
      if (key == keyList[lastPos]) return lastPos;
2030

H
[td-32]  
hjxilinx 已提交
2031 2032 2033 2034 2035 2036 2037
      if (key > keyList[lastPos]) {
        lastPos = lastPos + 1;
        if (lastPos >= num)
          return -1;
        else
          return lastPos;
      }
2038

H
Haojun Liao 已提交
2039 2040
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
2041

H
[td-32]  
hjxilinx 已提交
2042 2043 2044 2045 2046 2047 2048 2049 2050
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
  }
2051

H
[td-32]  
hjxilinx 已提交
2052 2053 2054
  return midPos;
}

2055
static void cleanBlockOrderSupporter(SBlockOrderSupporter* pSupporter, int32_t numOfTables) {
S
TD-1848  
Shengliang Guan 已提交
2056 2057
  tfree(pSupporter->numOfBlocksPerTable);
  tfree(pSupporter->blockIndexArray);
2058 2059

  for (int32_t i = 0; i < numOfTables; ++i) {
H
Haojun Liao 已提交
2060
    STableBlockInfo* pBlockInfo = pSupporter->pDataBlockInfo[i];
S
TD-1848  
Shengliang Guan 已提交
2061
    tfree(pBlockInfo);
2062 2063
  }

S
TD-1848  
Shengliang Guan 已提交
2064
  tfree(pSupporter->pDataBlockInfo);
2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075
}

static int32_t dataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
  int32_t leftTableIndex = *(int32_t*)pLeft;
  int32_t rightTableIndex = *(int32_t*)pRight;

  SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;

  int32_t leftTableBlockIndex = pSupporter->blockIndexArray[leftTableIndex];
  int32_t rightTableBlockIndex = pSupporter->blockIndexArray[rightTableIndex];

2076
  if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftTableIndex]) {
2077 2078
    /* left block is empty */
    return 1;
2079
  } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightTableIndex]) {
2080 2081 2082 2083 2084 2085 2086
    /* right block is empty */
    return -1;
  }

  STableBlockInfo* pLeftBlockInfoEx = &pSupporter->pDataBlockInfo[leftTableIndex][leftTableBlockIndex];
  STableBlockInfo* pRightBlockInfoEx = &pSupporter->pDataBlockInfo[rightTableIndex][rightTableBlockIndex];

H
Haojun Liao 已提交
2087
  //    assert(pLeftBlockInfoEx->compBlock->offset != pRightBlockInfoEx->compBlock->offset);
2088
#if 0	// TODO: temporarily comment off requested by Dr. Liao
H
Haojun Liao 已提交
2089 2090
  if (pLeftBlockInfoEx->compBlock->offset == pRightBlockInfoEx->compBlock->offset &&
      pLeftBlockInfoEx->compBlock->last == pRightBlockInfoEx->compBlock->last) {
B
Bomin Zhang 已提交
2091
    tsdbError("error in header file, two block with same offset:%" PRId64, (int64_t)pLeftBlockInfoEx->compBlock->offset);
2092
  }
H
Haojun Liao 已提交
2093
#endif
2094

H
Haojun Liao 已提交
2095
  return pLeftBlockInfoEx->compBlock->offset > pRightBlockInfoEx->compBlock->offset ? 1 : -1;
2096 2097
}

2098
static int32_t createDataBlocksInfo(STsdbReadHandle* pTsdbReadHandle, int32_t numOfBlocks, int32_t* numOfAllocBlocks) {
H
Haojun Liao 已提交
2099 2100
  size_t size = sizeof(STableBlockInfo) * numOfBlocks;

2101 2102 2103
  if (pTsdbReadHandle->allocSize < size) {
    pTsdbReadHandle->allocSize = (int32_t)size;
    char* tmp = realloc(pTsdbReadHandle->pDataBlockInfo, pTsdbReadHandle->allocSize);
H
Haojun Liao 已提交
2104 2105 2106 2107
    if (tmp == NULL) {
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
    }

2108
    pTsdbReadHandle->pDataBlockInfo = (STableBlockInfo*) tmp;
2109 2110
  }

2111
  memset(pTsdbReadHandle->pDataBlockInfo, 0, size);
2112 2113
  *numOfAllocBlocks = numOfBlocks;

H
Haojun Liao 已提交
2114
  // access data blocks according to the offset of each block in asc/desc order.
2115
  int32_t numOfTables = (int32_t)taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2116

2117 2118
  SBlockOrderSupporter sup = {0};
  sup.numOfTables = numOfTables;
2119
  sup.numOfBlocksPerTable = calloc(1, sizeof(int32_t) * numOfTables);
2120 2121 2122
  sup.blockIndexArray = calloc(1, sizeof(int32_t) * numOfTables);
  sup.pDataBlockInfo = calloc(1, POINTER_BYTES * numOfTables);

2123
  if (sup.numOfBlocksPerTable == NULL || sup.blockIndexArray == NULL || sup.pDataBlockInfo == NULL) {
2124
    cleanBlockOrderSupporter(&sup, 0);
2125
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
2126
  }
H
Haojun Liao 已提交
2127

2128
  int32_t cnt = 0;
2129
  int32_t numOfQualTables = 0;
H
Haojun Liao 已提交
2130

2131
  for (int32_t j = 0; j < numOfTables; ++j) {
2132
    STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pTsdbReadHandle->pTableCheckInfo, j);
2133 2134 2135
    if (pTableCheck->numOfBlocks <= 0) {
      continue;
    }
H
Haojun Liao 已提交
2136

H
refact  
Hongze Cheng 已提交
2137
    SBlock* pBlock = pTableCheck->pCompInfo->blocks;
2138
    sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks;
2139

H
Haojun Liao 已提交
2140
    char* buf = malloc(sizeof(STableBlockInfo) * pTableCheck->numOfBlocks);
2141
    if (buf == NULL) {
2142
      cleanBlockOrderSupporter(&sup, numOfQualTables);
2143
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
2144 2145
    }

2146
    sup.pDataBlockInfo[numOfQualTables] = (STableBlockInfo*)buf;
2147 2148

    for (int32_t k = 0; k < pTableCheck->numOfBlocks; ++k) {
H
Haojun Liao 已提交
2149
      STableBlockInfo* pBlockInfo = &sup.pDataBlockInfo[numOfQualTables][k];
2150

H
Haojun Liao 已提交
2151 2152
      pBlockInfo->compBlock = &pBlock[k];
      pBlockInfo->pTableCheckInfo = pTableCheck;
2153 2154 2155
      cnt++;
    }

2156
    numOfQualTables++;
2157 2158
  }

H
Haojun Liao 已提交
2159
  assert(numOfBlocks == cnt);
2160

H
Haojun Liao 已提交
2161 2162
  // since there is only one table qualified, blocks are not sorted
  if (numOfQualTables == 1) {
2163
    memcpy(pTsdbReadHandle->pDataBlockInfo, sup.pDataBlockInfo[0], sizeof(STableBlockInfo) * numOfBlocks);
H
Haojun Liao 已提交
2164
    cleanBlockOrderSupporter(&sup, numOfQualTables);
2165

H
Haojun Liao 已提交
2166 2167
    tsdbDebug("%p create data blocks info struct completed for 1 table, %d blocks not sorted %s", pTsdbReadHandle, cnt,
        pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2168 2169
    return TSDB_CODE_SUCCESS;
  }
2170

H
Haojun Liao 已提交
2171 2172
  tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pTsdbReadHandle, cnt,
      numOfQualTables, pTsdbReadHandle->idStr);
2173

2174
  assert(cnt <= numOfBlocks && numOfQualTables <= numOfTables);  // the pTableQueryInfo[j]->numOfBlocks may be 0
2175
  sup.numOfTables = numOfQualTables;
2176

2177 2178
  SMultiwayMergeTreeInfo* pTree = NULL;
  uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, dataBlockOrderCompar);
2179 2180
  if (ret != TSDB_CODE_SUCCESS) {
    cleanBlockOrderSupporter(&sup, numOfTables);
2181
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
2182 2183 2184 2185 2186
  }

  int32_t numOfTotal = 0;

  while (numOfTotal < cnt) {
2187
    int32_t pos = tMergeTreeGetChosenIndex(pTree);
2188 2189
    int32_t index = sup.blockIndexArray[pos]++;

H
Haojun Liao 已提交
2190
    STableBlockInfo* pBlocksInfo = sup.pDataBlockInfo[pos];
2191
    pTsdbReadHandle->pDataBlockInfo[numOfTotal++] = pBlocksInfo[index];
2192 2193

    // set data block index overflow, in order to disable the offset comparator
2194 2195
    if (sup.blockIndexArray[pos] >= sup.numOfBlocksPerTable[pos]) {
      sup.blockIndexArray[pos] = sup.numOfBlocksPerTable[pos] + 1;
2196
    }
2197

H
Haojun Liao 已提交
2198
    tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree));
2199 2200 2201 2202 2203
  }

  /*
   * available when no import exists
   * for(int32_t i = 0; i < cnt - 1; ++i) {
H
Haojun Liao 已提交
2204
   *   assert((*pDataBlockInfo)[i].compBlock->offset < (*pDataBlockInfo)[i+1].compBlock->offset);
2205 2206 2207
   * }
   */

H
Haojun Liao 已提交
2208
  tsdbDebug("%p %d data blocks sort completed, %s", pTsdbReadHandle, cnt, pTsdbReadHandle->idStr);
2209 2210 2211 2212 2213 2214
  cleanBlockOrderSupporter(&sup, numOfTables);
  free(pTree);

  return TSDB_CODE_SUCCESS;
}

2215
static int32_t getFirstFileDataBlock(STsdbReadHandle* pTsdbReadHandle, bool* exists);
H
Haojun Liao 已提交
2216

2217 2218 2219
static int32_t getDataBlockRv(STsdbReadHandle* pTsdbReadHandle, STableBlockInfo* pNext, bool *exists) {
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1 : -1;
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
2220 2221

  while(1) {
2222
    int32_t code = loadFileDataBlock(pTsdbReadHandle, pNext->compBlock, pNext->pTableCheckInfo, exists);
H
Haojun Liao 已提交
2223 2224 2225 2226
    if (code != TSDB_CODE_SUCCESS || *exists) {
      return code;
    }

2227 2228
    if ((cur->slot == pTsdbReadHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
        (cur->slot == 0 && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
H
Haojun Liao 已提交
2229
      // all data blocks in current file has been checked already, try next file if exists
2230
      return getFirstFileDataBlock(pTsdbReadHandle, exists);
H
Haojun Liao 已提交
2231 2232 2233 2234
    } else {  // next block of the same file
      cur->slot += step;
      cur->mixBlock = false;
      cur->blockCompleted = false;
2235
      pNext = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
H
Haojun Liao 已提交
2236 2237 2238 2239
    }
  }
}

2240 2241 2242
static int32_t getFirstFileDataBlock(STsdbReadHandle* pTsdbReadHandle, bool* exists) {
  pTsdbReadHandle->numOfBlocks = 0;
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
2243 2244 2245

  int32_t code = TSDB_CODE_SUCCESS;

2246
  int32_t numOfBlocks = 0;
2247
  int32_t numOfTables = (int32_t)taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2248

2249
  STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
2250 2251
  STimeWindow win = TSWINDOW_INITIALIZER;

H
Hongze Cheng 已提交
2252
  while (true) {
2253
    tsdbRLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2254

2255 2256
    if ((pTsdbReadHandle->pFileGroup = tsdbFSIterNext(&pTsdbReadHandle->fileIter)) == NULL) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2257 2258 2259
      break;
    }

2260
    tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pTsdbReadHandle->pFileGroup->fid, &win.skey, &win.ekey);
2261 2262

    // current file are not overlapped with query time window, ignore remain files
2263 2264 2265
    if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.skey > pTsdbReadHandle->window.ekey) ||
        (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.ekey < pTsdbReadHandle->window.ekey)) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2266 2267
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pTsdbReadHandle,
                pTsdbReadHandle->window.skey, pTsdbReadHandle->window.ekey, pTsdbReadHandle->idStr);
2268 2269
      pTsdbReadHandle->pFileGroup = NULL;
      assert(pTsdbReadHandle->numOfBlocks == 0);
2270 2271 2272
      break;
    }

2273 2274
    if (tsdbSetAndOpenReadFSet(&pTsdbReadHandle->rhelper, pTsdbReadHandle->pFileGroup) < 0) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2275 2276 2277 2278
      code = terrno;
      break;
    }

2279
    tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2280

2281
    if (tsdbLoadBlockIdx(&pTsdbReadHandle->rhelper) < 0) {
H
Hongze Cheng 已提交
2282 2283 2284 2285
      code = terrno;
      break;
    }

2286
    if ((code = getFileCompInfo(pTsdbReadHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
2287 2288
      break;
    }
H
Haojun Liao 已提交
2289

H
Haojun Liao 已提交
2290 2291
    tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pTsdbReadHandle, numOfBlocks, numOfTables,
              pTsdbReadHandle->pFileGroup->fid, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2292

2293 2294 2295 2296
    assert(numOfBlocks >= 0);
    if (numOfBlocks == 0) {
      continue;
    }
H
Haojun Liao 已提交
2297

2298
    // todo return error code to query engine
2299
    if ((code = createDataBlocksInfo(pTsdbReadHandle, numOfBlocks, &pTsdbReadHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) {
2300 2301
      break;
    }
H
Haojun Liao 已提交
2302

2303 2304
    assert(numOfBlocks >= pTsdbReadHandle->numOfBlocks);
    if (pTsdbReadHandle->numOfBlocks > 0) {
2305 2306 2307
      break;
    }
  }
H
Haojun Liao 已提交
2308

2309
  // no data in file anymore
2310
  if (pTsdbReadHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2311
    if (code == TSDB_CODE_SUCCESS) {
2312
      assert(pTsdbReadHandle->pFileGroup == NULL);
H
Haojun Liao 已提交
2313 2314
    }

D
dapan1121 已提交
2315
    cur->fid = INT32_MIN;  // denote that there are no data in file anymore
H
Haojun Liao 已提交
2316 2317
    *exists = false;
    return code;
2318
  }
H
Haojun Liao 已提交
2319

2320 2321 2322
  assert(pTsdbReadHandle->pFileGroup != NULL && pTsdbReadHandle->numOfBlocks > 0);
  cur->slot = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 0:pTsdbReadHandle->numOfBlocks-1;
  cur->fid = pTsdbReadHandle->pFileGroup->fid;
H
Haojun Liao 已提交
2323

2324 2325
  STableBlockInfo* pBlockInfo = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
  return getDataBlockRv(pTsdbReadHandle, pBlockInfo, exists);
H
Haojun Liao 已提交
2326 2327 2328 2329 2330 2331 2332
}

static bool isEndFileDataBlock(SQueryFilePos* cur, int32_t numOfBlocks, bool ascTrav) {
  assert(cur != NULL && numOfBlocks > 0);
  return (cur->slot == numOfBlocks - 1 && ascTrav) || (cur->slot == 0 && !ascTrav);
}

2333 2334
static void moveToNextDataBlockInCurrentFile(STsdbReadHandle* pTsdbReadHandle) {
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1 : -1;
H
Haojun Liao 已提交
2335

2336 2337
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  assert(cur->slot < pTsdbReadHandle->numOfBlocks && cur->slot >= 0);
H
Haojun Liao 已提交
2338 2339 2340 2341

  cur->slot += step;
  cur->mixBlock       = false;
  cur->blockCompleted = false;
2342
}
2343
#if 0
H
Haojun Liao 已提交
2344
int32_t tsdbGetFileBlocksDistInfo(tsdbReaderT* queryHandle, STableBlockDist* pTableBlockInfo) {
2345
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) queryHandle;
H
Haojun Liao 已提交
2346

H
Haojun Liao 已提交
2347
  pTableBlockInfo->totalSize = 0;
2348
  pTableBlockInfo->totalRows = 0;
2349
  STsdbFS* pFileHandle = REPO_FS(pTsdbReadHandle->pTsdb);
H
Haojun Liao 已提交
2350 2351

  // find the start data block in file
2352 2353 2354
  pTsdbReadHandle->locateStart = true;
  STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
  int32_t   fid = getFileIdFromKey(pTsdbReadHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
H
Haojun Liao 已提交
2355 2356

  tsdbRLockFS(pFileHandle);
2357 2358
  tsdbFSIterInit(&pTsdbReadHandle->fileIter, pFileHandle, pTsdbReadHandle->order);
  tsdbFSIterSeek(&pTsdbReadHandle->fileIter, fid);
H
Haojun Liao 已提交
2359 2360
  tsdbUnLockFS(pFileHandle);

H
Haojun Liao 已提交
2361
  pTableBlockInfo->numOfFiles += 1;
H
Haojun Liao 已提交
2362

H
Haojun Liao 已提交
2363
  int32_t     code = TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
2364
  int32_t     numOfBlocks = 0;
2365
  int32_t     numOfTables = (int32_t)taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2366
  int         defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock);
H
Haojun Liao 已提交
2367 2368 2369 2370
  STimeWindow win = TSWINDOW_INITIALIZER;

  while (true) {
    numOfBlocks = 0;
2371
    tsdbRLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2372

2373 2374
    if ((pTsdbReadHandle->pFileGroup = tsdbFSIterNext(&pTsdbReadHandle->fileIter)) == NULL) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2375 2376 2377
      break;
    }

2378
    tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pTsdbReadHandle->pFileGroup->fid, &win.skey, &win.ekey);
H
Haojun Liao 已提交
2379 2380

    // current file are not overlapped with query time window, ignore remain files
2381 2382 2383
    if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.skey > pTsdbReadHandle->window.ekey) ||
    (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.ekey < pTsdbReadHandle->window.ekey)) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2384 2385
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pTsdbReadHandle,
                pTsdbReadHandle->window.skey, pTsdbReadHandle->window.ekey, pTsdbReadHandle->idStr);
2386
      pTsdbReadHandle->pFileGroup = NULL;
H
Haojun Liao 已提交
2387 2388 2389
      break;
    }

H
Haojun Liao 已提交
2390
    pTableBlockInfo->numOfFiles += 1;
2391 2392
    if (tsdbSetAndOpenReadFSet(&pTsdbReadHandle->rhelper, pTsdbReadHandle->pFileGroup) < 0) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2393 2394 2395 2396
      code = terrno;
      break;
    }

2397
    tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2398

2399
    if (tsdbLoadBlockIdx(&pTsdbReadHandle->rhelper) < 0) {
H
Haojun Liao 已提交
2400 2401 2402 2403
      code = terrno;
      break;
    }

2404
    if ((code = getFileCompInfo(pTsdbReadHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2405 2406 2407
      break;
    }

H
Haojun Liao 已提交
2408 2409
    tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pTsdbReadHandle, numOfBlocks, numOfTables,
              pTsdbReadHandle->pFileGroup->fid, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2410 2411 2412 2413 2414 2415

    if (numOfBlocks == 0) {
      continue;
    }

    for (int32_t i = 0; i < numOfTables; ++i) {
2416
      STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Haojun Liao 已提交
2417 2418 2419

      SBlock* pBlock = pCheckInfo->pCompInfo->blocks;
      for (int32_t j = 0; j < pCheckInfo->numOfBlocks; ++j) {
H
Haojun Liao 已提交
2420
        pTableBlockInfo->totalSize += pBlock[j].len;
H
Haojun Liao 已提交
2421

H
Haojun Liao 已提交
2422
        int32_t numOfRows = pBlock[j].numOfRows;
2423 2424 2425
        pTableBlockInfo->totalRows += numOfRows;
        if (numOfRows > pTableBlockInfo->maxRows) pTableBlockInfo->maxRows = numOfRows;
        if (numOfRows < pTableBlockInfo->minRows) pTableBlockInfo->minRows = numOfRows;
2426
        if (numOfRows < defaultRows) pTableBlockInfo->numOfSmallBlocks+=1;
2427 2428 2429
        int32_t  stepIndex = (numOfRows-1)/TSDB_BLOCK_DIST_STEP_ROWS;
        SFileBlockInfo *blockInfo = (SFileBlockInfo*)taosArrayGet(pTableBlockInfo->dataBlockInfos, stepIndex);
        blockInfo->numBlocksOfStep++;
H
Haojun Liao 已提交
2430 2431 2432 2433 2434 2435
      }
    }
  }

  return code;
}
2436
#endif
H
Haojun Liao 已提交
2437

2438 2439 2440
static int32_t getDataBlocksInFiles(STsdbReadHandle* pTsdbReadHandle, bool* exists) {
  STsdbFS*       pFileHandle = REPO_FS(pTsdbReadHandle->pTsdb);
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
2441 2442

  // find the start data block in file
2443 2444 2445 2446
  if (!pTsdbReadHandle->locateStart) {
    pTsdbReadHandle->locateStart = true;
    STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
    int32_t   fid = getFileIdFromKey(pTsdbReadHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
H
Haojun Liao 已提交
2447

H
Hongze Cheng 已提交
2448
    tsdbRLockFS(pFileHandle);
2449 2450
    tsdbFSIterInit(&pTsdbReadHandle->fileIter, pFileHandle, pTsdbReadHandle->order);
    tsdbFSIterSeek(&pTsdbReadHandle->fileIter, fid);
H
Hongze Cheng 已提交
2451
    tsdbUnLockFS(pFileHandle);
2452

2453
    return getFirstFileDataBlock(pTsdbReadHandle, exists);
2454
  } else {
2455
    // check if current file block is all consumed
2456
    STableBlockInfo* pBlockInfo = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
2457
    STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
H
Haojun Liao 已提交
2458

2459
    // current block is done, try next
H
Haojun Liao 已提交
2460
    if ((!cur->mixBlock) || cur->blockCompleted) {
H
Haojun Liao 已提交
2461
      // all data blocks in current file has been checked already, try next file if exists
2462
    } else {
H
Haojun Liao 已提交
2463 2464
      tsdbDebug("%p continue in current data block, index:%d, pos:%d, %s", pTsdbReadHandle, cur->slot, cur->pos,
                pTsdbReadHandle->idStr);
2465 2466
      int32_t code = handleDataMergeIfNeeded(pTsdbReadHandle, pBlockInfo->compBlock, pCheckInfo);
      *exists = (pTsdbReadHandle->realNumOfRows > 0);
H
Haojun Liao 已提交
2467

H
Haojun Liao 已提交
2468 2469 2470 2471 2472 2473 2474
      if (code != TSDB_CODE_SUCCESS || *exists) {
        return code;
      }
    }

    // current block is empty, try next block in file
    // all data blocks in current file has been checked already, try next file if exists
2475 2476
    if (isEndFileDataBlock(cur, pTsdbReadHandle->numOfBlocks, ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
      return getFirstFileDataBlock(pTsdbReadHandle, exists);
H
Haojun Liao 已提交
2477
    } else {
2478 2479 2480
      moveToNextDataBlockInCurrentFile(pTsdbReadHandle);
      STableBlockInfo* pNext = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
      return getDataBlockRv(pTsdbReadHandle, pNext, exists);
2481 2482
    }
  }
2483 2484
}

2485 2486
static bool doHasDataInBuffer(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2487
  
2488 2489
  while (pTsdbReadHandle->activeIndex < numOfTables) {
    if (hasMoreDataInCache(pTsdbReadHandle)) {
2490 2491
      return true;
    }
H
Haojun Liao 已提交
2492

2493
    pTsdbReadHandle->activeIndex += 1;
2494
  }
H
Haojun Liao 已提交
2495

2496 2497 2498
  return false;
}

2499
//todo not unref yet, since it is not support multi-group interpolation query
H
Haojun Liao 已提交
2500
static UNUSED_FUNC void changeQueryHandleForInterpQuery(tsdbReaderT pHandle) {
H
Haojun Liao 已提交
2501
  // filter the queried time stamp in the first place
2502
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
H
Haojun Liao 已提交
2503 2504

  // starts from the buffer in case of descending timestamp order check data blocks
2505
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2506 2507 2508

  int32_t i = 0;
  while(i < numOfTables) {
2509
    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Haojun Liao 已提交
2510 2511

    // the first qualified table for interpolation query
2512 2513 2514 2515
//    if ((pTsdbReadHandle->window.skey <= pCheckInfo->pTableObj->lastKey) &&
//        (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) {
//      break;
//    }
H
Haojun Liao 已提交
2516 2517 2518 2519 2520 2521 2522 2523 2524

    i++;
  }

  // there are no data in all the tables
  if (i == numOfTables) {
    return;
  }

2525 2526
  STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
  taosArrayClear(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2527

2528 2529
  info.lastKey = pTsdbReadHandle->window.skey;
  taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info);
H
Haojun Liao 已提交
2530 2531 2532
}

static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* win,
2533
                                 STsdbReadHandle* pTsdbReadHandle) {
H
Haojun Liao 已提交
2534
  int     numOfRows = 0;
2535 2536
  int32_t numOfCols = (int32_t)taosArrayGetSize(pTsdbReadHandle->pColumns);
  STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
H
Haojun Liao 已提交
2537 2538 2539
  win->skey = TSKEY_INITIAL_VAL;

  int64_t st = taosGetTimestampUs();
D
fix bug  
dapan1121 已提交
2540 2541
  int16_t rv = -1;
  STSchema* pSchema = NULL;
H
Haojun Liao 已提交
2542 2543

  do {
H
Haojun Liao 已提交
2544
    STSRow* row = getSRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, NULL);
H
Haojun Liao 已提交
2545 2546 2547 2548
    if (row == NULL) {
      break;
    }

H
Haojun Liao 已提交
2549
    TSKEY key = TD_ROW_KEY(row);
2550 2551 2552
    if ((key > maxKey && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
      tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pTsdbReadHandle, key, pTsdbReadHandle->window.skey,
                pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
2553 2554 2555 2556 2557 2558 2559 2560 2561

      break;
    }

    if (win->skey == INT64_MIN) {
      win->skey = key;
    }

    win->ekey = key;
H
Haojun Liao 已提交
2562
    if (rv != TD_ROW_SVER(row)) {
2563
      pSchema = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, pCheckInfo->tableId, 0);
H
Haojun Liao 已提交
2564
      rv = TD_ROW_SVER(row);
D
fix bug  
dapan1121 已提交
2565
    }
2566
    mergeTwoRowFromMem(pTsdbReadHandle, maxRowsToRead, numOfRows, row, NULL, numOfCols, pCheckInfo->tableId, pSchema, NULL, true);
H
Haojun Liao 已提交
2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577

    if (++numOfRows >= maxRowsToRead) {
      moveToNextRowInMem(pCheckInfo);
      break;
    }

  } while(moveToNextRowInMem(pCheckInfo));

  assert(numOfRows <= maxRowsToRead);

  // if the buffer is not full in case of descending order query, move the data in the front of the buffer
2578
  if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && numOfRows < maxRowsToRead) {
H
Haojun Liao 已提交
2579 2580 2581
    int32_t emptySize = maxRowsToRead - numOfRows;

    for(int32_t i = 0; i < numOfCols; ++i) {
2582
      SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
H
Haojun Liao 已提交
2583 2584 2585 2586 2587
      memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
    }
  }

  int64_t elapsedTime = taosGetTimestampUs() - st;
H
Haojun Liao 已提交
2588 2589
  tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, %s", pTsdbReadHandle,
            elapsedTime, numOfRows, numOfCols, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2590 2591 2592 2593

  return numOfRows;
}

2594 2595
static int32_t getAllTableList(SMeta* pMeta, uint64_t uid, SArray* list) {
  SMCtbCursor* pCur = metaOpenCtbCursor(pMeta, uid);
H
Haojun Liao 已提交
2596

2597 2598 2599 2600 2601
  while (1) {
    tb_uid_t id = metaCtbCursorNext(pCur);
    if (id == 0) {
      break;
    }
H
Haojun Liao 已提交
2602

2603
    STableKeyInfo info = {.lastKey = TSKEY_INITIAL_VAL, uid = id};
H
Haojun Liao 已提交
2604 2605 2606
    taosArrayPush(list, &info);
  }

2607
  metaCloseCtbCurosr(pCur);
H
Haojun Liao 已提交
2608 2609 2610 2611 2612 2613 2614 2615
  return TSDB_CODE_SUCCESS;
}

static void destroyHelper(void* param) {
  if (param == NULL) {
    return;
  }

2616 2617 2618 2619 2620 2621
//  tQueryInfo* pInfo = (tQueryInfo*)param;
//  if (pInfo->optr != TSDB_RELATION_IN) {
//    tfree(pInfo->q);
//  } else {
//    taosHashCleanup((SHashObj *)(pInfo->q));
//  }
H
Haojun Liao 已提交
2622 2623 2624 2625

  free(param);
}

2626 2627 2628 2629 2630
#define TSDB_PREV_ROW  0x1
#define TSDB_NEXT_ROW  0x2

static bool  loadBlockOfActiveTable(STsdbReadHandle* pTsdbReadHandle) {
  if (pTsdbReadHandle->checkFiles) {
H
Haojun Liao 已提交
2631 2632
    // check if the query range overlaps with the file data block
    bool exists = true;
H
Haojun Liao 已提交
2633

2634
    int32_t code = getDataBlocksInFiles(pTsdbReadHandle, &exists);
H
Haojun Liao 已提交
2635
    if (code != TSDB_CODE_SUCCESS) {
2636
      pTsdbReadHandle->checkFiles = false;
H
Haojun Liao 已提交
2637 2638
      return false;
    }
H
Haojun Liao 已提交
2639

H
Haojun Liao 已提交
2640
    if (exists) {
H
Haojun Liao 已提交
2641
      tsdbRetrieveDataBlock((tsdbReaderT*) pTsdbReadHandle, NULL);
2642 2643 2644
      if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey) {
        SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, 0);
        assert(*(int64_t*)pColInfo->pData == pTsdbReadHandle->window.skey);
H
Haojun Liao 已提交
2645 2646
      }

2647
      pTsdbReadHandle->currentLoadExternalRows = false; // clear the flag, since the exact matched row is found.
H
Haojun Liao 已提交
2648 2649
      return exists;
    }
H
Haojun Liao 已提交
2650

2651
    pTsdbReadHandle->checkFiles = false;
H
Haojun Liao 已提交
2652
  }
H
Haojun Liao 已提交
2653

2654 2655
  if (hasMoreDataInCache(pTsdbReadHandle)) {
    pTsdbReadHandle->currentLoadExternalRows = false;
H
Haojun Liao 已提交
2656 2657
    return true;
  }
H
Haojun Liao 已提交
2658

H
Haojun Liao 已提交
2659
  // current result is empty
2660 2661
  if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey && pTsdbReadHandle->cur.rows == 0) {
//    STsdbMemTable* pMemRef = pTsdbReadHandle->pMemTable;
H
Haojun Liao 已提交
2662

2663 2664
//    doGetExternalRow(pTsdbReadHandle, TSDB_PREV_ROW, pMemRef);
//    doGetExternalRow(pTsdbReadHandle, TSDB_NEXT_ROW, pMemRef);
H
Haojun Liao 已提交
2665

2666
    bool result = tsdbGetExternalRow(pTsdbReadHandle);
H
Haojun Liao 已提交
2667

2668 2669 2670
//    pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//    pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
    pTsdbReadHandle->currentLoadExternalRows = false;
H
Haojun Liao 已提交
2671 2672

    return result;
2673
  }
H
Haojun Liao 已提交
2674

H
Haojun Liao 已提交
2675 2676
  return false;
}
2677

2678
static bool loadCachedLastRow(STsdbReadHandle* pTsdbReadHandle) {
H
Haojun Liao 已提交
2679
  // the last row is cached in buffer, return it directly.
2680 2681 2682
  // here note that the pTsdbReadHandle->window must be the TS_INITIALIZER
  int32_t numOfCols  = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
  size_t  numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2683 2684
  assert(numOfTables > 0 && numOfCols > 0);

2685
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
2686

H
Haojun Liao 已提交
2687
  STSRow*  pRow = NULL;
H
Haojun Liao 已提交
2688
  TSKEY    key  = TSKEY_INITIAL_VAL;
2689 2690 2691 2692 2693 2694 2695 2696
  int32_t  step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;

  if (++pTsdbReadHandle->activeIndex < numOfTables) {
    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
//    int32_t ret = tsdbGetCachedLastRow(pCheckInfo->pTableObj, &pRow, &key);
//    if (ret != TSDB_CODE_SUCCESS) {
//      return false;
//    }
H
Haojun Liao 已提交
2697
    mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, pRow, NULL, numOfCols, pCheckInfo->tableId, NULL, NULL, true);
H
Haojun Liao 已提交
2698
    tfree(pRow);
H
Haojun Liao 已提交
2699

H
Haojun Liao 已提交
2700 2701 2702 2703 2704 2705 2706 2707 2708 2709
    // update the last key value
    pCheckInfo->lastKey = key + step;

    cur->rows     = 1;  // only one row
    cur->lastKey  = key + step;
    cur->mixBlock = true;
    cur->win.skey = key;
    cur->win.ekey = key;

    return true;
2710
  }
H
Haojun Liao 已提交
2711

H
Haojun Liao 已提交
2712 2713 2714
  return false;
}

D
init  
dapan1121 已提交
2715

D
update  
dapan1121 已提交
2716

2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868
//static bool loadCachedLast(STsdbReadHandle* pTsdbReadHandle) {
//  // the last row is cached in buffer, return it directly.
//  // here note that the pTsdbReadHandle->window must be the TS_INITIALIZER
//  int32_t tgNumOfCols = (int32_t)QH_GET_NUM_OF_COLS(pTsdbReadHandle);
//  size_t  numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//  int32_t numOfRows = 0;
//  assert(numOfTables > 0 && tgNumOfCols > 0);
//  SQueryFilePos* cur = &pTsdbReadHandle->cur;
//  TSKEY priKey = TSKEY_INITIAL_VAL;
//  int32_t priIdx = -1;
//  SColumnInfoData* pColInfo = NULL;
//
//  while (++pTsdbReadHandle->activeIndex < numOfTables) {
//    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
//    STable* pTable = pCheckInfo->pTableObj;
//    char* pData = NULL;
//
//    int32_t numOfCols = pTable->maxColNum;
//
//    if (pTable->lastCols == NULL || pTable->maxColNum <= 0) {
//      tsdbWarn("no last cached for table %s, uid:%" PRIu64 ",tid:%d", pTable->name->data, pTable->uid, pTable->tableId);
//      continue;
//    }
//
//    int32_t i = 0, j = 0;
//    while(i < tgNumOfCols && j < numOfCols) {
//      pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
//      if (pTable->lastCols[j].colId < pColInfo->info.colId) {
//        j++;
//        continue;
//      } else if (pTable->lastCols[j].colId > pColInfo->info.colId) {
//        i++;
//        continue;
//      }
//
//      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
//
//      if (pTable->lastCols[j].bytes > 0) {
//        void* value = pTable->lastCols[j].pData;
//        switch (pColInfo->info.type) {
//          case TSDB_DATA_TYPE_BINARY:
//          case TSDB_DATA_TYPE_NCHAR:
//            memcpy(pData, value, varDataTLen(value));
//            break;
//          case TSDB_DATA_TYPE_NULL:
//          case TSDB_DATA_TYPE_BOOL:
//          case TSDB_DATA_TYPE_TINYINT:
//          case TSDB_DATA_TYPE_UTINYINT:
//            *(uint8_t *)pData = *(uint8_t *)value;
//            break;
//          case TSDB_DATA_TYPE_SMALLINT:
//          case TSDB_DATA_TYPE_USMALLINT:
//            *(uint16_t *)pData = *(uint16_t *)value;
//            break;
//          case TSDB_DATA_TYPE_INT:
//          case TSDB_DATA_TYPE_UINT:
//            *(uint32_t *)pData = *(uint32_t *)value;
//            break;
//          case TSDB_DATA_TYPE_BIGINT:
//          case TSDB_DATA_TYPE_UBIGINT:
//            *(uint64_t *)pData = *(uint64_t *)value;
//            break;
//          case TSDB_DATA_TYPE_FLOAT:
//            SET_FLOAT_PTR(pData, value);
//            break;
//          case TSDB_DATA_TYPE_DOUBLE:
//            SET_DOUBLE_PTR(pData, value);
//            break;
//          case TSDB_DATA_TYPE_TIMESTAMP:
//            if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
//              priKey = tdGetKey(*(TKEY *)value);
//              priIdx = i;
//
//              i++;
//              j++;
//              continue;
//            } else {
//              *(TSKEY *)pData = *(TSKEY *)value;
//            }
//            break;
//          default:
//            memcpy(pData, value, pColInfo->info.bytes);
//        }
//
//        for (int32_t n = 0; n < tgNumOfCols; ++n) {
//          if (n == i) {
//            continue;
//          }
//
//          pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, n);
//          pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;;
//
//          if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
////            *(TSKEY *)pData = pTable->lastCols[j].ts;
//            continue;
//          }
//
//          if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
//            setVardataNull(pData, pColInfo->info.type);
//          } else {
//            setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
//          }
//        }
//
//        numOfRows++;
//        assert(numOfRows < pTsdbReadHandle->outputCapacity);
//      }
//
//      i++;
//      j++;
//    }
//
//    // leave the real ts column as the last row, because last function only (not stable) use the last row as res
//    if (priKey != TSKEY_INITIAL_VAL) {
//      pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, priIdx);
//      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
//
//      *(TSKEY *)pData = priKey;
//
//      for (int32_t n = 0; n < tgNumOfCols; ++n) {
//        if (n == priIdx) {
//          continue;
//        }
//
//        pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, n);
//        pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;;
//
//        assert (pColInfo->info.colId != PRIMARYKEY_TIMESTAMP_COL_ID);
//
//        if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
//          setVardataNull(pData, pColInfo->info.type);
//        } else {
//          setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
//        }
//      }
//
//      numOfRows++;
//    }
//
//    if (numOfRows > 0) {
//      cur->rows     = numOfRows;
//      cur->mixBlock = true;
//
//      return true;
//    }
//  }
//
//  return false;
//}

static bool loadDataBlockFromTableSeq(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2869 2870 2871
  assert(numOfTables > 0);

  int64_t stime = taosGetTimestampUs();
H
Haojun Liao 已提交
2872

2873 2874
  while(pTsdbReadHandle->activeIndex < numOfTables) {
    if (loadBlockOfActiveTable(pTsdbReadHandle)) {
H
Haojun Liao 已提交
2875 2876 2877
      return true;
    }

2878
    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
H
Haojun Liao 已提交
2879 2880
    pCheckInfo->numOfBlocks = 0;

2881 2882 2883 2884 2885
    pTsdbReadHandle->activeIndex += 1;
    pTsdbReadHandle->locateStart = false;
    pTsdbReadHandle->checkFiles  = true;
    pTsdbReadHandle->cur.rows    = 0;
    pTsdbReadHandle->currentLoadExternalRows = pTsdbReadHandle->loadExternalRow;
H
Haojun Liao 已提交
2886 2887 2888 2889

    terrno = TSDB_CODE_SUCCESS;

    int64_t elapsedTime = taosGetTimestampUs() - stime;
2890
    pTsdbReadHandle->cost.checkForNextTime += elapsedTime;
H
Haojun Liao 已提交
2891 2892 2893
  }

  return false;
2894 2895
}

H
Haojun Liao 已提交
2896
// handle data in cache situation
H
Haojun Liao 已提交
2897
bool tsdbNextDataBlock(tsdbReaderT pHandle) {
2898
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
Y
yihaoDeng 已提交
2899

2900
  if (emptyQueryTimewindow(pTsdbReadHandle)) {
H
Haojun Liao 已提交
2901
    tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
2902 2903 2904
    return false;
  }

Y
yihaoDeng 已提交
2905 2906 2907
  int64_t stime = taosGetTimestampUs();
  int64_t elapsedTime = stime;

2908
  // TODO refactor: remove "type"
2909 2910 2911 2912 2913
  if (pTsdbReadHandle->type == TSDB_QUERY_TYPE_LAST) {
    if (pTsdbReadHandle->cachelastrow == TSDB_CACHED_TYPE_LASTROW) {
//      return loadCachedLastRow(pTsdbReadHandle);
    } else if (pTsdbReadHandle->cachelastrow == TSDB_CACHED_TYPE_LAST) {
//      return loadCachedLast(pTsdbReadHandle);
D
init  
dapan1121 已提交
2914
    }
H
Haojun Liao 已提交
2915
  }
Y
yihaoDeng 已提交
2916

2917 2918
  if (pTsdbReadHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) {
    return loadDataBlockFromTableSeq(pTsdbReadHandle);
H
Haojun Liao 已提交
2919
  } else { // loadType == RR and Offset Order
2920
    if (pTsdbReadHandle->checkFiles) {
H
Haojun Liao 已提交
2921 2922 2923
      // check if the query range overlaps with the file data block
      bool exists = true;

2924
      int32_t code = getDataBlocksInFiles(pTsdbReadHandle, &exists);
H
Haojun Liao 已提交
2925
      if (code != TSDB_CODE_SUCCESS) {
2926 2927
        pTsdbReadHandle->activeIndex = 0;
        pTsdbReadHandle->checkFiles = false;
H
Haojun Liao 已提交
2928 2929 2930 2931 2932

        return false;
      }

      if (exists) {
2933
        pTsdbReadHandle->cost.checkForNextTime += (taosGetTimestampUs() - stime);
H
Haojun Liao 已提交
2934 2935
        return exists;
      }
Y
yihaoDeng 已提交
2936

2937 2938
      pTsdbReadHandle->activeIndex = 0;
      pTsdbReadHandle->checkFiles = false;
Y
yihaoDeng 已提交
2939 2940
    }

H
Haojun Liao 已提交
2941
    // TODO: opt by consider the scan order
2942
    bool ret = doHasDataInBuffer(pTsdbReadHandle);
H
Haojun Liao 已提交
2943
    terrno = TSDB_CODE_SUCCESS;
Y
yihaoDeng 已提交
2944

H
Haojun Liao 已提交
2945
    elapsedTime = taosGetTimestampUs() - stime;
2946
    pTsdbReadHandle->cost.checkForNextTime += elapsedTime;
H
Haojun Liao 已提交
2947
    return ret;
Y
yihaoDeng 已提交
2948 2949
  }
}
2950

2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015
//static int32_t doGetExternalRow(STsdbReadHandle* pTsdbReadHandle, int16_t type, STsdbMemTable* pMemRef) {
//  STsdbReadHandle* pSecQueryHandle = NULL;
//
//  if (type == TSDB_PREV_ROW && pTsdbReadHandle->prev) {
//    return TSDB_CODE_SUCCESS;
//  }
//
//  if (type == TSDB_NEXT_ROW && pTsdbReadHandle->next) {
//    return TSDB_CODE_SUCCESS;
//  }
//
//  // prepare the structure
//  int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pTsdbReadHandle);
//
//  if (type == TSDB_PREV_ROW) {
//    pTsdbReadHandle->prev = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
//    if (pTsdbReadHandle->prev == NULL) {
//      terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//      goto out_of_memory;
//    }
//  } else {
//    pTsdbReadHandle->next = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
//    if (pTsdbReadHandle->next == NULL) {
//      terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//      goto out_of_memory;
//    }
//  }
//
//  SArray* row = (type == TSDB_PREV_ROW)? pTsdbReadHandle->prev : pTsdbReadHandle->next;
//
//  for (int32_t i = 0; i < numOfCols; ++i) {
//    SColumnInfoData* pCol = taosArrayGet(pTsdbReadHandle->pColumns, i);
//
//    SColumnInfoData colInfo = {{0}, 0};
//    colInfo.info = pCol->info;
//    colInfo.pData = calloc(1, pCol->info.bytes);
//    if (colInfo.pData == NULL) {
//      terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//      goto out_of_memory;
//    }
//
//    taosArrayPush(row, &colInfo);
//  }
//
//  // load the previous row
//  STsdbQueryCond cond = {.numOfCols = numOfCols, .loadExternalRows = false, .type = BLOCK_LOAD_OFFSET_SEQ_ORDER};
//  if (type == TSDB_PREV_ROW) {
//    cond.order = TSDB_ORDER_DESC;
//    cond.twindow = (STimeWindow){pTsdbReadHandle->window.skey, INT64_MIN};
//  } else {
//    cond.order = TSDB_ORDER_ASC;
//    cond.twindow = (STimeWindow){pTsdbReadHandle->window.skey, INT64_MAX};
//  }
//
//  cond.colList = calloc(cond.numOfCols, sizeof(SColumnInfo));
//  if (cond.colList == NULL) {
//    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//    goto out_of_memory;
//  }
//
//  for (int32_t i = 0; i < cond.numOfCols; ++i) {
//    SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, i);
//    memcpy(&cond.colList[i], &pColInfoData->info, sizeof(SColumnInfo));
//  }
//
H
Haojun Liao 已提交
3016
//  pSecQueryHandle = tsdbQueryTablesImpl(pTsdbReadHandle->pTsdb, &cond, pTsdbReadHandle->idStr, pMemRef);
3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056
//  tfree(cond.colList);
//
//  // current table, only one table
//  STableCheckInfo* pCurrent = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
//
//  SArray* psTable = NULL;
//  pSecQueryHandle->pTableCheckInfo = createCheckInfoFromCheckInfo(pCurrent, pSecQueryHandle->window.skey, &psTable);
//  if (pSecQueryHandle->pTableCheckInfo == NULL) {
//    taosArrayDestroy(psTable);
//    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//    goto out_of_memory;
//  }
//
//
//  tsdbMayTakeMemSnapshot(pSecQueryHandle, psTable);
//  if (!tsdbNextDataBlock((void*)pSecQueryHandle)) {
//    // no result in current query, free the corresponding result rows structure
//    if (type == TSDB_PREV_ROW) {
//      pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//    } else {
//      pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
//    }
//
//    goto out_of_memory;
//  }
//
//  SDataBlockInfo blockInfo = {{0}, 0};
//  tsdbRetrieveDataBlockInfo((void*)pSecQueryHandle, &blockInfo);
//  tsdbRetrieveDataBlock((void*)pSecQueryHandle, pSecQueryHandle->defaultLoadColumn);
//
//  row = (type == TSDB_PREV_ROW)? pTsdbReadHandle->prev:pTsdbReadHandle->next;
//  int32_t pos = (type == TSDB_PREV_ROW)?pSecQueryHandle->cur.rows - 1:0;
//
//  for (int32_t i = 0; i < numOfCols; ++i) {
//    SColumnInfoData* pCol = taosArrayGet(row, i);
//    SColumnInfoData* s = taosArrayGet(pSecQueryHandle->pColumns, i);
//    memcpy((char*)pCol->pData, (char*)s->pData + s->info.bytes * pos, pCol->info.bytes);
//  }
//
//out_of_memory:
3057
//  tsdbCleanupReadHandle(pSecQueryHandle);
3058 3059 3060
//  return terrno;
//}

H
Haojun Liao 已提交
3061
bool tsdbGetExternalRow(tsdbReaderT pHandle) {
3062 3063
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
3064

H
Haojun Liao 已提交
3065 3066
  cur->fid = INT32_MIN;
  cur->mixBlock = true;
3067
  if (pTsdbReadHandle->prev == NULL || pTsdbReadHandle->next == NULL) {
H
Haojun Liao 已提交
3068 3069
    cur->rows = 0;
    return false;
H
Haojun Liao 已提交
3070 3071
  }

3072
  int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pTsdbReadHandle);
H
Haojun Liao 已提交
3073
  for (int32_t i = 0; i < numOfCols; ++i) {
3074 3075
    SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, i);
    SColumnInfoData* first = taosArrayGet(pTsdbReadHandle->prev, i);
H
Haojun Liao 已提交
3076 3077 3078

    memcpy(pColInfoData->pData, first->pData, pColInfoData->info.bytes);

3079
    SColumnInfoData* sec = taosArrayGet(pTsdbReadHandle->next, i);
sangshuduo's avatar
sangshuduo 已提交
3080
    memcpy(((char*)pColInfoData->pData) + pColInfoData->info.bytes, sec->pData, pColInfoData->info.bytes);
H
Haojun Liao 已提交
3081 3082

    if (i == 0 && pColInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
H
Haojun Liao 已提交
3083
      cur->win.skey = *(TSKEY*)pColInfoData->pData;
sangshuduo's avatar
sangshuduo 已提交
3084
      cur->win.ekey = *(TSKEY*)(((char*)pColInfoData->pData) + TSDB_KEYSIZE);
H
Haojun Liao 已提交
3085 3086 3087
    }
  }

H
Haojun Liao 已提交
3088 3089
  cur->rows = 2;
  return true;
3090 3091
}

3092
/*
3093
 * if lastRow == NULL, return TSDB_CODE_TDB_NO_CACHE_LAST_ROW
3094
 * else set pRes and return TSDB_CODE_SUCCESS and save lastKey
3095
 */
H
Haojun Liao 已提交
3096
// int32_t tsdbGetCachedLastRow(STable* pTable, STSRow** pRes, TSKEY* lastKey) {
3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112
//  int32_t code = TSDB_CODE_SUCCESS;
//
//  TSDB_RLOCK_TABLE(pTable);
//
//  if (!pTable->lastRow) {
//    code = TSDB_CODE_TDB_NO_CACHE_LAST_ROW;
//    goto out;
//  }
//
//  if (pRes) {
//    *pRes = tdMemRowDup(pTable->lastRow);
//    if (*pRes == NULL) {
//      code = TSDB_CODE_TDB_OUT_OF_MEMORY;
//    }
//  }
//
H
Haojun Liao 已提交
3113
// out:
3114 3115 3116 3117
//  TSDB_RUNLOCK_TABLE(pTable);
//  return code;
//}

H
Haojun Liao 已提交
3118
bool isTsdbCacheLastRow(tsdbReaderT* pTsdbReadHandle) {
3119
  return ((STsdbReadHandle *)pTsdbReadHandle)->cachelastrow > TSDB_CACHED_TYPE_NONE;
D
fix bug  
dapan1121 已提交
3120 3121
}

3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148
int32_t checkForCachedLastRow(STsdbReadHandle* pTsdbReadHandle, STableGroupInfo *groupList) {
  assert(pTsdbReadHandle != NULL && groupList != NULL);

//  TSKEY    key = TSKEY_INITIAL_VAL;
//
//  SArray* group = taosArrayGetP(groupList->pGroupList, 0);
//  assert(group != NULL);
//
//  STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(group, 0);
//
//  int32_t code = 0;
//
//  if (((STable*)pInfo->pTable)->lastRow) {
//    code = tsdbGetCachedLastRow(pInfo->pTable, NULL, &key);
//    if (code != TSDB_CODE_SUCCESS) {
//      pTsdbReadHandle->cachelastrow = TSDB_CACHED_TYPE_NONE;
//    } else {
//      pTsdbReadHandle->cachelastrow = TSDB_CACHED_TYPE_LASTROW;
//    }
//  }
//
//  // update the tsdb query time range
//  if (pTsdbReadHandle->cachelastrow != TSDB_CACHED_TYPE_NONE) {
//    pTsdbReadHandle->window      = TSWINDOW_INITIALIZER;
//    pTsdbReadHandle->checkFiles  = false;
//    pTsdbReadHandle->activeIndex = -1;  // start from -1
//  }
H
Haojun Liao 已提交
3149

3150
  return TSDB_CODE_SUCCESS;
3151 3152
}

3153 3154
int32_t checkForCachedLast(STsdbReadHandle* pTsdbReadHandle) {
  assert(pTsdbReadHandle != NULL);
D
update  
dapan1121 已提交
3155 3156

  int32_t code = 0;
3157 3158 3159
//  if (pTsdbReadHandle->pTsdb && atomic_load_8(&pTsdbReadHandle->pTsdb->hasCachedLastColumn)){
//    pTsdbReadHandle->cachelastrow = TSDB_CACHED_TYPE_LAST;
//  }
D
update  
dapan1121 已提交
3160 3161

  // update the tsdb query time range
3162 3163 3164
  if (pTsdbReadHandle->cachelastrow) {
    pTsdbReadHandle->checkFiles  = false;
    pTsdbReadHandle->activeIndex = -1;  // start from -1
D
update  
dapan1121 已提交
3165 3166 3167 3168 3169 3170
  }

  return code;
}


3171
STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList) {
H
Haojun Liao 已提交
3172
  STimeWindow window = {INT64_MAX, INT64_MIN};
H
Haojun Liao 已提交
3173

H
Haojun Liao 已提交
3174
  int32_t totalNumOfTable = 0;
3175
  SArray* emptyGroup = taosArrayInit(16, sizeof(int32_t));
H
Haojun Liao 已提交
3176

H
Haojun Liao 已提交
3177 3178 3179 3180 3181
  // NOTE: starts from the buffer in case of descending timestamp order check data blocks
  size_t numOfGroups = taosArrayGetSize(groupList->pGroupList);
  for(int32_t j = 0; j < numOfGroups; ++j) {
    SArray* pGroup = taosArrayGetP(groupList->pGroupList, j);
    TSKEY   key = TSKEY_INITIAL_VAL;
H
Haojun Liao 已提交
3182

H
Haojun Liao 已提交
3183
    STableKeyInfo keyInfo = {0};
H
Haojun Liao 已提交
3184

H
Haojun Liao 已提交
3185 3186
    size_t numOfTables = taosArrayGetSize(pGroup);
    for(int32_t i = 0; i < numOfTables; ++i) {
3187
      STableKeyInfo* pInfo = (STableKeyInfo*) taosArrayGet(pGroup, i);
H
Haojun Liao 已提交
3188

H
Haojun Liao 已提交
3189
      // if the lastKey equals to INT64_MIN, there is no data in this table
3190
      TSKEY lastKey = 0;//((STable*)(pInfo->pTable))->lastKey;
H
Haojun Liao 已提交
3191 3192
      if (key < lastKey) {
        key = lastKey;
H
Haojun Liao 已提交
3193

3194
//        keyInfo.pTable  = pInfo->pTable;
H
Haojun Liao 已提交
3195
        keyInfo.lastKey = key;
3196
        pInfo->lastKey  = key;
H
Haojun Liao 已提交
3197

H
Haojun Liao 已提交
3198 3199 3200
        if (key < window.skey) {
          window.skey = key;
        }
3201

H
Haojun Liao 已提交
3202 3203 3204 3205
        if (key > window.ekey) {
          window.ekey = key;
        }
      }
3206
    }
H
Haojun Liao 已提交
3207

H
Haojun Liao 已提交
3208
    // more than one table in each group, only one table left for each group
3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220
//    if (keyInfo.pTable != NULL) {
//      totalNumOfTable++;
//      if (taosArrayGetSize(pGroup) == 1) {
//        // do nothing
//      } else {
//        taosArrayClear(pGroup);
//        taosArrayPush(pGroup, &keyInfo);
//      }
//    } else {  // mark all the empty groups, and remove it later
//      taosArrayDestroy(pGroup);
//      taosArrayPush(emptyGroup, &j);
//    }
3221
  }
H
Haojun Liao 已提交
3222

H
Haojun Liao 已提交
3223 3224 3225
  // window does not being updated, so set the original
  if (window.skey == INT64_MAX && window.ekey == INT64_MIN) {
    window = TSWINDOW_INITIALIZER;
H
Haojun Liao 已提交
3226
    assert(totalNumOfTable == 0 && taosArrayGetSize(groupList->pGroupList) == numOfGroups);
H
Haojun Liao 已提交
3227 3228
  }

H
Haojun Liao 已提交
3229
  taosArrayRemoveBatch(groupList->pGroupList, TARRAY_GET_START(emptyGroup), (int32_t) taosArrayGetSize(emptyGroup));
3230 3231
  taosArrayDestroy(emptyGroup);

H
Haojun Liao 已提交
3232
  groupList->numOfTables = totalNumOfTable;
H
Haojun Liao 已提交
3233
  return window;
H
hjxilinx 已提交
3234 3235
}

H
Haojun Liao 已提交
3236
void tsdbRetrieveDataBlockInfo(tsdbReaderT* pTsdbReadHandle, SDataBlockInfo* pDataBlockInfo) {
3237
  STsdbReadHandle* pHandle = (STsdbReadHandle*)pTsdbReadHandle;
3238
  SQueryFilePos* cur = &pHandle->cur;
3239 3240

  uint64_t uid = 0;
H
Haojun Liao 已提交
3241

3242
  // there are data in file
D
dapan1121 已提交
3243
  if (pHandle->cur.fid != INT32_MIN) {
3244
    STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot];
3245
    uid = pBlockInfo->pTableCheckInfo->tableId;
H
[td-32]  
hjxilinx 已提交
3246
  } else {
3247
    STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
3248
    uid = pCheckInfo->tableId;
3249
  }
3250

3251 3252
  pDataBlockInfo->uid    = uid;
  pDataBlockInfo->rows   = cur->rows;
H
Haojun Liao 已提交
3253
  pDataBlockInfo->window = cur->win;
S
TD-1057  
Shengliang Guan 已提交
3254
  pDataBlockInfo->numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pHandle));
3255
}
H
hjxilinx 已提交
3256

H
Haojun Liao 已提交
3257 3258 3259
/*
 * return null for mixed data block, if not a complete file data block, the statistics value will always return NULL
 */
H
Haojun Liao 已提交
3260
int32_t tsdbRetrieveDataBlockStatisInfo(tsdbReaderT* pTsdbReadHandle, SDataStatis** pBlockStatis) {
3261
  STsdbReadHandle* pHandle = (STsdbReadHandle*) pTsdbReadHandle;
H
Haojun Liao 已提交
3262

H
Haojun Liao 已提交
3263 3264
  SQueryFilePos* c = &pHandle->cur;
  if (c->mixBlock) {
H
Haojun Liao 已提交
3265 3266 3267
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }
H
Haojun Liao 已提交
3268

H
Haojun Liao 已提交
3269 3270 3271 3272
  STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot];
  assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0)));

  // file block with sub-blocks has no statistics data
H
Haojun Liao 已提交
3273 3274 3275 3276
  if (pBlockInfo->compBlock->numOfSubBlocks > 1) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }
H
Haojun Liao 已提交
3277 3278

  int64_t stime = taosGetTimestampUs();
3279 3280
  int     statisStatus = tsdbLoadBlockStatis(&pHandle->rhelper, pBlockInfo->compBlock);
  if (statisStatus < TSDB_STATIS_OK) {
H
Hongze Cheng 已提交
3281
    return terrno;
3282 3283 3284
  } else if (statisStatus > TSDB_STATIS_OK) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
H
Hongze Cheng 已提交
3285
  }
H
Haojun Liao 已提交
3286

H
Haojun Liao 已提交
3287 3288
  int16_t* colIds = pHandle->defaultLoadColumn->pData;

H
Haojun Liao 已提交
3289
  size_t numOfCols = QH_GET_NUM_OF_COLS(pHandle);
H
Haojun Liao 已提交
3290
  memset(pHandle->statis, 0, numOfCols * sizeof(SDataStatis));
3291
  for(int32_t i = 0; i < numOfCols; ++i) {
H
Haojun Liao 已提交
3292
    pHandle->statis[i].colId = colIds[i];
3293
  }
H
Haojun Liao 已提交
3294

3295
  tsdbGetBlockStatis(&pHandle->rhelper, pHandle->statis, (int)numOfCols, pBlockInfo->compBlock);
H
Haojun Liao 已提交
3296 3297 3298

  // always load the first primary timestamp column data
  SDataStatis* pPrimaryColStatis = &pHandle->statis[0];
3299
  assert(pPrimaryColStatis->colId == PRIMARYKEY_TIMESTAMP_COL_ID);
H
Haojun Liao 已提交
3300 3301 3302 3303 3304

  pPrimaryColStatis->numOfNull = 0;
  pPrimaryColStatis->min = pBlockInfo->compBlock->keyFirst;
  pPrimaryColStatis->max = pBlockInfo->compBlock->keyLast;

H
Haojun Liao 已提交
3305
  //update the number of NULL data rows
H
Haojun Liao 已提交
3306
  for(int32_t i = 1; i < numOfCols; ++i) {
3307
    if (pHandle->statis[i].numOfNull == -1) { // set the column data are all NULL
H
Haojun Liao 已提交
3308 3309 3310
      pHandle->statis[i].numOfNull = pBlockInfo->compBlock->numOfRows;
    }
  }
H
Haojun Liao 已提交
3311 3312 3313 3314

  int64_t elapsed = taosGetTimestampUs() - stime;
  pHandle->cost.statisInfoLoadTime += elapsed;

H
Haojun Liao 已提交
3315
  *pBlockStatis = pHandle->statis;
3316
  return TSDB_CODE_SUCCESS;
H
hjxilinx 已提交
3317 3318
}

H
Haojun Liao 已提交
3319
SArray* tsdbRetrieveDataBlock(tsdbReaderT* pTsdbReadHandle, SArray* pIdList) {
H
[td-32]  
hjxilinx 已提交
3320
  /**
H
hjxilinx 已提交
3321
   * In the following two cases, the data has been loaded to SColumnInfoData.
H
[td-32]  
hjxilinx 已提交
3322 3323
   * 1. data is from cache, 2. data block is not completed qualified to query time range
   */
3324
  STsdbReadHandle* pHandle = (STsdbReadHandle*)pTsdbReadHandle;
3325

D
dapan1121 已提交
3326
  if (pHandle->cur.fid == INT32_MIN) {
H
[td-32]  
hjxilinx 已提交
3327 3328
    return pHandle->pColumns;
  } else {
H
Haojun Liao 已提交
3329 3330
    STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[pHandle->cur.slot];
    STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
3331

3332
    if (pHandle->cur.mixBlock) {
H
[td-32]  
hjxilinx 已提交
3333 3334
      return pHandle->pColumns;
    } else {
H
Haojun Liao 已提交
3335
      SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock);
3336
      assert(pHandle->realNumOfRows <= binfo.rows);
H
Haojun Liao 已提交
3337

H
hjxilinx 已提交
3338 3339
      // data block has been loaded, todo extract method
      SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo;
H
Haojun Liao 已提交
3340

H
Hongze Cheng 已提交
3341
      if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fid == pHandle->cur.fid &&
H
Haojun Liao 已提交
3342
          pBlockLoadInfo->uid == pCheckInfo->tableId) {
H
hjxilinx 已提交
3343
        return pHandle->pColumns;
H
Haojun Liao 已提交
3344
      } else {  // only load the file block
H
refact  
Hongze Cheng 已提交
3345
        SBlock* pBlock = pBlockInfo->compBlock;
H
Haojun Liao 已提交
3346
        if (doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot) != TSDB_CODE_SUCCESS) {
3347 3348
          return NULL;
        }
H
Haojun Liao 已提交
3349

H
Haojun Liao 已提交
3350
        // todo refactor
H
Haojun Liao 已提交
3351
        int32_t numOfRows = doCopyRowsFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1);
H
Haojun Liao 已提交
3352

H
Haojun Liao 已提交
3353
        // if the buffer is not full in case of descending order query, move the data in the front of the buffer
3354
        if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) {
H
Haojun Liao 已提交
3355
          int32_t emptySize = pHandle->outputCapacity - numOfRows;
S
TD-1057  
Shengliang Guan 已提交
3356
          int32_t reqNumOfCols = (int32_t)taosArrayGetSize(pHandle->pColumns);
H
Haojun Liao 已提交
3357

H
Haojun Liao 已提交
3358 3359
          for(int32_t i = 0; i < reqNumOfCols; ++i) {
            SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i);
S
TD-1057  
Shengliang Guan 已提交
3360
            memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
H
Haojun Liao 已提交
3361 3362
          }
        }
H
Haojun Liao 已提交
3363

H
hjxilinx 已提交
3364 3365
        return pHandle->pColumns;
      }
H
[td-32]  
hjxilinx 已提交
3366 3367
    }
  }
H
hjxilinx 已提交
3368
}
3369
#if 0
3370
void filterPrepare(void* expr, void* param) {
3371
  tExprNode* pExpr = (tExprNode*)expr;
H
[td-32]  
hjxilinx 已提交
3372
  if (pExpr->_node.info != NULL) {
3373 3374
    return;
  }
3375

H
[td-32]  
hjxilinx 已提交
3376
  pExpr->_node.info = calloc(1, sizeof(tQueryInfo));
H
Haojun Liao 已提交
3377

3378
  STSchema*   pTSSchema = (STSchema*) param;
H
hjxilinx 已提交
3379 3380 3381
  tQueryInfo* pInfo = pExpr->_node.info;
  tVariant*   pCond = pExpr->_node.pRight->pVal;
  SSchema*    pSchema = pExpr->_node.pLeft->pSchema;
3382

3383 3384
  pInfo->sch      = *pSchema;
  pInfo->optr     = pExpr->_node.optr;
Y
yihaoDeng 已提交
3385
  pInfo->compare  = getComparFunc(pInfo->sch.type, pInfo->optr);
H
Haojun Liao 已提交
3386
  pInfo->indexed  = pTSSchema->columns->colId == pInfo->sch.colId;
H
Haojun Liao 已提交
3387

H
hjxilinx 已提交
3388
  if (pInfo->optr == TSDB_RELATION_IN) {
Y
yihaoDeng 已提交
3389
     int dummy = -1;
3390
     SHashObj *pObj = NULL;
Y
yihaoDeng 已提交
3391 3392 3393 3394
     if (pInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
        pObj = taosHashInit(256, taosGetDefaultHashFunction(pInfo->sch.type), true, false);
        SArray *arr = (SArray *)(pCond->arr);
        for (size_t i = 0; i < taosArrayGetSize(arr); i++) {
Y
yihaoDeng 已提交
3395
          char* p = taosArrayGetP(arr, i);
3396 3397
          strntolower_s(varDataVal(p), varDataVal(p), varDataLen(p));
          taosHashPut(pObj, varDataVal(p), varDataLen(p), &dummy, sizeof(dummy));
Y
yihaoDeng 已提交
3398 3399 3400 3401
        }
     } else {
       buildFilterSetFromBinary((void **)&pObj, pCond->pz, pCond->nLen);
     }
3402
     pInfo->q = (char *)pObj;
H
Haojun Liao 已提交
3403
  } else if (pCond != NULL) {
3404 3405 3406 3407
    uint32_t size = pCond->nLen * TSDB_NCHAR_SIZE;
    if (size < (uint32_t)pSchema->bytes) {
      size = pSchema->bytes;
    }
3408 3409
    // to make sure tonchar does not cause invalid write, since the '\0' needs at least sizeof(wchar_t) space.
    pInfo->q = calloc(1, size + TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE);
3410
    tVariantDump(pCond, pInfo->q, pSchema->type, true);
weixin_48148422's avatar
weixin_48148422 已提交
3411
  }
3412 3413
}

3414
#endif
3415

H
Haojun Liao 已提交
3416
static int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) {
3417
#if 0
3418
  STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param;
3419 3420
  STable* pTable1 = ((STableKeyInfo*) p1)->uid;
  STable* pTable2 = ((STableKeyInfo*) p2)->uid;
H
Haojun Liao 已提交
3421

3422 3423 3424
  for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) {
    SColIndex* pColIndex = &pTableGroupSupp->pCols[i];
    int32_t colIndex = pColIndex->colIndex;
H
Haojun Liao 已提交
3425

H
Haojun Liao 已提交
3426
    assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX);
H
Haojun Liao 已提交
3427

3428 3429 3430 3431
    char *  f1 = NULL;
    char *  f2 = NULL;
    int32_t type = 0;
    int32_t bytes = 0;
H
Haojun Liao 已提交
3432

H
Haojun Liao 已提交
3433 3434 3435
    if (colIndex == TSDB_TBNAME_COLUMN_INDEX) {
      f1 = (char*) TABLE_NAME(pTable1);
      f2 = (char*) TABLE_NAME(pTable2);
3436
      type = TSDB_DATA_TYPE_BINARY;
3437
      bytes = tGetTbnameColumnSchema()->bytes;
3438
    } else {
Y
yihaoDeng 已提交
3439 3440 3441 3442 3443 3444 3445
      if (pTableGroupSupp->pTagSchema && colIndex < pTableGroupSupp->pTagSchema->numOfCols) {
        STColumn* pCol = schemaColAt(pTableGroupSupp->pTagSchema, colIndex);
        bytes = pCol->bytes;
        type = pCol->type;
        f1 = tdGetKVRowValOfCol(pTable1->tagVal, pCol->colId);
        f2 = tdGetKVRowValOfCol(pTable2->tagVal, pCol->colId);
      } 
3446
    }
H
Haojun Liao 已提交
3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460

    // this tags value may be NULL
    if (f1 == NULL && f2 == NULL) {
      continue;
    }

    if (f1 == NULL) {
      return -1;
    }

    if (f2 == NULL) {
      return 1;
    }

3461 3462 3463 3464 3465 3466 3467
    int32_t ret = doCompare(f1, f2, type, bytes);
    if (ret == 0) {
      continue;
    } else {
      return ret;
    }
  }
3468
#endif
3469 3470 3471
  return 0;
}

H
Haojun Liao 已提交
3472
static int tsdbCheckInfoCompar(const void* key1, const void* key2) {
3473
  if (((STableCheckInfo*)key1)->tableId < ((STableCheckInfo*)key2)->tableId) {
H
Haojun Liao 已提交
3474
    return -1;
3475
  } else if (((STableCheckInfo*)key1)->tableId > ((STableCheckInfo*)key2)->tableId) {
H
Haojun Liao 已提交
3476 3477 3478 3479 3480 3481 3482 3483 3484
    return 1;
  } else {
    ASSERT(false);
    return 0;
  }
}

void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, TSKEY skey,
                          STableGroupSupporter* pSupp, __ext_compar_fn_t compareFn) {
3485
  STable* pTable = taosArrayGetP(pTableList, 0);
H
Haojun Liao 已提交
3486 3487
  SArray* g = taosArrayInit(16, sizeof(STableKeyInfo));

3488
  STableKeyInfo info = {.lastKey = skey};
H
Haojun Liao 已提交
3489
  taosArrayPush(g, &info);
3490

3491
  for (int32_t i = 1; i < numOfTables; ++i) {
3492 3493
    STable** prev = taosArrayGet(pTableList, i - 1);
    STable** p = taosArrayGet(pTableList, i);
H
Haojun Liao 已提交
3494

H
hjxilinx 已提交
3495
    int32_t ret = compareFn(prev, p, pSupp);
3496
    assert(ret == 0 || ret == -1);
H
Haojun Liao 已提交
3497

3498
    if (ret == 0) {
3499
      STableKeyInfo info1 = {.lastKey = skey};
H
Haojun Liao 已提交
3500
      taosArrayPush(g, &info1);
3501 3502
    } else {
      taosArrayPush(pGroups, &g);  // current group is ended, start a new group
H
Haojun Liao 已提交
3503 3504
      g = taosArrayInit(16, sizeof(STableKeyInfo));

3505
      STableKeyInfo info1 = {.lastKey = skey};
H
Haojun Liao 已提交
3506
      taosArrayPush(g, &info1);
3507 3508
    }
  }
H
Haojun Liao 已提交
3509

3510
  taosArrayPush(pGroups, &g);
3511 3512
}

3513
SArray* createTableGroup(SArray* pTableList, SSchemaWrapper* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols, TSKEY skey) {
3514
  assert(pTableList != NULL);
3515
  SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
3516

3517 3518
  size_t size = taosArrayGetSize(pTableList);
  if (size == 0) {
S
Shengliang Guan 已提交
3519
    tsdbDebug("no qualified tables");
3520 3521
    return pTableGroup;
  }
H
Haojun Liao 已提交
3522

3523
  if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table
3524
    SArray* sa = taosArrayDup(pTableList);
H
Haojun Liao 已提交
3525 3526 3527 3528
    if (sa == NULL) {
      taosArrayDestroy(pTableGroup);
      return NULL;
    }
H
Haojun Liao 已提交
3529

3530
    taosArrayPush(pTableGroup, &sa);
S
TD-1057  
Shengliang Guan 已提交
3531
    tsdbDebug("all %" PRIzu " tables belong to one group", size);
3532
  } else {
H
Haojun Liao 已提交
3533 3534
    STableGroupSupporter sup = {0};
    sup.numOfCols = numOfOrderCols;
3535
    sup.pTagSchema = pTagSchema->pSchema;
H
Haojun Liao 已提交
3536 3537
    sup.pCols = pCols;

3538 3539
    taosqsort(pTableList->pData, size, sizeof(STableKeyInfo), &sup, tableGroupComparFn);
    createTableGroupImpl(pTableGroup, pTableList, size, skey, &sup, tableGroupComparFn);
3540
  }
H
Haojun Liao 已提交
3541

3542 3543 3544
  return pTableGroup;
}

3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628
//static bool tableFilterFp(const void* pNode, void* param) {
//  tQueryInfo* pInfo = (tQueryInfo*) param;
//
//  STable* pTable = (STable*)(SL_GET_NODE_DATA((SSkipListNode*)pNode));
//
//  char* val = NULL;
//  if (pInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
//    val = (char*) TABLE_NAME(pTable);
//  } else {
//    val = tdGetKVRowValOfCol(pTable->tagVal, pInfo->sch.colId);
//  }
//
//  if (pInfo->optr == TSDB_RELATION_ISNULL || pInfo->optr == TSDB_RELATION_NOTNULL) {
//    if (pInfo->optr == TSDB_RELATION_ISNULL) {
//      return (val == NULL) || isNull(val, pInfo->sch.type);
//    } else if (pInfo->optr == TSDB_RELATION_NOTNULL) {
//      return (val != NULL) && (!isNull(val, pInfo->sch.type));
//    }
//  } else if (pInfo->optr == TSDB_RELATION_IN) {
//     int type = pInfo->sch.type;
//     if (type == TSDB_DATA_TYPE_BOOL || IS_SIGNED_NUMERIC_TYPE(type) || type == TSDB_DATA_TYPE_TIMESTAMP) {
//       int64_t v;
//       GET_TYPED_DATA(v, int64_t, pInfo->sch.type, val);
//       return NULL != taosHashGet((SHashObj *)pInfo->q, (char *)&v, sizeof(v));
//     } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
//       uint64_t v;
//       GET_TYPED_DATA(v, uint64_t, pInfo->sch.type, val);
//       return NULL != taosHashGet((SHashObj *)pInfo->q, (char *)&v, sizeof(v));
//     }
//     else if (type == TSDB_DATA_TYPE_DOUBLE || type == TSDB_DATA_TYPE_FLOAT) {
//       double v;
//       GET_TYPED_DATA(v, double, pInfo->sch.type, val);
//       return NULL != taosHashGet((SHashObj *)pInfo->q, (char *)&v, sizeof(v));
//     } else if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR){
//       return NULL != taosHashGet((SHashObj *)pInfo->q, varDataVal(val), varDataLen(val));
//     }
//
//  }
//
//  int32_t ret = 0;
//  if (val == NULL) { //the val is possible to be null, so check it out carefully
//    ret = -1; // val is missing in table tags value pairs
//  } else {
//    ret = pInfo->compare(val, pInfo->q);
//  }
//
//  switch (pInfo->optr) {
//    case TSDB_RELATION_EQUAL: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_NOT_EQUAL: {
//      return ret != 0;
//    }
//    case TSDB_RELATION_GREATER_EQUAL: {
//      return ret >= 0;
//    }
//    case TSDB_RELATION_GREATER: {
//      return ret > 0;
//    }
//    case TSDB_RELATION_LESS_EQUAL: {
//      return ret <= 0;
//    }
//    case TSDB_RELATION_LESS: {
//      return ret < 0;
//    }
//    case TSDB_RELATION_LIKE: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_MATCH: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_NMATCH: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_IN: {
//      return ret == 1;
//    }
//
//    default:
//      assert(false);
//  }
//
//  return true;
//}
H
Haojun Liao 已提交
3629

3630
//static void getTableListfromSkipList(tExprNode *pExpr, SSkipList *pSkipList, SArray *result, SExprTraverseSupp *param);
3631

3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643
//static int32_t doQueryTableList(STable* pSTable, SArray* pRes, tExprNode* pExpr) {
//  // query according to the expression tree
//  SExprTraverseSupp supp = {
//      .nodeFilterFn = (__result_filter_fn_t) tableFilterFp,
//      .setupInfoFn = filterPrepare,
//      .pExtInfo = pSTable->tagSchema,
//      };
//
//  getTableListfromSkipList(pExpr, pSTable->pIndex, pRes, &supp);
//  tExprTreeDestroy(pExpr, destroyHelper);
//  return TSDB_CODE_SUCCESS;
//}
3644

H
Haojun Liao 已提交
3645
int32_t tsdbQuerySTableByTagCond(void* pMeta, uint64_t uid, TSKEY skey, const char* pTagCond, size_t len,
3646
                                 int16_t tagNameRelType, const char* tbnameCond, STableGroupInfo* pGroupInfo,
3647
                                 SColIndex* pColIndex, int32_t numOfCols, uint64_t reqId, uint64_t taskId) {
H
Haojun Liao 已提交
3648
  STbCfg* pTbCfg = metaGetTbInfoByUid(pMeta, uid);
3649
  if (pTbCfg == NULL) {
H
Haojun Liao 已提交
3650
    tsdbError("%p failed to get stable, uid:%"PRIu64", TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta, uid, taskId, reqId);
3651 3652
    terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
    goto _error;
3653
  }
H
Haojun Liao 已提交
3654

3655
  if (pTbCfg->type != META_SUPER_TABLE) {
H
Haojun Liao 已提交
3656
    tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta, uid, taskId, reqId);
3657
    terrno = TSDB_CODE_OPS_NOT_SUPPORT; //basically, this error is caused by invalid sql issued by client
3658
    goto _error;
H
hjxilinx 已提交
3659
  }
3660 3661

  //NOTE: not add ref count for super table
H
Haojun Liao 已提交
3662
  SArray* res = taosArrayInit(8, sizeof(STableKeyInfo));
H
Haojun Liao 已提交
3663
  SSchemaWrapper* pTagSchema = metaGetTableSchema(pMeta, uid, 0, true);
H
Haojun Liao 已提交
3664

weixin_48148422's avatar
weixin_48148422 已提交
3665 3666
  // no tags and tbname condition, all child tables of this stable are involved
  if (tbnameCond == NULL && (pTagCond == NULL || len == 0)) {
H
Haojun Liao 已提交
3667
    int32_t ret = getAllTableList(pMeta, uid, res);
3668 3669
    if (ret != TSDB_CODE_SUCCESS) {
      goto _error;
3670
    }
3671

sangshuduo's avatar
sangshuduo 已提交
3672
    pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(res);
H
Haojun Liao 已提交
3673
    pGroupInfo->pGroupList  = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey);
H
Haojun Liao 已提交
3674

H
Haojun Liao 已提交
3675 3676
    tsdbDebug("%p no table name/tag condition, all tables qualified, numOfTables:%u, group:%zu, TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta,
              pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList), taskId, reqId);
3677

3678
    taosArrayDestroy(res);
3679 3680
    return ret;
  }
3681

H
hjxilinx 已提交
3682
  int32_t ret = TSDB_CODE_SUCCESS;
3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726
//  tExprNode* expr = NULL;
//
//  TRY(TSDB_MAX_TAG_CONDITIONS) {
//    expr = exprTreeFromTableName(tbnameCond);
//    if (expr == NULL) {
//      expr = exprTreeFromBinary(pTagCond, len);
//    } else {
//      CLEANUP_PUSH_VOID_PTR_PTR(true, tExprTreeDestroy, expr, NULL);
//      tExprNode* tagExpr = exprTreeFromBinary(pTagCond, len);
//      if (tagExpr != NULL) {
//        CLEANUP_PUSH_VOID_PTR_PTR(true, tExprTreeDestroy, tagExpr, NULL);
//        tExprNode* tbnameExpr = expr;
//        expr = calloc(1, sizeof(tExprNode));
//        if (expr == NULL) {
//          THROW( TSDB_CODE_TDB_OUT_OF_MEMORY );
//        }
//        expr->nodeType = TSQL_NODE_EXPR;
//        expr->_node.optr = (uint8_t)tagNameRelType;
//        expr->_node.pLeft = tagExpr;
//        expr->_node.pRight = tbnameExpr;
//      }
//    }
//    CLEANUP_EXECUTE();
//
//  } CATCH( code ) {
//    CLEANUP_EXECUTE();
//    terrno = code;
//    tsdbUnlockRepoMeta(tsdb);     // unlock tsdb in any cases
//
//    goto _error;
//    // TODO: more error handling
//  } END_TRY
//
//  doQueryTableList(pTable, res, expr);
//  pGroupInfo->numOfTables = (uint32_t)taosArrayGetSize(res);
//  pGroupInfo->pGroupList  = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey);
//
//  tsdbDebug("%p stable tid:%d, uid:%"PRIu64" query, numOfTables:%u, belong to %" PRIzu " groups", tsdb, pTable->tableId,
//      pTable->uid, pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList));
//
//  taosArrayDestroy(res);
//
//  if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error;
//  return ret;
3727 3728 3729

  _error:
  return terrno;
3730
}
3731

H
Haojun Liao 已提交
3732 3733
int32_t tsdbGetOneTableGroup(void* pMeta, uint64_t uid, TSKEY startKey, STableGroupInfo* pGroupInfo) {
  STbCfg* pTbCfg = metaGetTbInfoByUid(pMeta, uid);
3734
  if (pTbCfg == NULL) {
3735 3736
    terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
    goto _error;
3737
  }
3738

3739 3740
  pGroupInfo->numOfTables = 1;
  pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
3741

H
Haojun Liao 已提交
3742 3743
  SArray* group = taosArrayInit(1, sizeof(STableKeyInfo));

3744
  STableKeyInfo info = {.lastKey = startKey, .uid = uid};
H
Haojun Liao 已提交
3745
  taosArrayPush(group, &info);
H
Haojun Liao 已提交
3746

3747
  taosArrayPush(pGroupInfo->pGroupList, &group);
3748
  return TSDB_CODE_SUCCESS;
3749 3750 3751

  _error:
  return terrno;
3752
}
3753

3754
#if 0
3755
int32_t tsdbGetTableGroupFromIdList(STsdb* tsdb, SArray* pTableIdList, STableGroupInfo* pGroupInfo) {
B
Bomin Zhang 已提交
3756 3757 3758
  if (tsdbRLockRepoMeta(tsdb) < 0) {
    return terrno;
  }
3759 3760 3761 3762

  assert(pTableIdList != NULL);
  size_t size = taosArrayGetSize(pTableIdList);
  pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
3763
  SArray* group = taosArrayInit(1, sizeof(STableKeyInfo));
3764

B
Bomin Zhang 已提交
3765
  for(int32_t i = 0; i < size; ++i) {
3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776
    STableIdInfo *id = taosArrayGet(pTableIdList, i);

    STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), id->uid);
    if (pTable == NULL) {
      tsdbWarn("table uid:%"PRIu64", tid:%d has been drop already", id->uid, id->tid);
      continue;
    }

    if (pTable->type == TSDB_SUPER_TABLE) {
      tsdbError("direct query on super tale is not allowed, table uid:%"PRIu64", tid:%d", id->uid, id->tid);
      terrno = TSDB_CODE_QRY_INVALID_MSG;
D
fix bug  
dapan1121 已提交
3777 3778 3779
      tsdbUnlockRepoMeta(tsdb);
      taosArrayDestroy(group);
      return terrno;
3780 3781
    }

H
Haojun Liao 已提交
3782 3783
    STableKeyInfo info = {.pTable = pTable, .lastKey = id->key};
    taosArrayPush(group, &info);
3784 3785
  }

B
Bomin Zhang 已提交
3786 3787 3788 3789
  if (tsdbUnlockRepoMeta(tsdb) < 0) {
    taosArrayDestroy(group);
    return terrno;
  }
3790

sangshuduo's avatar
sangshuduo 已提交
3791
  pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(group);
B
Bomin Zhang 已提交
3792 3793 3794 3795 3796
  if (pGroupInfo->numOfTables > 0) {
    taosArrayPush(pGroupInfo->pGroupList, &group);
  } else {
    taosArrayDestroy(group);
  }
3797 3798 3799

  return TSDB_CODE_SUCCESS;
}
3800
#endif
3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815
static void* doFreeColumnInfoData(SArray* pColumnInfoData) {
  if (pColumnInfoData == NULL) {
    return NULL;
  }

  size_t cols = taosArrayGetSize(pColumnInfoData);
  for (int32_t i = 0; i < cols; ++i) {
    SColumnInfoData* pColInfo = taosArrayGet(pColumnInfoData, i);
    tfree(pColInfo->pData);
  }

  taosArrayDestroy(pColumnInfoData);
  return NULL;
}

H
Haojun Liao 已提交
3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828
static void* destroyTableCheckInfo(SArray* pTableCheckInfo) {
  size_t size = taosArrayGetSize(pTableCheckInfo);
  for (int32_t i = 0; i < size; ++i) {
    STableCheckInfo* p = taosArrayGet(pTableCheckInfo, i);
    destroyTableMemIterator(p);

    tfree(p->pCompInfo);
  }

  taosArrayDestroy(pTableCheckInfo);
  return NULL;
}

3829

H
Haojun Liao 已提交
3830
void tsdbCleanupReadHandle(tsdbReaderT queryHandle) {
3831 3832
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*)queryHandle;
  if (pTsdbReadHandle == NULL) {
3833 3834
    return;
  }
3835

3836
  pTsdbReadHandle->pColumns = doFreeColumnInfoData(pTsdbReadHandle->pColumns);
3837

3838 3839 3840
  taosArrayDestroy(pTsdbReadHandle->defaultLoadColumn);
  tfree(pTsdbReadHandle->pDataBlockInfo);
  tfree(pTsdbReadHandle->statis);
3841

3842
  if (!emptyQueryTimewindow(pTsdbReadHandle)) {
3843
//    tsdbMayUnTakeMemSnapshot(pTsdbReadHandle);
3844
  } else {
3845
    assert(pTsdbReadHandle->pTableCheckInfo == NULL);
3846 3847
  }

3848 3849
  if (pTsdbReadHandle->pTableCheckInfo != NULL) {
    pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);
3850
  }
3851

3852
  tsdbDestroyReadH(&pTsdbReadHandle->rhelper);
H
Haojun Liao 已提交
3853

3854 3855
  tdFreeDataCols(pTsdbReadHandle->pDataCols);
  pTsdbReadHandle->pDataCols = NULL;
H
Haojun Liao 已提交
3856

3857 3858
  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
3859

3860
  SIOCostSummary* pCost = &pTsdbReadHandle->cost;
3861

H
Haojun Liao 已提交
3862 3863
  tsdbDebug("%p :io-cost summary: head-file read cnt:%"PRIu64", head-file time:%"PRIu64" us, statis-info:%"PRId64" us, datablock:%" PRId64" us, check data:%"PRId64" us, %s",
      pTsdbReadHandle, pCost->headFileLoad, pCost->headFileLoadTime, pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
3864

3865
  tfree(pTsdbReadHandle);
3866
}
3867

3868
#if 0
H
Haojun Liao 已提交
3869
void tsdbDestroyTableGroup(STableGroupInfo *pGroupList) {
3870 3871 3872 3873 3874 3875 3876 3877 3878 3879
  assert(pGroupList != NULL);

  size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList);

  for(int32_t i = 0; i < numOfGroup; ++i) {
    SArray* p = taosArrayGetP(pGroupList->pGroupList, i);

    size_t numOfTables = taosArrayGetSize(p);
    for(int32_t j = 0; j < numOfTables; ++j) {
      STable* pTable = taosArrayGetP(p, j);
3880 3881 3882 3883
      if (pTable != NULL) { // in case of handling retrieve data from tsdb
        tsdbUnRefTable(pTable);
      }
      //assert(pTable != NULL);
3884 3885 3886 3887 3888
    }

    taosArrayDestroy(p);
  }

3889
  taosHashCleanup(pGroupList->map);
3890
  taosArrayDestroy(pGroupList->pGroupList);
H
Haojun Liao 已提交
3891
  pGroupList->numOfTables = 0;
3892
}
H
Haojun Liao 已提交
3893 3894 3895 3896 3897 3898 3899

static void applyFilterToSkipListNode(SSkipList *pSkipList, tExprNode *pExpr, SArray *pResult, SExprTraverseSupp *param) {
  SSkipListIterator* iter = tSkipListCreateIter(pSkipList);

  // Scan each node in the skiplist by using iterator
  while (tSkipListIterNext(iter)) {
    SSkipListNode *pNode = tSkipListIterGet(iter);
H
Haojun Liao 已提交
3900
    if (exprTreeApplyFilter(pExpr, pNode, param)) {
H
Haojun Liao 已提交
3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923
      taosArrayPush(pResult, &(SL_GET_NODE_DATA(pNode)));
    }
  }

  tSkipListDestroyIter(iter);
}

typedef struct {
  char*    v;
  int32_t  optr;
} SEndPoint;

typedef struct {
  SEndPoint* start;
  SEndPoint* end;
} SQueryCond;

// todo check for malloc failure
static int32_t setQueryCond(tQueryInfo *queryColInfo, SQueryCond* pCond) {
  int32_t optr = queryColInfo->optr;

  if (optr == TSDB_RELATION_GREATER || optr == TSDB_RELATION_GREATER_EQUAL ||
      optr == TSDB_RELATION_EQUAL || optr == TSDB_RELATION_NOT_EQUAL) {
Y
yihaoDeng 已提交
3924
    pCond->start       = calloc(1, sizeof(SEndPoint));
H
Haojun Liao 已提交
3925
    pCond->start->optr = queryColInfo->optr;
Y
yihaoDeng 已提交
3926
    pCond->start->v    = queryColInfo->q;
H
Haojun Liao 已提交
3927
  } else if (optr == TSDB_RELATION_LESS || optr == TSDB_RELATION_LESS_EQUAL) {
Y
yihaoDeng 已提交
3928
    pCond->end       = calloc(1, sizeof(SEndPoint));
H
Haojun Liao 已提交
3929
    pCond->end->optr = queryColInfo->optr;
Y
yihaoDeng 已提交
3930 3931 3932 3933 3934 3935
    pCond->end->v    = queryColInfo->q;
  } else if (optr == TSDB_RELATION_IN) {
    pCond->start       = calloc(1, sizeof(SEndPoint));
    pCond->start->optr = queryColInfo->optr;
    pCond->start->v    = queryColInfo->q; 
  } else if (optr == TSDB_RELATION_LIKE) {
H
Haojun Liao 已提交
3936
    assert(0);
3937 3938
  } else if (optr == TSDB_RELATION_MATCH) {
    assert(0);
3939 3940
  } else if (optr == TSDB_RELATION_NMATCH) {
    assert(0);
H
Haojun Liao 已提交
3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023
  }

  return TSDB_CODE_SUCCESS;
}

static void queryIndexedColumn(SSkipList* pSkipList, tQueryInfo* pQueryInfo, SArray* result) {
  SSkipListIterator* iter = NULL;

  SQueryCond cond = {0};
  if (setQueryCond(pQueryInfo, &cond) != TSDB_CODE_SUCCESS) {
    //todo handle error
  }

  if (cond.start != NULL) {
    iter = tSkipListCreateIterFromVal(pSkipList, (char*) cond.start->v, pSkipList->type, TSDB_ORDER_ASC);
  } else {
    iter = tSkipListCreateIterFromVal(pSkipList, (char*)(cond.end ? cond.end->v: NULL), pSkipList->type, TSDB_ORDER_DESC);
  }

  if (cond.start != NULL) {
    int32_t optr = cond.start->optr;

    if (optr == TSDB_RELATION_EQUAL) {   // equals
      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);

        int32_t ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v);
        if (ret != 0) {
          break;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }
    } else if (optr == TSDB_RELATION_GREATER || optr == TSDB_RELATION_GREATER_EQUAL) { // greater equal
      bool comp = true;
      int32_t ret = 0;

      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);

        if (comp) {
          ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v);
          assert(ret >= 0);
        }

        if (ret == 0 && optr == TSDB_RELATION_GREATER) {
          continue;
        } else {
          STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
          taosArrayPush(result, &info);
          comp = false;
        }
      }
    } else if (optr == TSDB_RELATION_NOT_EQUAL) {   // not equal
      bool comp = true;

      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);
        comp = comp && (pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v) == 0);
        if (comp) {
          continue;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }

      tSkipListDestroyIter(iter);

      comp = true;
      iter = tSkipListCreateIterFromVal(pSkipList, (char*) cond.start->v, pSkipList->type, TSDB_ORDER_DESC);
      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);
        comp = comp && (pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v) == 0);
        if (comp) {
          continue;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }

Y
yihaoDeng 已提交
4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036
    } else if (optr == TSDB_RELATION_IN) {
      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);

        int32_t ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v);
        if (ret != 0) {
          break;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }
      
H
Haojun Liao 已提交
4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097
    } else {
      assert(0);
    }
  } else {
    int32_t optr = cond.end ? cond.end->optr : TSDB_RELATION_INVALID;
    if (optr == TSDB_RELATION_LESS || optr == TSDB_RELATION_LESS_EQUAL) {
      bool    comp = true;
      int32_t ret = 0;

      while (tSkipListIterNext(iter)) {
        SSkipListNode *pNode = tSkipListIterGet(iter);

        if (comp) {
          ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.end->v);
          assert(ret <= 0);
        }

        if (ret == 0 && optr == TSDB_RELATION_LESS) {
          continue;
        } else {
          STableKeyInfo info = {.pTable = (void *)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
          taosArrayPush(result, &info);
          comp = false;  // no need to compare anymore
        }
      }
    } else {
      assert(pQueryInfo->optr == TSDB_RELATION_ISNULL || pQueryInfo->optr == TSDB_RELATION_NOTNULL);

      while (tSkipListIterNext(iter)) {
        SSkipListNode *pNode = tSkipListIterGet(iter);

        bool isnull = isNull(SL_GET_NODE_KEY(pSkipList, pNode), pQueryInfo->sch.type);
        if ((pQueryInfo->optr == TSDB_RELATION_ISNULL && isnull) ||
            (pQueryInfo->optr == TSDB_RELATION_NOTNULL && (!isnull))) {
          STableKeyInfo info = {.pTable = (void *)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
          taosArrayPush(result, &info);
        }
      }
    }
  }

  free(cond.start);
  free(cond.end);
  tSkipListDestroyIter(iter);
}

static void queryIndexlessColumn(SSkipList* pSkipList, tQueryInfo* pQueryInfo, SArray* res, __result_filter_fn_t filterFp) {
  SSkipListIterator* iter = tSkipListCreateIter(pSkipList);

  while (tSkipListIterNext(iter)) {
    bool addToResult = false;

    SSkipListNode *pNode = tSkipListIterGet(iter);

    char *pData = SL_GET_NODE_DATA(pNode);
    tstr *name = (tstr*) tsdbGetTableName((void*) pData);

    // todo speed up by using hash
    if (pQueryInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
      if (pQueryInfo->optr == TSDB_RELATION_IN) {
        addToResult = pQueryInfo->compare(name, pQueryInfo->q);
4098 4099 4100
      } else if (pQueryInfo->optr == TSDB_RELATION_LIKE ||
                 pQueryInfo->optr == TSDB_RELATION_MATCH ||
                 pQueryInfo->optr == TSDB_RELATION_NMATCH) {
H
Haojun Liao 已提交
4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131
        addToResult = !pQueryInfo->compare(name, pQueryInfo->q);
      }
    } else {
      addToResult = filterFp(pNode, pQueryInfo);
    }

    if (addToResult) {
      STableKeyInfo info = {.pTable = (void*)pData, .lastKey = TSKEY_INITIAL_VAL};
      taosArrayPush(res, &info);
    }
  }

  tSkipListDestroyIter(iter);
}

// Apply the filter expression to each node in the skiplist to acquire the qualified nodes in skip list
void getTableListfromSkipList(tExprNode *pExpr, SSkipList *pSkipList, SArray *result, SExprTraverseSupp *param) {
  if (pExpr == NULL) {
    return;
  }

  tExprNode *pLeft  = pExpr->_node.pLeft;
  tExprNode *pRight = pExpr->_node.pRight;

  // column project
  if (pLeft->nodeType != TSQL_NODE_EXPR && pRight->nodeType != TSQL_NODE_EXPR) {
    assert(pLeft->nodeType == TSQL_NODE_COL && (pRight->nodeType == TSQL_NODE_VALUE || pRight->nodeType == TSQL_NODE_DUMMY));

    param->setupInfoFn(pExpr, param->pExtInfo);

    tQueryInfo *pQueryInfo = pExpr->_node.info;
4132 4133
    if (pQueryInfo->indexed && (pQueryInfo->optr != TSDB_RELATION_LIKE
                                && pQueryInfo->optr != TSDB_RELATION_MATCH && pQueryInfo->optr != TSDB_RELATION_NMATCH
4134
                                && pQueryInfo->optr != TSDB_RELATION_IN)) {
H
Haojun Liao 已提交
4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149
      queryIndexedColumn(pSkipList, pQueryInfo, result);
    } else {
      queryIndexlessColumn(pSkipList, pQueryInfo, result, param->nodeFilterFn);
    }

    return;
  }

  // The value of hasPK is always 0.
  uint8_t weight = pLeft->_node.hasPK + pRight->_node.hasPK;
  assert(weight == 0 && pSkipList != NULL && taosArrayGetSize(result) == 0);

  //apply the hierarchical filter expression to every node in skiplist to find the qualified nodes
  applyFilterToSkipListNode(pSkipList, pExpr, result, param);
}
4150
#endif