tsdbRead.c 141.2 KB
Newer Older
H
hjxilinx 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

16 17 18 19 20 21 22
#include "tsdb.h"
#include "tsdbDef.h"
#include "tsdbFS.h"
#include "tsdbLog.h"
#include "tsdbReadImpl.h"
#include "ttime.h"
#include "exception.h"
H
hjxilinx 已提交
23
#include "os.h"
24
#include "talgo.h"
25
#include "tcompare.h"
26 27
#include "tdataformat.h"
#include "tskiplist.h"
28

29
#include "taosdef.h"
30
#include "tlosertree.h"
H
Hongze Cheng 已提交
31
#include "tsdbDef.h"
32
#include "tmsg.h"
33

34
#define EXTRA_BYTES 2
35
#define ASCENDING_TRAVERSE(o)   (o == TSDB_ORDER_ASC)
36
#define QH_GET_NUM_OF_COLS(handle) ((size_t)(taosArrayGetSize((handle)->pColumns)))
H
hjxilinx 已提交
37

H
Haojun Liao 已提交
38 39 40 41
#define GET_FILE_DATA_BLOCK_INFO(_checkInfo, _block)                                   \
  ((SDataBlockInfo){.window = {.skey = (_block)->keyFirst, .ekey = (_block)->keyLast}, \
                    .numOfCols = (_block)->numOfCols,                                  \
                    .rows = (_block)->numOfRows,                                       \
42
                    .uid = (_checkInfo)->tableId})
H
Haojun Liao 已提交
43

H
hjxilinx 已提交
44
enum {
45 46
  TSDB_QUERY_TYPE_ALL      = 1,
  TSDB_QUERY_TYPE_LAST     = 2,
H
hjxilinx 已提交
47 48
};

49 50 51 52 53 54
enum {
  TSDB_CACHED_TYPE_NONE    = 0,
  TSDB_CACHED_TYPE_LASTROW = 1,
  TSDB_CACHED_TYPE_LAST    = 2,
};

55 56
typedef struct SQueryFilePos {
  int32_t fid;
57 58
  int32_t slot;
  int32_t pos;
59
  int64_t lastKey;
60 61
  int32_t rows;
  bool    mixBlock;
62
  bool    blockCompleted;
63
  STimeWindow win;
64
} SQueryFilePos;
H
hjxilinx 已提交
65

66
typedef struct SDataBlockLoadInfo {
H
Hongze Cheng 已提交
67
  SDFileSet*  fileGroup;
68
  int32_t     slot;
69
  uint64_t    uid;
70
  SArray*     pLoadedCols;
71
} SDataBlockLoadInfo;
H
hjxilinx 已提交
72

73
typedef struct SLoadCompBlockInfo {
H
hjLiao 已提交
74
  int32_t tid; /* table tid */
75 76
  int32_t fileId;
} SLoadCompBlockInfo;
H
hjxilinx 已提交
77

78 79 80 81 82 83
enum {
  CHECKINFO_CHOSEN_MEM  = 0,
  CHECKINFO_CHOSEN_IMEM = 1,
  CHECKINFO_CHOSEN_BOTH = 2    //for update=2(merge case)
};

84
typedef struct STableCheckInfo {
85
  uint64_t      tableId;
H
Haojun Liao 已提交
86
  TSKEY         lastKey;
H
Haojun Liao 已提交
87
  SBlockInfo*   pCompInfo;
H
Haojun Liao 已提交
88
  int32_t       compSize;
89
  int32_t       numOfBlocks:29; // number of qualified data blocks not the original blocks
90
  uint8_t       chosen:2;       // indicate which iterator should move forward
H
Haojun Liao 已提交
91 92 93
  bool          initBuf;        // whether to initialize the in-memory skip list iterator or not
  SSkipListIterator* iter;      // mem buffer skip list iterator
  SSkipListIterator* iiter;     // imem buffer skip list iterator
94
} STableCheckInfo;
95

96
typedef struct STableBlockInfo {
H
Haojun Liao 已提交
97 98
  SBlock          *compBlock;
  STableCheckInfo *pTableCheckInfo;
99
} STableBlockInfo;
100

101 102
typedef struct SBlockOrderSupporter {
  int32_t             numOfTables;
H
Haojun Liao 已提交
103
  STableBlockInfo**   pDataBlockInfo;
104
  int32_t*            blockIndexArray;
105
  int32_t*            numOfBlocksPerTable;
106 107
} SBlockOrderSupporter;

H
Haojun Liao 已提交
108 109 110
typedef struct SIOCostSummary {
  int64_t blockLoadTime;
  int64_t statisInfoLoadTime;
H
Haojun Liao 已提交
111
  int64_t checkForNextTime;
112 113
  int64_t headFileLoad;
  int64_t headFileLoadTime;
H
Haojun Liao 已提交
114 115
} SIOCostSummary;

116 117
typedef struct STsdbReadHandle {
  STsdb*     pTsdb;
H
Haojun Liao 已提交
118 119 120 121 122 123 124 125 126
  SQueryFilePos  cur;              // current position
  int16_t        order;
  STimeWindow    window;           // the primary query time window that applies to all queries
  SDataStatis*   statis;           // query level statistics, only one table block statistics info exists at any time
  int32_t        numOfBlocks;
  SArray*        pColumns;         // column list, SColumnInfoData array list
  bool           locateStart;
  int32_t        outputCapacity;
  int32_t        realNumOfRows;
H
Haojun Liao 已提交
127
  SArray*        pTableCheckInfo;  // SArray<STableCheckInfo>
H
Haojun Liao 已提交
128 129
  int32_t        activeIndex;
  bool           checkFiles;       // check file stage
D
init  
dapan1121 已提交
130
  int8_t         cachelastrow;     // check if last row cached
131
  bool           loadExternalRow;  // load time window external data rows
H
Haojun Liao 已提交
132 133
  bool           currentLoadExternalRows; // current load external rows
  int32_t        loadType;         // block load type
H
Haojun Liao 已提交
134
  char          *idStr;            // query info handle, for debug purpose
H
Haojun Liao 已提交
135
  int32_t        type;             // query type: retrieve all data blocks, 2. retrieve only last row, 3. retrieve direct prev|next rows
H
Hongze Cheng 已提交
136 137 138
  SDFileSet*     pFileGroup;
  SFSIter        fileIter;
  SReadH         rhelper;
H
Haojun Liao 已提交
139
  STableBlockInfo* pDataBlockInfo;
H
Haojun Liao 已提交
140
  SDataCols     *pDataCols;        // in order to hold current file data block
H
Haojun Liao 已提交
141
  int32_t        allocSize;        // allocated data block size
H
Haojun Liao 已提交
142
  SArray        *defaultLoadColumn;// default load column
H
Haojun Liao 已提交
143
  SDataBlockLoadInfo dataBlockLoadInfo; /* record current block load information */
H
Haojun Liao 已提交
144
  SLoadCompBlockInfo compBlockLoadInfo; /* record current compblock information in SQueryAttr */
H
Haojun Liao 已提交
145

146 147
  SArray        *prev;             // previous row which is before than time window
  SArray        *next;             // next row which is after the query time window
H
Haojun Liao 已提交
148
  SIOCostSummary cost;
149
} STsdbReadHandle;
150

H
Haojun Liao 已提交
151 152 153
typedef struct STableGroupSupporter {
  int32_t    numOfCols;
  SColIndex* pCols;
154
  SSchema*   pTagSchema;
H
Haojun Liao 已提交
155 156
} STableGroupSupporter;

157
static STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList);
158 159 160
static int32_t checkForCachedLastRow(STsdbReadHandle* pTsdbReadHandle, STableGroupInfo *groupList);
static int32_t checkForCachedLast(STsdbReadHandle* pTsdbReadHandle);
//static int32_t tsdbGetCachedLastRow(STable* pTable, SMemRow* pRes, TSKEY* lastKey);
H
Haojun Liao 已提交
161

H
Haojun Liao 已提交
162
static void    changeQueryHandleForInterpQuery(tsdbReaderT pHandle);
163
static void    doMergeTwoLevelData(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock);
164
static int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order);
165
static int32_t tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* win, STsdbReadHandle* pTsdbReadHandle);
166
static int32_t tsdbCheckInfoCompar(const void* key1, const void* key2);
167 168 169
//static int32_t doGetExternalRow(STsdbReadHandle* pTsdbReadHandle, int16_t type, void* pMemRef);
//static void*   doFreeColumnInfoData(SArray* pColumnInfoData);
//static void*   destroyTableCheckInfo(SArray* pTableCheckInfo);
H
Haojun Liao 已提交
170
static bool    tsdbGetExternalRow(tsdbReaderT pHandle);
Y
TD-1733  
yihaoDeng 已提交
171

172
static void tsdbInitDataBlockLoadInfo(SDataBlockLoadInfo* pBlockLoadInfo) {
H
hjxilinx 已提交
173
  pBlockLoadInfo->slot = -1;
174
  pBlockLoadInfo->uid  = 0;
H
hjxilinx 已提交
175
  pBlockLoadInfo->fileGroup = NULL;
H
hjxilinx 已提交
176 177
}

178
static void tsdbInitCompBlockLoadInfo(SLoadCompBlockInfo* pCompBlockLoadInfo) {
H
hjLiao 已提交
179
  pCompBlockLoadInfo->tid = -1;
180 181
  pCompBlockLoadInfo->fileId = -1;
}
H
hjxilinx 已提交
182

183 184
static SArray* getColumnIdList(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfCols = QH_GET_NUM_OF_COLS(pTsdbReadHandle);
H
Haojun Liao 已提交
185 186 187 188
  assert(numOfCols <= TSDB_MAX_COLUMNS);

  SArray* pIdList = taosArrayInit(numOfCols, sizeof(int16_t));
  for (int32_t i = 0; i < numOfCols; ++i) {
189
    SColumnInfoData* pCol = taosArrayGet(pTsdbReadHandle->pColumns, i);
H
Haojun Liao 已提交
190 191 192 193 194 195
    taosArrayPush(pIdList, &pCol->info.colId);
  }

  return pIdList;
}

196 197
static SArray* getDefaultLoadColumns(STsdbReadHandle* pTsdbReadHandle, bool loadTS) {
  SArray* pLocalIdList = getColumnIdList(pTsdbReadHandle);
H
Haojun Liao 已提交
198 199 200 201 202

  // check if the primary time stamp column needs to load
  int16_t colId = *(int16_t*)taosArrayGet(pLocalIdList, 0);

  // the primary timestamp column does not be included in the the specified load column list, add it
H
Haojun Liao 已提交
203 204
  if (loadTS && colId != PRIMARYKEY_TIMESTAMP_COL_ID) {
    int16_t columnId = PRIMARYKEY_TIMESTAMP_COL_ID;
H
Haojun Liao 已提交
205 206 207 208 209 210
    taosArrayInsert(pLocalIdList, 0, &columnId);
  }

  return pLocalIdList;
}

H
Haojun Liao 已提交
211
//int64_t tsdbGetNumOfRowsInMemTable(tsdbReaderT* pHandle) {
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
//  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
//
//  int64_t rows = 0;
//  STsdbMemTable* pMemTable = pTsdbReadHandle->pMemTable;
//  if (pMemTable == NULL) { return rows; }
//
////  STableData* pMem  = NULL;
////  STableData* pIMem = NULL;
//
////  SMemTable* pMemT = pMemRef->snapshot.mem;
////  SMemTable* pIMemT = pMemRef->snapshot.imem;
//
//  size_t size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//  for (int32_t i = 0; i < size; ++i) {
//    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
//
////    if (pMemT && pCheckInfo->tableId < pMemT->maxTables) {
////      pMem = pMemT->tData[pCheckInfo->tableId];
////      rows += (pMem && pMem->uid == pCheckInfo->tableId) ? pMem->numOfRows : 0;
////    }
////    if (pIMemT && pCheckInfo->tableId < pIMemT->maxTables) {
////      pIMem = pIMemT->tData[pCheckInfo->tableId];
////      rows += (pIMem && pIMem->uid == pCheckInfo->tableId) ? pIMem->numOfRows : 0;
////    }
//  }
//  return rows;
//}

240 241 242
static SArray* createCheckInfoFromTableGroup(STsdbReadHandle* pTsdbReadHandle, STableGroupInfo* pGroupList) {
  size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList);
  assert(numOfGroup >= 1);
H
Haojun Liao 已提交
243 244 245 246 247 248 249 250

  // allocate buffer in order to load data blocks from file
  SArray* pTableCheckInfo = taosArrayInit(pGroupList->numOfTables, sizeof(STableCheckInfo));
  if (pTableCheckInfo == NULL) {
    return NULL;
  }

  // todo apply the lastkey of table check to avoid to load header file
251
  for (int32_t i = 0; i < numOfGroup; ++i) {
H
Haojun Liao 已提交
252 253 254 255 256 257 258 259
    SArray* group = *(SArray**) taosArrayGet(pGroupList->pGroupList, i);

    size_t gsize = taosArrayGetSize(group);
    assert(gsize > 0);

    for (int32_t j = 0; j < gsize; ++j) {
      STableKeyInfo* pKeyInfo = (STableKeyInfo*) taosArrayGet(group, j);

260
      STableCheckInfo info = { .lastKey = pKeyInfo->lastKey, .tableId = pKeyInfo->uid};
261 262 263
      if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
        if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReadHandle->window.skey) {
          info.lastKey = pTsdbReadHandle->window.skey;
264 265
        }

266
        assert(info.lastKey >= pTsdbReadHandle->window.skey && info.lastKey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
267
      } else {
268
        assert(info.lastKey >= pTsdbReadHandle->window.ekey && info.lastKey <= pTsdbReadHandle->window.skey);
H
Haojun Liao 已提交
269 270 271
      }

      taosArrayPush(pTableCheckInfo, &info);
H
Haojun Liao 已提交
272
      tsdbDebug("%p check table uid:%"PRId64" from lastKey:%"PRId64" %s", pTsdbReadHandle, info.tableId, info.lastKey, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
273 274 275
    }
  }

276
  // TODO  group table according to the tag value.
277
  taosArraySort(pTableCheckInfo, tsdbCheckInfoCompar);
H
Haojun Liao 已提交
278 279 280
  return pTableCheckInfo;
}

281 282
static void resetCheckInfo(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
283 284 285 286
  assert(numOfTables >= 1);

  // todo apply the lastkey of table check to avoid to load header file
  for (int32_t i = 0; i < numOfTables; ++i) {
287 288
    STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
    pCheckInfo->lastKey = pTsdbReadHandle->window.skey;
H
Haojun Liao 已提交
289 290
    pCheckInfo->iter    = tSkipListDestroyIter(pCheckInfo->iter);
    pCheckInfo->iiter   = tSkipListDestroyIter(pCheckInfo->iiter);
291
    pCheckInfo->initBuf = false;
H
Haojun Liao 已提交
292

293 294
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
      assert(pCheckInfo->lastKey >= pTsdbReadHandle->window.skey);
295
    } else {
296
      assert(pCheckInfo->lastKey <= pTsdbReadHandle->window.skey);
297
    }
H
Haojun Liao 已提交
298 299 300
  }
}

H
Haojun Liao 已提交
301 302 303
// only one table, not need to sort again
static SArray* createCheckInfoFromCheckInfo(STableCheckInfo* pCheckInfo, TSKEY skey, SArray** psTable) {
  SArray* pNew = taosArrayInit(1, sizeof(STableCheckInfo));
D
fix bug  
dapan1121 已提交
304

H
Haojun Liao 已提交
305
  STableCheckInfo info = { .lastKey = skey};
H
Haojun Liao 已提交
306

H
Haojun Liao 已提交
307 308
  info.tableId = pCheckInfo->tableId;
  taosArrayPush(pNew, &info);
H
Haojun Liao 已提交
309 310 311
  return pNew;
}

312 313
static bool emptyQueryTimewindow(STsdbReadHandle* pTsdbReadHandle) {
  assert(pTsdbReadHandle != NULL);
314

315 316
  STimeWindow* w = &pTsdbReadHandle->window;
  bool asc = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
317 318 319 320

  return ((asc && w->skey > w->ekey) || (!asc && w->ekey > w->skey));
}

321 322
// Update the query time window according to the data time to live(TTL) information, in order to avoid to return
// the expired data to client, even it is queried already.
323
static int64_t getEarliestValidTimestamp(STsdb* pTsdb) {
324 325 326
  STsdbCfg* pCfg = &pTsdb->config;

  int64_t now = taosGetTimestamp(pCfg->precision);
327
  return now - (tsTickPerDay[pCfg->precision] * pCfg->keep) + 1;  // needs to add one tick
328 329
}

330 331
static void setQueryTimewindow(STsdbReadHandle* pTsdbReadHandle, STsdbQueryCond* pCond) {
  pTsdbReadHandle->window = pCond->twindow;
332

333
  bool    updateTs = false;
334 335 336 337
  int64_t startTs = getEarliestValidTimestamp(pTsdbReadHandle->pTsdb);
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
    if (startTs > pTsdbReadHandle->window.skey) {
      pTsdbReadHandle->window.skey = startTs;
338 339
      pCond->twindow.skey = startTs;
      updateTs = true;
340 341
    }
  } else {
342 343
    if (startTs > pTsdbReadHandle->window.ekey) {
      pTsdbReadHandle->window.ekey = startTs;
344 345
      pCond->twindow.ekey = startTs;
      updateTs = true;
346 347 348
    }
  }

349
  if (updateTs) {
H
Haojun Liao 已提交
350 351 352
    tsdbDebug("%p update the query time window, old:%" PRId64 " - %" PRId64 ", new:%" PRId64 " - %" PRId64 ", %s",
              pTsdbReadHandle, pCond->twindow.skey, pCond->twindow.ekey, pTsdbReadHandle->window.skey,
              pTsdbReadHandle->window.ekey, pTsdbReadHandle->idStr);
353
  }
354 355
}

H
Haojun Liao 已提交
356
static STsdbReadHandle* tsdbQueryTablesImpl(STsdb* tsdb, STsdbQueryCond* pCond, uint64_t qId, uint64_t taskId) {
357 358
  STsdbReadHandle* pReadHandle = calloc(1, sizeof(STsdbReadHandle));
  if (pReadHandle == NULL) {
359
    goto _end;
360
  }
H
Haojun Liao 已提交
361

362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
  pReadHandle->order       = pCond->order;
  pReadHandle->pTsdb       = tsdb;
  pReadHandle->type        = TSDB_QUERY_TYPE_ALL;
  pReadHandle->cur.fid     = INT32_MIN;
  pReadHandle->cur.win     = TSWINDOW_INITIALIZER;
  pReadHandle->checkFiles  = true;
  pReadHandle->activeIndex = 0;   // current active table index
  pReadHandle->allocSize   = 0;
  pReadHandle->locateStart = false;
  pReadHandle->loadType    = pCond->type;

  pReadHandle->outputCapacity  = 4096;//((STsdb*)tsdb)->config.maxRowsPerFileBlock;
  pReadHandle->loadExternalRow = pCond->loadExternalRows;
  pReadHandle->currentLoadExternalRows = pCond->loadExternalRows;

H
Haojun Liao 已提交
377
  char buf[128] = {0};
H
Haojun Liao 已提交
378
  snprintf(buf, tListLen(buf), "TID:0x%"PRIx64" QID:0x%"PRIx64, taskId, qId);
H
Haojun Liao 已提交
379 380
  pReadHandle->idStr = strdup(buf);

381
  if (tsdbInitReadH(&pReadHandle->rhelper, (STsdb*)tsdb) != 0) {
382
    goto _end;
B
Bomin Zhang 已提交
383
  }
H
Haojun Liao 已提交
384

385 386
  assert(pCond != NULL);
  setQueryTimewindow(pReadHandle, pCond);
387

388 389
  if (pCond->numOfCols > 0) {
    // allocate buffer in order to load data blocks from file
390 391
    pReadHandle->statis = calloc(pCond->numOfCols, sizeof(SDataStatis));
    if (pReadHandle->statis == NULL) {
392
      goto _end;
393
    }
H
Haojun Liao 已提交
394

395
    // todo: use list instead of array?
396 397
    pReadHandle->pColumns = taosArrayInit(pCond->numOfCols, sizeof(SColumnInfoData));
    if (pReadHandle->pColumns == NULL) {
398
      goto _end;
399
    }
H
Haojun Liao 已提交
400

401 402
    for (int32_t i = 0; i < pCond->numOfCols; ++i) {
      SColumnInfoData colInfo = {{0}, 0};
H
Haojun Liao 已提交
403

404
      colInfo.info = pCond->colList[i];
405
      colInfo.pData = calloc(1, EXTRA_BYTES + pReadHandle->outputCapacity * pCond->colList[i].bytes);
406
      if (colInfo.pData == NULL) {
407
        goto _end;
408
      }
409

410 411
      taosArrayPush(pReadHandle->pColumns, &colInfo);
      pReadHandle->statis[i].colId = colInfo.info.colId;
B
Bomin Zhang 已提交
412
    }
H
Haojun Liao 已提交
413

414
    pReadHandle->defaultLoadColumn = getDefaultLoadColumns(pReadHandle, true);
H
Haojun Liao 已提交
415
  }
416

417 418
  pReadHandle->pDataCols = tdNewDataCols(1000, pReadHandle->pTsdb->config.maxRowsPerFileBlock);
  if (pReadHandle->pDataCols == NULL) {
H
Haojun Liao 已提交
419
    tsdbError("%p failed to malloc buf for pDataCols, %s", pReadHandle, pReadHandle->idStr);
H
Haojun Liao 已提交
420
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
421
    goto _end;
H
hjxilinx 已提交
422
  }
423

424 425
  tsdbInitDataBlockLoadInfo(&pReadHandle->dataBlockLoadInfo);
  tsdbInitCompBlockLoadInfo(&pReadHandle->compBlockLoadInfo);
426

H
Haojun Liao 已提交
427
  return (tsdbReaderT)pReadHandle;
428

429
  _end:
430
  tsdbCleanupReadHandle(pReadHandle);
431
  terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
432
  return NULL;
H
hjxilinx 已提交
433 434
}

H
Haojun Liao 已提交
435
tsdbReaderT* tsdbQueryTables(STsdb* tsdb, STsdbQueryCond* pCond, STableGroupInfo* groupList, uint64_t qId, uint64_t taskId) {
H
Haojun Liao 已提交
436
  STsdbReadHandle* pTsdbReadHandle = tsdbQueryTablesImpl(tsdb, pCond, qId, taskId);
437
  if (pTsdbReadHandle == NULL) {
438 439 440
    return NULL;
  }

441
  if (emptyQueryTimewindow(pTsdbReadHandle)) {
H
Haojun Liao 已提交
442
    return (tsdbReaderT*) pTsdbReadHandle;
443
  }
H
Haojun Liao 已提交
444 445

  // todo apply the lastkey of table check to avoid to load header file
446
  pTsdbReadHandle->pTableCheckInfo = createCheckInfoFromTableGroup(pTsdbReadHandle, groupList);
447
  if (pTsdbReadHandle->pTableCheckInfo == NULL) {
448
//    tsdbCleanupReadHandle(pTsdbReadHandle);
H
Haojun Liao 已提交
449 450 451 452
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    return NULL;
  }

453 454 455
  tsdbDebug("%p total numOfTable:%" PRIzu " in this query, group %"PRIzu" %s", pTsdbReadHandle, taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo),
      taosArrayGetSize(groupList->pGroupList), pTsdbReadHandle->idStr);

H
Haojun Liao 已提交
456
  return (tsdbReaderT) pTsdbReadHandle;
H
Haojun Liao 已提交
457 458
}

H
Haojun Liao 已提交
459
void tsdbResetQueryHandle(tsdbReaderT queryHandle, STsdbQueryCond *pCond) {
460
  STsdbReadHandle* pTsdbReadHandle = queryHandle;
H
Haojun Liao 已提交
461

462 463 464
  if (emptyQueryTimewindow(pTsdbReadHandle)) {
    if (pCond->order != pTsdbReadHandle->order) {
      pTsdbReadHandle->order = pCond->order;
dengyihao's avatar
dengyihao 已提交
465
      TSWAP(pTsdbReadHandle->window.skey, pTsdbReadHandle->window.ekey, int64_t);
466 467 468 469 470
    }

    return;
  }

471 472 473 474 475 476 477 478 479
  pTsdbReadHandle->order       = pCond->order;
  pTsdbReadHandle->window      = pCond->twindow;
  pTsdbReadHandle->type        = TSDB_QUERY_TYPE_ALL;
  pTsdbReadHandle->cur.fid     = -1;
  pTsdbReadHandle->cur.win     = TSWINDOW_INITIALIZER;
  pTsdbReadHandle->checkFiles  = true;
  pTsdbReadHandle->activeIndex = 0;   // current active table index
  pTsdbReadHandle->locateStart = false;
  pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;
H
Haojun Liao 已提交
480 481

  if (ASCENDING_TRAVERSE(pCond->order)) {
482
    assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
483
  } else {
484
    assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
485 486 487
  }

  // allocate buffer in order to load data blocks from file
488
  memset(pTsdbReadHandle->statis, 0, sizeof(SDataStatis));
H
Haojun Liao 已提交
489

490 491
  tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
  tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);
H
Haojun Liao 已提交
492

493
  resetCheckInfo(pTsdbReadHandle);
H
Haojun Liao 已提交
494 495
}

H
Haojun Liao 已提交
496
void tsdbResetQueryHandleForNewTable(tsdbReaderT queryHandle, STsdbQueryCond *pCond, STableGroupInfo* groupList) {
497
  STsdbReadHandle* pTsdbReadHandle = queryHandle;
H
Haojun Liao 已提交
498

499 500 501 502 503 504 505 506 507
  pTsdbReadHandle->order       = pCond->order;
  pTsdbReadHandle->window      = pCond->twindow;
  pTsdbReadHandle->type        = TSDB_QUERY_TYPE_ALL;
  pTsdbReadHandle->cur.fid     = -1;
  pTsdbReadHandle->cur.win     = TSWINDOW_INITIALIZER;
  pTsdbReadHandle->checkFiles  = true;
  pTsdbReadHandle->activeIndex = 0;   // current active table index
  pTsdbReadHandle->locateStart = false;
  pTsdbReadHandle->loadExternalRow = pCond->loadExternalRows;
H
Haojun Liao 已提交
508 509

  if (ASCENDING_TRAVERSE(pCond->order)) {
510
    assert(pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
511
  } else {
512
    assert(pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
513 514 515
  }

  // allocate buffer in order to load data blocks from file
516
  memset(pTsdbReadHandle->statis, 0, sizeof(SDataStatis));
H
Haojun Liao 已提交
517

518 519
  tsdbInitDataBlockLoadInfo(&pTsdbReadHandle->dataBlockLoadInfo);
  tsdbInitCompBlockLoadInfo(&pTsdbReadHandle->compBlockLoadInfo);
H
Haojun Liao 已提交
520

H
Haojun Liao 已提交
521
  SArray* pTable = NULL;
522
//  STsdbMeta* pMeta = tsdbGetMeta(pTsdbReadHandle->pTsdb);
H
Haojun Liao 已提交
523

524
//  pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
525

526 527
  pTsdbReadHandle->pTableCheckInfo = NULL;//createCheckInfoFromTableGroup(pTsdbReadHandle, groupList, pMeta, &pTable);
  if (pTsdbReadHandle->pTableCheckInfo == NULL) {
528
//    tsdbCleanupReadHandle(pTsdbReadHandle);
H
Haojun Liao 已提交
529 530
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
  }
H
Haojun Liao 已提交
531

532 533
//  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
H
Haojun Liao 已提交
534 535
}

H
Haojun Liao 已提交
536
tsdbReaderT tsdbQueryLastRow(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, uint64_t taskId) {
537
  pCond->twindow = updateLastrowForEachGroup(groupList);
H
Haojun Liao 已提交
538 539 540 541 542 543

  // no qualified table
  if (groupList->numOfTables == 0) {
    return NULL;
  }

H
Haojun Liao 已提交
544
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, taskId);
545
  if (pTsdbReadHandle == NULL) {
546 547 548
    return NULL;
  }

549
  int32_t code = checkForCachedLastRow(pTsdbReadHandle, groupList);
H
Haojun Liao 已提交
550 551 552 553
  if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0
    terrno = code;
    return NULL;
  }
H
Haojun Liao 已提交
554 555

  assert(pCond->order == TSDB_ORDER_ASC && pCond->twindow.skey <= pCond->twindow.ekey);
556 557
  if (pTsdbReadHandle->cachelastrow) {
    pTsdbReadHandle->type = TSDB_QUERY_TYPE_LAST;
D
init  
dapan1121 已提交
558 559
  }
  
560
  return pTsdbReadHandle;
D
init  
dapan1121 已提交
561 562
}

563
#if 0
H
Haojun Liao 已提交
564
tsdbReaderT tsdbQueryCacheLast(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, STsdbMemTable* pMemRef) {
565 566
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pMemRef);
  if (pTsdbReadHandle == NULL) {
567 568 569
    return NULL;
  }

570
  int32_t code = checkForCachedLast(pTsdbReadHandle);
D
init  
dapan1121 已提交
571 572 573 574 575
  if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0
    terrno = code;
    return NULL;
  }

576 577
  if (pTsdbReadHandle->cachelastrow) {
    pTsdbReadHandle->type = TSDB_QUERY_TYPE_LAST;
D
fix bug  
dapan1121 已提交
578
  }
D
init  
dapan1121 已提交
579
  
580
  return pTsdbReadHandle;
H
hjxilinx 已提交
581 582
}

583
#endif
H
Haojun Liao 已提交
584
SArray* tsdbGetQueriedTableList(tsdbReaderT *pHandle) {
585
  assert(pHandle != NULL);
H
Haojun Liao 已提交
586

587
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) pHandle;
H
Haojun Liao 已提交
588

589
  size_t size = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
590
  SArray* res = taosArrayInit(size, POINTER_BYTES);
591 592 593
  return res;
}

H
Haojun Liao 已提交
594 595 596 597 598 599
// leave only one table for each group
static STableGroupInfo* trimTableGroup(STimeWindow* window, STableGroupInfo* pGroupList) {
  assert(pGroupList);
  size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList);

  STableGroupInfo* pNew = calloc(1, sizeof(STableGroupInfo));
Y
yihaoDeng 已提交
600
  pNew->pGroupList = taosArrayInit(numOfGroup, POINTER_BYTES);
H
Haojun Liao 已提交
601 602 603 604 605 606 607 608

  for(int32_t i = 0; i < numOfGroup; ++i) {
    SArray* oneGroup = taosArrayGetP(pGroupList->pGroupList, i);
    size_t numOfTables = taosArrayGetSize(oneGroup);

    SArray* px = taosArrayInit(4, sizeof(STableKeyInfo));
    for (int32_t j = 0; j < numOfTables; ++j) {
      STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(oneGroup, j);
609 610 611 612 613
//      if (window->skey <= pInfo->lastKey && ((STable*)pInfo->pTable)->lastKey != TSKEY_INITIAL_VAL) {
//        taosArrayPush(px, pInfo);
//        pNew->numOfTables += 1;
//        break;
//      }
H
Haojun Liao 已提交
614 615 616 617 618 619 620 621 622 623 624 625 626
    }

    // there are no data in this group
    if (taosArrayGetSize(px) == 0) {
      taosArrayDestroy(px);
    } else {
      taosArrayPush(pNew->pGroupList, &px);
    }
  }

  return pNew;
}

H
Haojun Liao 已提交
627
tsdbReaderT tsdbQueryRowsInExternalWindow(STsdb *tsdb, STsdbQueryCond* pCond, STableGroupInfo *groupList, uint64_t qId, uint64_t taskId) {
H
Haojun Liao 已提交
628 629
  STableGroupInfo* pNew = trimTableGroup(&pCond->twindow, groupList);

630 631 632 633 634 635 636 637 638 639 640 641
  if (pNew->numOfTables == 0) {
    tsdbDebug("update query time range to invalidate time window");

    assert(taosArrayGetSize(pNew->pGroupList) == 0);
    bool asc = ASCENDING_TRAVERSE(pCond->order);
    if (asc) {
      pCond->twindow.ekey = pCond->twindow.skey - 1;
    } else {
      pCond->twindow.skey = pCond->twindow.ekey - 1;
    }
  }

H
Haojun Liao 已提交
642
  STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) tsdbQueryTables(tsdb, pCond, pNew, qId, taskId);
643 644
  pTsdbReadHandle->loadExternalRow = true;
  pTsdbReadHandle->currentLoadExternalRows = true;
645

646
  return pTsdbReadHandle;
647 648
}

649
static bool initTableMemIterator(STsdbReadHandle* pHandle, STableCheckInfo* pCheckInfo) {
650
  if (pCheckInfo->initBuf) {
651 652
    return true;
  }
H
Haojun Liao 已提交
653

654
  pCheckInfo->initBuf = true;
655
  int32_t order = pHandle->order;
H
Haojun Liao 已提交
656

657 658 659 660 661 662 663
  STbData** pMem = NULL;
  STbData** pIMem = NULL;

  TKEY tLastKey = 0;  /// keyToTkey(pCheckInfo->lastKey);
  if (pHandle->pTsdb->mem != NULL) {
    pMem = taosHashGet(pHandle->pTsdb->mem->pHashIdx, &pCheckInfo->tableId, sizeof(pCheckInfo->tableId));
    if (pMem != NULL) {
H
Haojun Liao 已提交
664
      pCheckInfo->iter =
665
          tSkipListCreateIterFromVal((*pMem)->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order);
H
Haojun Liao 已提交
666
    }
667
  }
H
Haojun Liao 已提交
668

669 670 671
  if (pHandle->pTsdb->imem != NULL) {
    pIMem = taosHashGet(pHandle->pTsdb->imem->pHashIdx, &pCheckInfo->tableId, sizeof(pCheckInfo->tableId));
    if (pIMem != NULL) {
H
Haojun Liao 已提交
672
      pCheckInfo->iiter =
673
          tSkipListCreateIterFromVal((*pIMem)->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order);
H
Haojun Liao 已提交
674
    }
675
  }
H
Haojun Liao 已提交
676

677 678 679 680
  // both iterators are NULL, no data in buffer right now
  if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) {
    return false;
  }
H
Haojun Liao 已提交
681

682 683 684 685 686
  bool memEmpty  = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter));
  bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter));
  if (memEmpty && imemEmpty) { // buffer is empty
    return false;
  }
H
Haojun Liao 已提交
687

688 689 690
  if (!memEmpty) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
    assert(node != NULL);
H
Haojun Liao 已提交
691

C
Cary Xu 已提交
692 693
    SMemRow row = (SMemRow)SL_GET_NODE_DATA(node);
    TSKEY   key = memRowKey(row);  // first timestamp in buffer
694
    tsdbDebug("%p uid:%" PRId64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
H
Haojun Liao 已提交
695 696
              "-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", %s",
              pHandle, pCheckInfo->tableId, key, order, (*pMem)->keyMin, (*pMem)->keyMax, pCheckInfo->lastKey, (*pMem)->nrows, pHandle->idStr);
H
Haojun Liao 已提交
697 698 699 700 701 702 703

    if (ASCENDING_TRAVERSE(order)) {
      assert(pCheckInfo->lastKey <= key);
    } else {
      assert(pCheckInfo->lastKey >= key);
    }

704
  } else {
H
Haojun Liao 已提交
705
    tsdbDebug("%p uid:%"PRId64", no data in mem, %s", pHandle, pCheckInfo->tableId, pHandle->idStr);
706
  }
H
Haojun Liao 已提交
707

708 709 710
  if (!imemEmpty) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
    assert(node != NULL);
H
Haojun Liao 已提交
711

C
Cary Xu 已提交
712 713
    SMemRow row = (SMemRow)SL_GET_NODE_DATA(node);
    TSKEY   key = memRowKey(row);  // first timestamp in buffer
714
    tsdbDebug("%p uid:%" PRId64 ", check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
H
Haojun Liao 已提交
715 716
              "-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", %s",
              pHandle, pCheckInfo->tableId, key, order, (*pIMem)->keyMin, (*pIMem)->keyMax, pCheckInfo->lastKey, (*pIMem)->nrows, pHandle->idStr);
H
Haojun Liao 已提交
717 718 719 720 721 722

    if (ASCENDING_TRAVERSE(order)) {
      assert(pCheckInfo->lastKey <= key);
    } else {
      assert(pCheckInfo->lastKey >= key);
    }
723
  } else {
H
Haojun Liao 已提交
724
    tsdbDebug("%p uid:%"PRId64", no data in imem, %s", pHandle, pCheckInfo->tableId, pHandle->idStr);
725
  }
H
Haojun Liao 已提交
726

727 728 729
  return true;
}

H
Haojun Liao 已提交
730 731 732 733 734
static void destroyTableMemIterator(STableCheckInfo* pCheckInfo) {
  tSkipListDestroyIter(pCheckInfo->iter);
  tSkipListDestroyIter(pCheckInfo->iiter);
}

735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790
static TSKEY extractFirstTraverseKey(STableCheckInfo* pCheckInfo, int32_t order, int32_t update) {
  SMemRow rmem = NULL, rimem = NULL;
  if (pCheckInfo->iter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
    if (node != NULL) {
      rmem = (SMemRow)SL_GET_NODE_DATA(node);
    }
  }

  if (pCheckInfo->iiter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
    if (node != NULL) {
      rimem = (SMemRow)SL_GET_NODE_DATA(node);
    }
  }

  if (rmem == NULL && rimem == NULL) {
    return TSKEY_INITIAL_VAL;
  }

  if (rmem != NULL && rimem == NULL) {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
    return memRowKey(rmem);
  }

  if (rmem == NULL && rimem != NULL) {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
    return memRowKey(rimem);
  }

  TSKEY r1 = memRowKey(rmem);
  TSKEY r2 = memRowKey(rimem);

  if (r1 == r2) {
    if(update == TD_ROW_DISCARD_UPDATE){
      pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
      tSkipListIterNext(pCheckInfo->iter);
    }
    else if(update == TD_ROW_OVERWRITE_UPDATE) {
      pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
      tSkipListIterNext(pCheckInfo->iiter);
    } else {
      pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
    }
    return r1;
  } else if (r1 < r2 && ASCENDING_TRAVERSE(order)) {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
    return r1;
  }
  else {
    pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
    return r2;
  }
}

static SMemRow getSMemRowInTableMem(STableCheckInfo* pCheckInfo, int32_t order, int32_t update, SMemRow* extraRow) {
C
Cary Xu 已提交
791
  SMemRow rmem = NULL, rimem = NULL;
H
Haojun Liao 已提交
792 793 794
  if (pCheckInfo->iter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
    if (node != NULL) {
C
Cary Xu 已提交
795
      rmem = (SMemRow)SL_GET_NODE_DATA(node);
H
Haojun Liao 已提交
796 797
    }
  }
798

H
Haojun Liao 已提交
799 800 801
  if (pCheckInfo->iiter) {
    SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
    if (node != NULL) {
C
Cary Xu 已提交
802
      rimem = (SMemRow)SL_GET_NODE_DATA(node);
H
Haojun Liao 已提交
803 804
    }
  }
805

H
Haojun Liao 已提交
806 807
  if (rmem == NULL && rimem == NULL) {
    return NULL;
H
Haojun Liao 已提交
808
  }
809

H
Haojun Liao 已提交
810
  if (rmem != NULL && rimem == NULL) {
H
Haojun Liao 已提交
811 812 813
    pCheckInfo->chosen = 0;
    return rmem;
  }
814

H
Haojun Liao 已提交
815
  if (rmem == NULL && rimem != NULL) {
H
Haojun Liao 已提交
816 817 818
    pCheckInfo->chosen = 1;
    return rimem;
  }
819

C
Cary Xu 已提交
820 821
  TSKEY r1 = memRowKey(rmem);
  TSKEY r2 = memRowKey(rimem);
H
Haojun Liao 已提交
822

823 824
  if (r1 == r2) {
    if (update == TD_ROW_DISCARD_UPDATE) {
H
TD-1439  
Hongze Cheng 已提交
825
      tSkipListIterNext(pCheckInfo->iter);
826
      pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
TD-1439  
Hongze Cheng 已提交
827
      return rimem;
828
    } else if(update == TD_ROW_OVERWRITE_UPDATE){
H
TD-1439  
Hongze Cheng 已提交
829
      tSkipListIterNext(pCheckInfo->iiter);
830 831 832 833 834
      pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
      return rmem;
    } else {
      pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
      extraRow = rimem;
H
TD-1439  
Hongze Cheng 已提交
835 836
      return rmem;
    }
H
Haojun Liao 已提交
837 838 839
  } else {
    if (ASCENDING_TRAVERSE(order)) {
      if (r1 < r2) {
840
        pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
H
Haojun Liao 已提交
841 842
        return rmem;
      } else {
843
        pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
Haojun Liao 已提交
844 845 846 847
        return rimem;
      }
    } else {
      if (r1 < r2) {
848
        pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
Haojun Liao 已提交
849 850
        return rimem;
      } else {
851
        pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
H
Haojun Liao 已提交
852 853 854 855
        return rmem;
      }
    }
  }
H
Haojun Liao 已提交
856 857
}

858
static bool moveToNextRowInMem(STableCheckInfo* pCheckInfo) {
H
Haojun Liao 已提交
859
  bool hasNext = false;
860
  if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) {
H
Haojun Liao 已提交
861 862 863
    if (pCheckInfo->iter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iter);
    }
864

H
Haojun Liao 已提交
865 866 867
    if (hasNext) {
      return hasNext;
    }
868

H
Haojun Liao 已提交
869 870 871
    if (pCheckInfo->iiter != NULL) {
      return tSkipListIterGet(pCheckInfo->iiter) != NULL;
    }
872
  } else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM){
873 874 875
    if (pCheckInfo->iiter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iiter);
    }
876

877 878 879
    if (hasNext) {
      return hasNext;
    }
880

881 882
    if (pCheckInfo->iter != NULL) {
      return tSkipListIterGet(pCheckInfo->iter) != NULL;
H
Haojun Liao 已提交
883
    }
884 885 886 887 888 889 890
  } else {
    if (pCheckInfo->iter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iter);
    }
    if (pCheckInfo->iiter != NULL) {
      hasNext = tSkipListIterNext(pCheckInfo->iiter) || hasNext;
    }
H
Haojun Liao 已提交
891
  }
892

H
Haojun Liao 已提交
893 894 895
  return hasNext;
}

896
static bool hasMoreDataInCache(STsdbReadHandle* pHandle) {
H
TD-1439  
Hongze Cheng 已提交
897
  STsdbCfg *pCfg = &pHandle->pTsdb->config;
898 899
  size_t size = taosArrayGetSize(pHandle->pTableCheckInfo);
  assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1);
D
dapan1121 已提交
900
  pHandle->cur.fid = INT32_MIN;
H
Haojun Liao 已提交
901

902
  STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
H
Haojun Liao 已提交
903 904 905 906
  if (!pCheckInfo->initBuf) {
    initTableMemIterator(pHandle, pCheckInfo);
  }

907
  SMemRow row = getSMemRowInTableMem(pCheckInfo, pHandle->order, pCfg->update, NULL);
H
Haojun Liao 已提交
908
  if (row == NULL) {
909 910
    return false;
  }
911

C
Cary Xu 已提交
912
  pCheckInfo->lastKey = memRowKey(row);  // first timestamp in buffer
H
Haojun Liao 已提交
913 914
  tsdbDebug("%p uid:%" PRId64", check data in buffer from skey:%" PRId64 ", order:%d, %s", pHandle,
      pCheckInfo->tableId,  pCheckInfo->lastKey, pHandle->order, pHandle->idStr);
H
Haojun Liao 已提交
915

916
  // all data in mem are checked already.
917 918
  if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) ||
      (pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) {
919 920
    return false;
  }
H
Haojun Liao 已提交
921

922 923
  int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1;
  STimeWindow* win = &pHandle->cur.win;
H
Haojun Liao 已提交
924
  pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle);
H
Haojun Liao 已提交
925

926 927 928 929
  // update the last key value
  pCheckInfo->lastKey = win->ekey + step;
  pHandle->cur.lastKey = win->ekey + step;
  pHandle->cur.mixBlock = true;
930

931
  if (!ASCENDING_TRAVERSE(pHandle->order)) {
dengyihao's avatar
dengyihao 已提交
932
    TSWAP(win->skey, win->ekey, TSKEY);
933
  }
H
Haojun Liao 已提交
934

935
  return true;
936
}
H
hjxilinx 已提交
937

938 939
static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precision) {
  assert(precision >= TSDB_TIME_PRECISION_MICRO || precision <= TSDB_TIME_PRECISION_NANO);
940 941 942
  if (key == TSKEY_INITIAL_VAL) {
    return INT32_MIN;
  }
H
Haojun Liao 已提交
943

D
dapan1121 已提交
944
  if (key < 0) {
945
    key -= (daysPerFile * tsTickPerDay[precision]);
D
dapan1121 已提交
946 947
  }
  
948
  int64_t fid = (int64_t)(key / (daysPerFile * tsTickPerDay[precision]));  // set the starting fileId
949 950 951
  if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32
    fid = INT32_MIN;
  }
H
Haojun Liao 已提交
952

953
  if (fid > 0L && fid > INT32_MAX) {
954 955
    fid = INT32_MAX;
  }
H
Haojun Liao 已提交
956

S
TD-1057  
Shengliang Guan 已提交
957
  return (int32_t)fid;
958 959
}

H
refact  
Hongze Cheng 已提交
960
static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
961 962
  int32_t firstSlot = 0;
  int32_t lastSlot = numOfBlocks - 1;
H
Haojun Liao 已提交
963

964
  int32_t midSlot = firstSlot;
H
Haojun Liao 已提交
965

966 967 968
  while (1) {
    numOfBlocks = lastSlot - firstSlot + 1;
    midSlot = (firstSlot + (numOfBlocks >> 1));
H
Haojun Liao 已提交
969

970
    if (numOfBlocks == 1) break;
H
Haojun Liao 已提交
971

972 973 974 975 976 977 978 979 980 981 982
    if (skey > pBlock[midSlot].keyLast) {
      if (numOfBlocks == 2) break;
      if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break;
      firstSlot = midSlot + 1;
    } else if (skey < pBlock[midSlot].keyFirst) {
      if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].keyLast)) break;
      lastSlot = midSlot - 1;
    } else {
      break;  // got the slot
    }
  }
H
Haojun Liao 已提交
983

984 985
  return midSlot;
}
986

987
static int32_t loadBlockInfo(STsdbReadHandle * pTsdbReadHandle, int32_t index, int32_t* numOfBlocks) {
H
Haojun Liao 已提交
988
  int32_t code = 0;
H
Haojun Liao 已提交
989

990
  STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, index);
H
Haojun Liao 已提交
991
  pCheckInfo->numOfBlocks = 0;
992

H
Haojun Liao 已提交
993 994 995 996
  STable table = {.uid = pCheckInfo->tableId, .tid = pCheckInfo->tableId};
  table.pSchema = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, pCheckInfo->tableId, 0);

  if (tsdbSetReadTable(&pTsdbReadHandle->rhelper, &table) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
997 998 999
    code = terrno;
    return code;
  }
1000

1001
  SBlockIdx* compIndex = pTsdbReadHandle->rhelper.pBlkIdx;
H
Hongze Cheng 已提交
1002

H
Haojun Liao 已提交
1003
  // no data block in this file, try next file
1004
  if (compIndex == NULL || compIndex->uid != pCheckInfo->tableId) {
H
Haojun Liao 已提交
1005 1006
    return 0;  // no data blocks in the file belongs to pCheckInfo->pTable
  }
1007

H
Haojun Liao 已提交
1008 1009 1010 1011 1012 1013 1014 1015
  if (pCheckInfo->compSize < (int32_t)compIndex->len) {
    assert(compIndex->len > 0);

    char* t = realloc(pCheckInfo->pCompInfo, compIndex->len);
    if (t == NULL) {
      terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
      code = TSDB_CODE_TDB_OUT_OF_MEMORY;
      return code;
1016 1017
    }

H
Haojun Liao 已提交
1018 1019 1020
    pCheckInfo->pCompInfo = (SBlockInfo*)t;
    pCheckInfo->compSize = compIndex->len;
  }
1021

1022
  if (tsdbLoadBlockInfo(&(pTsdbReadHandle->rhelper), (void*)(pCheckInfo->pCompInfo)) < 0) {
H
Hongze Cheng 已提交
1023 1024
    return terrno;
  }
H
Haojun Liao 已提交
1025
  SBlockInfo* pCompInfo = pCheckInfo->pCompInfo;
1026

H
Haojun Liao 已提交
1027
  TSKEY s = TSKEY_INITIAL_VAL, e = TSKEY_INITIAL_VAL;
1028

1029 1030
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
    assert(pCheckInfo->lastKey <= pTsdbReadHandle->window.ekey && pTsdbReadHandle->window.skey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
1031
  } else {
1032
    assert(pCheckInfo->lastKey >= pTsdbReadHandle->window.ekey && pTsdbReadHandle->window.skey >= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
1033
  }
1034

dengyihao's avatar
dengyihao 已提交
1035 1036
  s = TMIN(pCheckInfo->lastKey, pTsdbReadHandle->window.ekey);
  e = TMAX(pCheckInfo->lastKey, pTsdbReadHandle->window.ekey);
1037

H
Haojun Liao 已提交
1038 1039 1040
  // discard the unqualified data block based on the query time window
  int32_t start = binarySearchForBlock(pCompInfo->blocks, compIndex->numOfBlocks, s, TSDB_ORDER_ASC);
  int32_t end = start;
H
TD-100  
hzcheng 已提交
1041

H
Haojun Liao 已提交
1042 1043 1044
  if (s > pCompInfo->blocks[start].keyLast) {
    return 0;
  }
1045

H
Haojun Liao 已提交
1046 1047 1048 1049
  // todo speedup the procedure of located end block
  while (end < (int32_t)compIndex->numOfBlocks && (pCompInfo->blocks[end].keyFirst <= e)) {
    end += 1;
  }
1050

H
Haojun Liao 已提交
1051
  pCheckInfo->numOfBlocks = (end - start);
1052

H
Haojun Liao 已提交
1053 1054 1055
  if (start > 0) {
    memmove(pCompInfo->blocks, &pCompInfo->blocks[start], pCheckInfo->numOfBlocks * sizeof(SBlock));
  }
1056

H
Haojun Liao 已提交
1057 1058 1059
  (*numOfBlocks) += pCheckInfo->numOfBlocks;
  return 0;
}
1060

1061
static int32_t getFileCompInfo(STsdbReadHandle* pTsdbReadHandle, int32_t* numOfBlocks) {
H
Haojun Liao 已提交
1062 1063 1064 1065
  // load all the comp offset value for all tables in this file
  int32_t code = TSDB_CODE_SUCCESS;
  *numOfBlocks = 0;

1066
  pTsdbReadHandle->cost.headFileLoad += 1;
1067 1068
  int64_t s = taosGetTimestampUs();

H
Haojun Liao 已提交
1069
  size_t numOfTables = 0;
1070 1071 1072 1073
  if (pTsdbReadHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) {
    code = loadBlockInfo(pTsdbReadHandle, pTsdbReadHandle->activeIndex, numOfBlocks);
  } else if (pTsdbReadHandle->loadType == BLOCK_LOAD_OFFSET_SEQ_ORDER) {
    numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
1074

H
Haojun Liao 已提交
1075
    for (int32_t i = 0; i < numOfTables; ++i) {
1076
      code = loadBlockInfo(pTsdbReadHandle, i, numOfBlocks);
H
Haojun Liao 已提交
1077
      if (code != TSDB_CODE_SUCCESS) {
1078 1079
        int64_t e = taosGetTimestampUs();

1080
        pTsdbReadHandle->cost.headFileLoadTime += (e - s);
H
Haojun Liao 已提交
1081 1082 1083 1084 1085
        return code;
      }
    }
  } else {
    assert(0);
1086
  }
1087

1088
  int64_t e = taosGetTimestampUs();
1089
  pTsdbReadHandle->cost.headFileLoadTime += (e - s);
H
Haojun Liao 已提交
1090
  return code;
1091 1092
}

1093
static int32_t doLoadFileDataBlock(STsdbReadHandle* pTsdbReadHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) {
H
Haojun Liao 已提交
1094
  int64_t st = taosGetTimestampUs();
1095

H
Haojun Liao 已提交
1096
  STSchema *pSchema = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, pCheckInfo->tableId, 0);
1097
  int32_t   code = tdInitDataCols(pTsdbReadHandle->pDataCols, pSchema);
H
Haojun Liao 已提交
1098
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1099
    tsdbError("%p failed to malloc buf for pDataCols, %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1100 1101 1102 1103
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _error;
  }

1104
  code = tdInitDataCols(pTsdbReadHandle->rhelper.pDCols[0], pSchema);
H
Haojun Liao 已提交
1105
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1106
    tsdbError("%p failed to malloc buf for rhelper.pDataCols[0], %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1107 1108 1109 1110
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _error;
  }

1111
  code = tdInitDataCols(pTsdbReadHandle->rhelper.pDCols[1], pSchema);
H
Haojun Liao 已提交
1112
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1113
    tsdbError("%p failed to malloc buf for rhelper.pDataCols[1], %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1114 1115 1116
    terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
    goto _error;
  }
1117

1118
  int16_t* colIds = pTsdbReadHandle->defaultLoadColumn->pData;
H
Haojun Liao 已提交
1119

1120
  int32_t ret = tsdbLoadBlockDataCols(&(pTsdbReadHandle->rhelper), pBlock, pCheckInfo->pCompInfo, colIds, (int)(QH_GET_NUM_OF_COLS(pTsdbReadHandle)));
H
Haojun Liao 已提交
1121
  if (ret != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1122 1123 1124
    int32_t c = terrno;
    assert(c != TSDB_CODE_SUCCESS);
    goto _error;
H
Haojun Liao 已提交
1125
  }
1126

1127
  SDataBlockLoadInfo* pBlockLoadInfo = &pTsdbReadHandle->dataBlockLoadInfo;
1128

1129 1130
  pBlockLoadInfo->fileGroup = pTsdbReadHandle->pFileGroup;
  pBlockLoadInfo->slot = pTsdbReadHandle->cur.slot;
H
Haojun Liao 已提交
1131
  pBlockLoadInfo->uid = pCheckInfo->tableId;
1132

1133
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
1134
  assert(pCols->numOfRows != 0 && pCols->numOfRows <= pBlock->numOfRows);
1135

1136
  pBlock->numOfRows = pCols->numOfRows;
H
Haojun Liao 已提交
1137

1138
  // Convert from TKEY to TSKEY for primary timestamp column if current block has timestamp before 1970-01-01T00:00:00Z
1139
  if(pBlock->keyFirst < 0 && colIds[0] == PRIMARYKEY_TIMESTAMP_COL_ID) {
1140 1141 1142 1143 1144 1145
    int64_t* src = pCols->cols[0].pData;
    for(int32_t i = 0; i < pBlock->numOfRows; ++i) {
      src[i] = tdGetKey(src[i]);
    }
  }

H
Haojun Liao 已提交
1146
  int64_t elapsedTime = (taosGetTimestampUs() - st);
1147
  pTsdbReadHandle->cost.blockLoadTime += elapsedTime;
1148

H
Haojun Liao 已提交
1149 1150
  tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, elapsed time:%"PRId64 " us, %s",
      pTsdbReadHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1151
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1152 1153 1154 1155

_error:
  pBlock->numOfRows = 0;

H
Haojun Liao 已提交
1156 1157
  tsdbError("%p error occurs in loading file block, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, %s",
            pTsdbReadHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1158
  return terrno;
H
hjxilinx 已提交
1159 1160
}

1161 1162 1163 1164 1165
static int32_t getEndPosInDataBlock(STsdbReadHandle* pTsdbReadHandle, SDataBlockInfo* pBlockInfo);
static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end);
static void moveDataToFront(STsdbReadHandle* pTsdbReadHandle, int32_t numOfRows, int32_t numOfCols);
static void doCheckGeneratedBlockRange(STsdbReadHandle* pTsdbReadHandle);
static void copyAllRemainRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos);
1166

1167 1168 1169
static int32_t handleDataMergeIfNeeded(STsdbReadHandle* pTsdbReadHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo){
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  STsdbCfg*      pCfg = &pTsdbReadHandle->pTsdb->config;
H
Haojun Liao 已提交
1170
  SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
1171
  TSKEY          key;
H
Haojun Liao 已提交
1172
  int32_t code = TSDB_CODE_SUCCESS;
1173

1174
  /*bool hasData = */ initTableMemIterator(pTsdbReadHandle, pCheckInfo);
H
Haojun Liao 已提交
1175 1176
  assert(cur->pos >= 0 && cur->pos <= binfo.rows);

1177
  key = extractFirstTraverseKey(pCheckInfo, pTsdbReadHandle->order, pCfg->update);
1178

H
Haojun Liao 已提交
1179
  if (key != TSKEY_INITIAL_VAL) {
H
Haojun Liao 已提交
1180
    tsdbDebug("%p key in mem:%"PRId64", %s", pTsdbReadHandle, key, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1181
  } else {
H
Haojun Liao 已提交
1182
    tsdbDebug("%p no data in mem, %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1183
  }
H
Haojun Liao 已提交
1184

1185 1186
  if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) ||
      (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) {
H
Haojun Liao 已提交
1187

1188 1189
    if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) ||
        (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) {
1190

H
Haojun Liao 已提交
1191
      // do not load file block into buffer
1192
      int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1;
H
Haojun Liao 已提交
1193

1194 1195 1196
      TSKEY maxKey = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? (binfo.window.skey - step):(binfo.window.ekey - step);
      cur->rows = tsdbReadRowsFromCache(pCheckInfo, maxKey, pTsdbReadHandle->outputCapacity, &cur->win, pTsdbReadHandle);
      pTsdbReadHandle->realNumOfRows = cur->rows;
H
Haojun Liao 已提交
1197 1198 1199

      // update the last key value
      pCheckInfo->lastKey = cur->win.ekey + step;
1200
      if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
dengyihao's avatar
dengyihao 已提交
1201
        TSWAP(cur->win.skey, cur->win.ekey, TSKEY);
H
Haojun Liao 已提交
1202
      }
H
Haojun Liao 已提交
1203

H
Haojun Liao 已提交
1204 1205
      cur->mixBlock = true;
      cur->blockCompleted = false;
H
Haojun Liao 已提交
1206
      return code;
H
Haojun Liao 已提交
1207
    }
H
Haojun Liao 已提交
1208

1209

1210
    // return error, add test cases
1211
    if ((code = doLoadFileDataBlock(pTsdbReadHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1212
      return code;
1213 1214
    }

1215
    doMergeTwoLevelData(pTsdbReadHandle, pCheckInfo, pBlock);
1216
  } else {
1217 1218 1219 1220 1221 1222
    /*
     * no data in cache, only load data from file
     * during the query processing, data in cache will not be checked anymore.
     *
     * Here the buffer is not enough, so only part of file block can be loaded into memory buffer
     */
1223 1224
    assert(pTsdbReadHandle->outputCapacity >= binfo.rows);
    int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &binfo);
1225

1226 1227 1228
    if ((cur->pos == 0 && endPos == binfo.rows -1 && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
        (cur->pos == (binfo.rows - 1) && endPos == 0 && (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)))) {
      pTsdbReadHandle->realNumOfRows = binfo.rows;
1229 1230 1231 1232

      cur->rows = binfo.rows;
      cur->win  = binfo.window;
      cur->mixBlock = false;
H
Haojun Liao 已提交
1233 1234
      cur->blockCompleted = true;

1235
      if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
H
Haojun Liao 已提交
1236 1237 1238 1239 1240 1241
        cur->lastKey = binfo.window.ekey + 1;
        cur->pos = binfo.rows;
      } else {
        cur->lastKey = binfo.window.skey - 1;
        cur->pos = -1;
      }
H
Haojun Liao 已提交
1242
    } else { // partially copy to dest buffer
1243
      copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &binfo, endPos);
1244 1245
      cur->mixBlock = true;
    }
1246

H
Haojun Liao 已提交
1247
    assert(cur->blockCompleted);
H
Haojun Liao 已提交
1248
    if (cur->rows == binfo.rows) {
H
Haojun Liao 已提交
1249 1250
      tsdbDebug("%p whole file block qualified, brange:%"PRId64"-%"PRId64", rows:%d, lastKey:%"PRId64", %s",
                pTsdbReadHandle, cur->win.skey, cur->win.ekey, cur->rows, cur->lastKey, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1251
    } else {
H
Haojun Liao 已提交
1252 1253
      tsdbDebug("%p create data block from remain file block, brange:%"PRId64"-%"PRId64", rows:%d, total:%d, lastKey:%"PRId64", %s",
                pTsdbReadHandle, cur->win.skey, cur->win.ekey, cur->rows, binfo.rows, cur->lastKey, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1254 1255
    }

1256
  }
H
Haojun Liao 已提交
1257 1258

  return code;
1259 1260
}

1261 1262
static int32_t loadFileDataBlock(STsdbReadHandle* pTsdbReadHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, bool* exists) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
1263
  int32_t code = TSDB_CODE_SUCCESS;
1264
  bool asc = ASCENDING_TRAVERSE(pTsdbReadHandle->order);
1265

1266
  if (asc) {
H
Haojun Liao 已提交
1267
    // query ended in/started from current block
1268 1269
    if (pTsdbReadHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) {
      if ((code = doLoadFileDataBlock(pTsdbReadHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1270 1271
        *exists = false;
        return code;
1272
      }
1273

1274
      SDataCols* pTSCol = pTsdbReadHandle->rhelper.pDCols[0];
H
Haojun Liao 已提交
1275
      assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows);
H
Haojun Liao 已提交
1276

1277 1278
      if (pCheckInfo->lastKey > pBlock->keyFirst) {
        cur->pos =
1279
            binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pTsdbReadHandle->order);
1280 1281 1282
      } else {
        cur->pos = 0;
      }
H
Haojun Liao 已提交
1283

H
Haojun Liao 已提交
1284
      assert(pCheckInfo->lastKey <= pBlock->keyLast);
1285
      doMergeTwoLevelData(pTsdbReadHandle, pCheckInfo, pBlock);
1286
    } else {  // the whole block is loaded in to buffer
1287
      cur->pos = asc? 0:(pBlock->numOfRows - 1);
1288
      code = handleDataMergeIfNeeded(pTsdbReadHandle, pBlock, pCheckInfo);
H
[td-32]  
hjxilinx 已提交
1289
    }
1290
  } else {  //desc order, query ended in current block
1291 1292
    if (pTsdbReadHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) {
      if ((code = doLoadFileDataBlock(pTsdbReadHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
1293 1294
        *exists = false;
        return code;
1295
      }
H
Haojun Liao 已提交
1296

1297
      SDataCols* pTsCol = pTsdbReadHandle->rhelper.pDCols[0];
1298
      if (pCheckInfo->lastKey < pBlock->keyLast) {
1299
        cur->pos = binarySearchForKey(pTsCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pTsdbReadHandle->order);
1300
      } else {
H
Haojun Liao 已提交
1301
        cur->pos = pBlock->numOfRows - 1;
1302
      }
H
Haojun Liao 已提交
1303

H
Haojun Liao 已提交
1304
      assert(pCheckInfo->lastKey >= pBlock->keyFirst);
1305
      doMergeTwoLevelData(pTsdbReadHandle, pCheckInfo, pBlock);
1306
    } else {
1307
      cur->pos = asc? 0:(pBlock->numOfRows-1);
1308
      code = handleDataMergeIfNeeded(pTsdbReadHandle, pBlock, pCheckInfo);
H
[td-32]  
hjxilinx 已提交
1309
    }
1310
  }
1311

1312
  *exists = pTsdbReadHandle->realNumOfRows > 0;
H
Haojun Liao 已提交
1313
  return code;
H
[td-32]  
hjxilinx 已提交
1314 1315
}

1316
static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
1317
  int    firstPos, lastPos, midPos = -1;
H
Haojun Liao 已提交
1318
  int    numOfRows;
1319 1320
  TSKEY* keyList;

1321
  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);
H
Haojun Liao 已提交
1322

1323
  if (num <= 0) return -1;
1324 1325

  keyList = (TSKEY*)pValue;
1326 1327
  firstPos = 0;
  lastPos = num - 1;
1328

1329
  if (order == TSDB_ORDER_DESC) {
1330 1331 1332 1333 1334
    // find the first position which is smaller than the key
    while (1) {
      if (key >= keyList[lastPos]) return lastPos;
      if (key == keyList[firstPos]) return firstPos;
      if (key < keyList[firstPos]) return firstPos - 1;
1335

H
Haojun Liao 已提交
1336 1337
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
1338

1339 1340 1341 1342 1343 1344 1345 1346
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
1347

1348 1349 1350 1351 1352
  } else {
    // find the first position which is bigger than the key
    while (1) {
      if (key <= keyList[firstPos]) return firstPos;
      if (key == keyList[lastPos]) return lastPos;
1353

1354 1355 1356 1357 1358 1359 1360
      if (key > keyList[lastPos]) {
        lastPos = lastPos + 1;
        if (lastPos >= num)
          return -1;
        else
          return lastPos;
      }
1361

H
Haojun Liao 已提交
1362 1363
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
1364

1365 1366 1367 1368 1369 1370 1371 1372 1373
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
  }
1374

1375 1376 1377
  return midPos;
}

1378
static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) {
1379
  char* pData = NULL;
1380
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1 : -1;
H
Haojun Liao 已提交
1381

1382
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
1383
  TSKEY* tsArray = pCols->cols[0].pData;
H
Haojun Liao 已提交
1384

1385
  int32_t num = end - start + 1;
H
Haojun Liao 已提交
1386 1387 1388 1389 1390 1391
  assert(num >= 0);

  if (num == 0) {
    return numOfRows;
  }

1392
  int32_t requiredNumOfCols = (int32_t)taosArrayGetSize(pTsdbReadHandle->pColumns);
H
Haojun Liao 已提交
1393

1394
  //data in buffer has greater timestamp, copy data in file block
1395 1396
  int32_t i = 0, j = 0;
  while(i < requiredNumOfCols && j < pCols->numOfCols) {
1397
    SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
1398 1399 1400 1401 1402 1403 1404 1405 1406

    SDataCol* src = &pCols->cols[j];
    if (src->colId < pColInfo->info.colId) {
      j++;
      continue;
    }

    int32_t bytes = pColInfo->info.bytes;

1407
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
S
TD-1057  
Shengliang Guan 已提交
1408
      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
1409
    } else {
S
TD-1057  
Shengliang Guan 已提交
1410
      pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes;
1411
    }
1412

L
Liu Jicong 已提交
1413
    if (!isAllRowsNull(src) && pColInfo->info.colId == src->colId) {
1414
      if (pColInfo->info.type != TSDB_DATA_TYPE_BINARY && pColInfo->info.type != TSDB_DATA_TYPE_NCHAR) {
S
TD-1057  
Shengliang Guan 已提交
1415
        memmove(pData, (char*)src->pData + bytes * start, bytes * num);
1416 1417 1418 1419 1420
      } else {  // handle the var-string
        char* dst = pData;

        // todo refactor, only copy one-by-one
        for (int32_t k = start; k < num + start; ++k) {
K
kailixu 已提交
1421
          const char* p = tdGetColDataOfRow(src, k);
1422 1423
          memcpy(dst, p, varDataTLen(p));
          dst += bytes;
1424 1425
        }
      }
1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440

      j++;
      i++;
    } else { // pColInfo->info.colId < src->colId, it is a NULL data
      if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
        char* dst = pData;

        for(int32_t k = start; k < num + start; ++k) {
          setVardataNull(dst, pColInfo->info.type);
          dst += bytes;
        }
      } else {
        setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num);
      }
      i++;
1441 1442
    }
  }
1443 1444

  while (i < requiredNumOfCols) { // the remain columns are all null data
1445 1446
    SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
S
TD-1057  
Shengliang Guan 已提交
1447
      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
1448
    } else {
S
TD-1057  
Shengliang Guan 已提交
1449
      pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes;
1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460
    }

    if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
      char* dst = pData;

      for(int32_t k = start; k < num + start; ++k) {
        setVardataNull(dst, pColInfo->info.type);
        dst += pColInfo->info.bytes;
      }
    } else {
      setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num);
1461
    }
1462 1463

    i++;
1464
  }
H
Haojun Liao 已提交
1465

1466 1467
  pTsdbReadHandle->cur.win.ekey = tsArray[end];
  pTsdbReadHandle->cur.lastKey = tsArray[end] + step;
1468

1469
  return numOfRows + num;
1470 1471
}

1472
// Note: row1 always has high priority
1473 1474
static void mergeTwoRowFromMem(STsdbReadHandle* pTsdbReadHandle, int32_t capacity, int32_t numOfRows,
                               SMemRow row1, SMemRow row2, int32_t numOfCols, uint64_t uid,
1475
                               STSchema* pSchema1, STSchema* pSchema2, bool forceSetNull) {
1476
  char* pData = NULL;
1477 1478 1479 1480 1481 1482 1483 1484 1485 1486
  STSchema* pSchema;
  SMemRow row;
  int16_t colId;
  int16_t offset;

  bool isRow1DataRow = isDataRow(row1);
  bool isRow2DataRow;
  bool isChosenRowDataRow;
  int32_t chosen_itr;
  void *value;
1487

1488 1489 1490 1491
  // the schema version info is embeded in SDataRow
  int32_t numOfColsOfRow1 = 0;

  if (pSchema1 == NULL) {
1492
    pSchema1 = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, uid, 0);
1493
  }
1494

1495 1496
  if(isRow1DataRow) {
    numOfColsOfRow1 = schemaNCols(pSchema1);
H
Haojun Liao 已提交
1497
  } else {
1498
    numOfColsOfRow1 = kvRowNCols(memRowKvBody(row1));
D
fix bug  
dapan1121 已提交
1499
  }
1500

1501 1502 1503 1504
  int32_t numOfColsOfRow2 = 0;
  if(row2) {
    isRow2DataRow = isDataRow(row2);
    if (pSchema2 == NULL) {
1505
      pSchema2 = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, uid, 0);
1506 1507 1508 1509 1510 1511 1512
    }
    if(isRow2DataRow) {
      numOfColsOfRow2 = schemaNCols(pSchema2);
    } else {
      numOfColsOfRow2 = kvRowNCols(memRowKvBody(row2));
    }
  }
C
Cary Xu 已提交
1513

1514 1515 1516

  int32_t i = 0, j = 0, k = 0;
  while(i < numOfCols && (j < numOfColsOfRow1 || k < numOfColsOfRow2)) {
1517
    SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
1518

1519
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548
      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
    } else {
      pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes;
    }

    int32_t colIdOfRow1;
    if(j >= numOfColsOfRow1) {
      colIdOfRow1 = INT32_MAX;
    } else if(isRow1DataRow) {
      colIdOfRow1 = pSchema1->columns[j].colId;
    } else {
      void *rowBody = memRowKvBody(row1);
      SColIdx *pColIdx = kvRowColIdxAt(rowBody, j);
      colIdOfRow1 = pColIdx->colId;
    }

    int32_t colIdOfRow2;
    if(k >= numOfColsOfRow2) {
      colIdOfRow2 = INT32_MAX;
    } else if(isRow2DataRow) {
      colIdOfRow2 = pSchema2->columns[k].colId;
    } else {
      void *rowBody = memRowKvBody(row2);
      SColIdx *pColIdx = kvRowColIdxAt(rowBody, k);
      colIdOfRow2 = pColIdx->colId;
    }

    if(colIdOfRow1 == colIdOfRow2) {
      if(colIdOfRow1 < pColInfo->info.colId) {
C
Cary Xu 已提交
1549
        j++;
1550
        k++;
C
Cary Xu 已提交
1551 1552
        continue;
      }
1553 1554 1555 1556 1557 1558 1559 1560
      row = row1;
      pSchema = pSchema1;
      isChosenRowDataRow = isRow1DataRow;
      chosen_itr = j;
    } else if(colIdOfRow1 < colIdOfRow2) {
      if(colIdOfRow1 < pColInfo->info.colId) {
        j++;
        continue;
C
Cary Xu 已提交
1561
      }
1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588
      row = row1;
      pSchema = pSchema1;
      isChosenRowDataRow = isRow1DataRow;
      chosen_itr = j;
    } else {
      if(colIdOfRow2 < pColInfo->info.colId) {
        k++;
        continue;
      }
      row = row2;
      pSchema = pSchema2;
      chosen_itr = k;
      isChosenRowDataRow = isRow2DataRow;
    }
    if(isChosenRowDataRow) {
      colId = pSchema->columns[chosen_itr].colId;
      offset = pSchema->columns[chosen_itr].offset;
      void *rowBody = memRowDataBody(row);
      value = tdGetRowDataOfCol(rowBody, (int8_t)pColInfo->info.type, TD_DATA_ROW_HEAD_SIZE + offset);
    } else {
      void *rowBody = memRowKvBody(row);
      SColIdx *pColIdx = kvRowColIdxAt(rowBody, chosen_itr);
      colId = pColIdx->colId;
      offset = pColIdx->offset;
      value = tdGetKvRowDataOfCol(rowBody, pColIdx->offset);
    }

C
Cary Xu 已提交
1589

1590 1591
    if (colId == pColInfo->info.colId) {
      if(forceSetNull || (!isNull(value, (int8_t)pColInfo->info.type))) {
C
Cary Xu 已提交
1592 1593 1594 1595 1596 1597 1598 1599 1600
        switch (pColInfo->info.type) {
          case TSDB_DATA_TYPE_BINARY:
          case TSDB_DATA_TYPE_NCHAR:
            memcpy(pData, value, varDataTLen(value));
            break;
          case TSDB_DATA_TYPE_NULL:
          case TSDB_DATA_TYPE_BOOL:
          case TSDB_DATA_TYPE_TINYINT:
          case TSDB_DATA_TYPE_UTINYINT:
1601
            *(uint8_t *)pData = *(uint8_t *)value;
C
Cary Xu 已提交
1602 1603 1604
            break;
          case TSDB_DATA_TYPE_SMALLINT:
          case TSDB_DATA_TYPE_USMALLINT:
1605
            *(uint16_t *)pData = *(uint16_t *)value;
C
Cary Xu 已提交
1606 1607 1608
            break;
          case TSDB_DATA_TYPE_INT:
          case TSDB_DATA_TYPE_UINT:
1609
            *(uint32_t *)pData = *(uint32_t *)value;
C
Cary Xu 已提交
1610 1611 1612
            break;
          case TSDB_DATA_TYPE_BIGINT:
          case TSDB_DATA_TYPE_UBIGINT:
1613
            *(uint64_t *)pData = *(uint64_t *)value;
C
Cary Xu 已提交
1614 1615 1616 1617 1618 1619 1620 1621
            break;
          case TSDB_DATA_TYPE_FLOAT:
            SET_FLOAT_PTR(pData, value);
            break;
          case TSDB_DATA_TYPE_DOUBLE:
            SET_DOUBLE_PTR(pData, value);
            break;
          case TSDB_DATA_TYPE_TIMESTAMP:
1622
            if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
1623
              *(TSKEY *)pData = tdGetKey(*(TKEY *)value);
C
Cary Xu 已提交
1624
            } else {
1625
              *(TSKEY *)pData = *(TSKEY *)value;
C
Cary Xu 已提交
1626 1627 1628 1629 1630
            }
            break;
          default:
            memcpy(pData, value, pColInfo->info.bytes);
        }
1631 1632
      }
      i++;
C
Cary Xu 已提交
1633

1634
      if(row == row1) {
C
Cary Xu 已提交
1635
        j++;
1636 1637 1638 1639 1640
      } else {
        k++;
      }
    } else {
      if(forceSetNull) {
C
Cary Xu 已提交
1641 1642 1643 1644 1645 1646
        if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
          setVardataNull(pData, pColInfo->info.type);
        } else {
          setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
        }
      }
1647
      i++;
1648
    }
1649
  }
1650

1651 1652
  if(forceSetNull) {
    while (i < numOfCols) { // the remain columns are all null data
1653 1654
      SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
      if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
C
Cary Xu 已提交
1655 1656 1657 1658 1659
        pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
      } else {
        pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes;
      }

1660 1661 1662 1663
      if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
        setVardataNull(pData, pColInfo->info.type);
      } else {
        setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
1664
      }
1665

1666
      i++;
1667 1668 1669
    }
  }
}
1670

1671 1672
static void moveDataToFront(STsdbReadHandle* pTsdbReadHandle, int32_t numOfRows, int32_t numOfCols) {
  if (numOfRows == 0 || ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
1673 1674 1675 1676
    return;
  }

  // if the buffer is not full in case of descending order query, move the data in the front of the buffer
1677 1678
  if (numOfRows < pTsdbReadHandle->outputCapacity) {
    int32_t emptySize = pTsdbReadHandle->outputCapacity - numOfRows;
1679
    for(int32_t i = 0; i < numOfCols; ++i) {
1680
      SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
S
TD-1057  
Shengliang Guan 已提交
1681
      memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
1682 1683 1684 1685
    }
  }
}

1686
static void getQualifiedRowsPos(STsdbReadHandle* pTsdbReadHandle, int32_t startPos, int32_t endPos, int32_t numOfExisted,
1687
                                int32_t* start, int32_t* end) {
1688 1689
  *start = -1;

1690
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
1691
    int32_t remain = endPos - startPos + 1;
1692 1693
    if (remain + numOfExisted > pTsdbReadHandle->outputCapacity) {
      *end = (pTsdbReadHandle->outputCapacity - numOfExisted) + startPos - 1;
H
Haojun Liao 已提交
1694 1695
    } else {
      *end = endPos;
1696 1697 1698 1699 1700
    }

    *start = startPos;
  } else {
    int32_t remain = (startPos - endPos) + 1;
1701 1702
    if (remain + numOfExisted > pTsdbReadHandle->outputCapacity) {
      *end = startPos + 1 - (pTsdbReadHandle->outputCapacity - numOfExisted);
H
Haojun Liao 已提交
1703 1704
    } else {
      *end = endPos;
1705 1706 1707 1708 1709 1710 1711
    }

    *start = *end;
    *end = startPos;
  }
}

1712 1713
static void updateInfoAfterMerge(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
1714 1715

  pCheckInfo->lastKey = cur->lastKey;
1716
  pTsdbReadHandle->realNumOfRows = numOfRows;
1717 1718 1719 1720
  cur->rows = numOfRows;
  cur->pos = endPos;
}

1721 1722
static void doCheckGeneratedBlockRange(STsdbReadHandle* pTsdbReadHandle) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
1723 1724

  if (cur->rows > 0) {
1725 1726
    if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
      assert(cur->win.skey >= pTsdbReadHandle->window.skey && cur->win.ekey <= pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
1727
    } else {
1728
      assert(cur->win.skey >= pTsdbReadHandle->window.ekey && cur->win.ekey <= pTsdbReadHandle->window.skey);
H
Haojun Liao 已提交
1729 1730
    }

1731
    SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, 0);
H
Haojun Liao 已提交
1732 1733
    assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]);
  } else {
1734
    cur->win = pTsdbReadHandle->window;
H
Haojun Liao 已提交
1735

1736 1737
    int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;
    cur->lastKey = pTsdbReadHandle->window.ekey + step;
H
Haojun Liao 已提交
1738 1739 1740
  }
}

1741 1742
static void copyAllRemainRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
1743

1744
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
H
Haojun Liao 已提交
1745 1746
  TSKEY* tsArray = pCols->cols[0].pData;

1747 1748
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;
  int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
H
Haojun Liao 已提交
1749 1750 1751 1752 1753 1754

  int32_t pos = cur->pos;

  int32_t start = cur->pos;
  int32_t end = endPos;

1755
  if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
dengyihao's avatar
dengyihao 已提交
1756
    TSWAP(start, end, int32_t);
H
Haojun Liao 已提交
1757 1758
  }

1759 1760
  assert(pTsdbReadHandle->outputCapacity >= (end - start + 1));
  int32_t numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, start, end);
H
Haojun Liao 已提交
1761 1762 1763

  // the time window should always be ascending order: skey <= ekey
  cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]};
H
Haojun Liao 已提交
1764
  cur->mixBlock = (numOfRows != pBlockInfo->rows);
H
Haojun Liao 已提交
1765
  cur->lastKey = tsArray[endPos] + step;
H
Haojun Liao 已提交
1766
  cur->blockCompleted = true;
H
Haojun Liao 已提交
1767 1768

  // if the buffer is not full in case of descending order query, move the data in the front of the buffer
1769
  moveDataToFront(pTsdbReadHandle, numOfRows, numOfCols);
H
Haojun Liao 已提交
1770 1771 1772

  // The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases.
  pos = endPos + step;
1773 1774
  updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
  doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Haojun Liao 已提交
1775

H
Haojun Liao 已提交
1776 1777
  tsdbDebug("%p uid:%" PRIu64", data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, %s",
            pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1778 1779
}

1780
int32_t getEndPosInDataBlock(STsdbReadHandle* pTsdbReadHandle, SDataBlockInfo* pBlockInfo) {
H
Haojun Liao 已提交
1781 1782
  // NOTE: reverse the order to find the end position in data block
  int32_t endPos = -1;
1783
  int32_t order = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? TSDB_ORDER_DESC : TSDB_ORDER_ASC;
H
Haojun Liao 已提交
1784

1785 1786
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
H
Haojun Liao 已提交
1787

1788
  if (ASCENDING_TRAVERSE(pTsdbReadHandle->order) && pTsdbReadHandle->window.ekey >= pBlockInfo->window.ekey) {
H
Haojun Liao 已提交
1789 1790
    endPos = pBlockInfo->rows - 1;
    cur->mixBlock = (cur->pos != 0);
1791
  } else if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && pTsdbReadHandle->window.ekey <= pBlockInfo->window.skey) {
H
Haojun Liao 已提交
1792 1793 1794 1795
    endPos = 0;
    cur->mixBlock = (cur->pos != pBlockInfo->rows - 1);
  } else {
    assert(pCols->numOfRows > 0);
1796
    endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pTsdbReadHandle->window.ekey, order);
H
Haojun Liao 已提交
1797 1798 1799 1800 1801 1802
    cur->mixBlock = true;
  }

  return endPos;
}

H
[td-32]  
hjxilinx 已提交
1803 1804
// only return the qualified data to client in terms of query time window, data rows in the same block but do not
// be included in the query time window will be discarded
1805 1806 1807 1808
static void doMergeTwoLevelData(STsdbReadHandle* pTsdbReadHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock) {
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  SDataBlockInfo blockInfo = {0};//GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
  STsdbCfg*      pCfg = &pTsdbReadHandle->pTsdb->config;
H
Haojun Liao 已提交
1809

1810
  initTableMemIterator(pTsdbReadHandle, pCheckInfo);
1811

1812 1813
  SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0];
  assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_ID &&
H
Haojun Liao 已提交
1814 1815
      cur->pos >= 0 && cur->pos < pBlock->numOfRows);

1816
  TSKEY* tsArray = pCols->cols[0].pData;
H
Haojun Liao 已提交
1817
  assert(pCols->numOfRows == pBlock->numOfRows && tsArray[0] == pBlock->keyFirst && tsArray[pBlock->numOfRows-1] == pBlock->keyLast);
1818 1819

  // for search the endPos, so the order needs to reverse
1820
  int32_t order = (pTsdbReadHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC;
1821

1822 1823
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;
  int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
1824

H
Haojun Liao 已提交
1825
  STable* pTable = NULL;
1826
  int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &blockInfo);
H
Haojun Liao 已提交
1827

1828
  tsdbDebug("%p uid:%" PRIu64" start merge data block, file block range:%"PRIu64"-%"PRIu64" rows:%d, start:%d,"
H
Haojun Liao 已提交
1829
            "end:%d, %s",
1830
            pTsdbReadHandle, pCheckInfo->tableId, blockInfo.window.skey, blockInfo.window.ekey,
H
Haojun Liao 已提交
1831
            blockInfo.rows, cur->pos, endPos, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
1832

1833 1834
  // compared with the data from in-memory buffer, to generate the correct timestamp array list
  int32_t numOfRows = 0;
H
Haojun Liao 已提交
1835

1836 1837 1838 1839
  int16_t rv1 = -1;
  int16_t rv2 = -1;
  STSchema* pSchema1 = NULL;
  STSchema* pSchema2 = NULL;
D
fix bug  
dapan1121 已提交
1840

H
Haojun Liao 已提交
1841 1842
  int32_t pos = cur->pos;
  cur->win = TSWINDOW_INITIALIZER;
1843

1844 1845
  // no data in buffer, load data from file directly
  if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
1846
    copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &blockInfo, endPos);
1847
    return;
1848
  } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
1849 1850
    SSkipListNode* node = NULL;
    do {
1851
      SMemRow row2 = NULL;
1852
      SMemRow row1 = getSMemRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, &row2);
1853
      if (row1 == NULL) {
H
[td-32]  
hjxilinx 已提交
1854
        break;
1855
      }
1856

1857
      TSKEY key = memRowKey(row1);
1858 1859
      if ((key > pTsdbReadHandle->window.ekey && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
          (key < pTsdbReadHandle->window.ekey && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1860 1861 1862
        break;
      }

1863 1864
      if (((pos > endPos || tsArray[pos] > pTsdbReadHandle->window.ekey) && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
          ((pos < endPos || tsArray[pos] < pTsdbReadHandle->window.ekey) && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1865 1866 1867
        break;
      }

1868 1869
      if ((key < tsArray[pos] && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
          (key > tsArray[pos] && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1870
        if (rv1 != memRowVersion(row1)) {
1871
//          pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1));
1872
          rv1 = memRowVersion(row1);
C
Cary Xu 已提交
1873
        }
1874
        if(row2 && rv2 != memRowVersion(row2)) {
1875
//          pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2));
1876 1877 1878
          rv2 = memRowVersion(row2);
        }
        
H
Haojun Liao 已提交
1879
        mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pCheckInfo->tableId, pSchema1, pSchema2, true);
1880 1881 1882 1883
        numOfRows += 1;
        if (cur->win.skey == TSKEY_INITIAL_VAL) {
          cur->win.skey = key;
        }
1884

1885
        cur->win.ekey = key;
1886 1887 1888
        cur->lastKey  = key + step;
        cur->mixBlock = true;

1889
        moveToNextRowInMem(pCheckInfo);
1890
      } else if (key == tsArray[pos]) {  // data in buffer has the same timestamp of data in file block, ignore it
H
TD-1439  
Hongze Cheng 已提交
1891
        if (pCfg->update) {
1892
          if(pCfg->update == TD_ROW_PARTIAL_UPDATE) {
1893
            doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, pos, pos);
D
fix bug  
dapan1121 已提交
1894
          }
1895
          if (rv1 != memRowVersion(row1)) {
1896
//            pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1));
1897 1898 1899
            rv1 = memRowVersion(row1);
          }
          if(row2 && rv2 != memRowVersion(row2)) {
1900
//            pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2));
1901 1902 1903 1904
            rv2 = memRowVersion(row2);
          }
          
          bool forceSetNull = pCfg->update != TD_ROW_PARTIAL_UPDATE;
H
Haojun Liao 已提交
1905
          mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pCheckInfo->tableId, pSchema1, pSchema2, forceSetNull);
H
TD-1439  
Hongze Cheng 已提交
1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919
          numOfRows += 1;
          if (cur->win.skey == TSKEY_INITIAL_VAL) {
            cur->win.skey = key;
          }

          cur->win.ekey = key;
          cur->lastKey = key + step;
          cur->mixBlock = true;

          moveToNextRowInMem(pCheckInfo);
          pos += step;
        } else {
          moveToNextRowInMem(pCheckInfo);
        }
1920 1921
      } else if ((key > tsArray[pos] && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
                  (key < tsArray[pos] && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1922 1923 1924
        if (cur->win.skey == TSKEY_INITIAL_VAL) {
          cur->win.skey = tsArray[pos];
        }
1925

1926
        int32_t end = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, key, order);
1927 1928
        assert(end != -1);

H
Haojun Liao 已提交
1929
        if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it
1930
          if (pCfg->update == TD_ROW_DISCARD_UPDATE) {
H
Hongze Cheng 已提交
1931 1932 1933 1934
            moveToNextRowInMem(pCheckInfo);
          } else {
            end -= step;
          }
H
Haojun Liao 已提交
1935
        }
1936

1937
        int32_t qstart = 0, qend = 0;
1938
        getQualifiedRowsPos(pTsdbReadHandle, pos, end, numOfRows, &qstart, &qend);
1939

1940
        numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, qstart, qend);
1941 1942
        pos += (qend - qstart + 1) * step;

1943
        cur->win.ekey = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? tsArray[qend]:tsArray[qstart];
1944
        cur->lastKey  = cur->win.ekey + step;
1945
      }
1946
    } while (numOfRows < pTsdbReadHandle->outputCapacity);
H
Haojun Liao 已提交
1947

1948
    if (numOfRows < pTsdbReadHandle->outputCapacity) {
H
Haojun Liao 已提交
1949 1950 1951 1952
      /**
       * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
       * copy them all to result buffer, since it may be overlapped with file data block.
       */
1953
      if (node == NULL ||
1954 1955 1956 1957
          ((memRowKey((SMemRow)SL_GET_NODE_DATA(node)) > pTsdbReadHandle->window.ekey) &&
           ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
          ((memRowKey((SMemRow)SL_GET_NODE_DATA(node)) < pTsdbReadHandle->window.ekey) &&
           !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
1958 1959 1960 1961 1962
        // no data in cache or data in cache is greater than the ekey of time window, load data from file block
        if (cur->win.skey == TSKEY_INITIAL_VAL) {
          cur->win.skey = tsArray[pos];
        }

1963
        int32_t start = -1, end = -1;
1964
        getQualifiedRowsPos(pTsdbReadHandle, pos, endPos, numOfRows, &start, &end);
1965

1966
        numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, start, end);
1967
        pos += (end - start + 1) * step;
1968

1969
        cur->win.ekey = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? tsArray[end]:tsArray[start];
1970
        cur->lastKey  = cur->win.ekey + step;
H
Haojun Liao 已提交
1971
        cur->mixBlock = true;
1972
      }
1973 1974
    }
  }
H
Haojun Liao 已提交
1975 1976

  cur->blockCompleted =
1977 1978
      (((pos > endPos || cur->lastKey > pTsdbReadHandle->window.ekey) && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
       ((pos < endPos || cur->lastKey < pTsdbReadHandle->window.ekey) && !ASCENDING_TRAVERSE(pTsdbReadHandle->order)));
1979

1980
  if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)) {
dengyihao's avatar
dengyihao 已提交
1981
    TSWAP(cur->win.skey, cur->win.ekey, TSKEY);
1982
  }
1983

1984 1985 1986
  moveDataToFront(pTsdbReadHandle, numOfRows, numOfCols);
  updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos);
  doCheckGeneratedBlockRange(pTsdbReadHandle);
H
Haojun Liao 已提交
1987

H
Haojun Liao 已提交
1988 1989
  tsdbDebug("%p uid:%" PRIu64", data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, %s",
      pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows, pTsdbReadHandle->idStr);
1990 1991
}

1992
int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order) {
H
[td-32]  
hjxilinx 已提交
1993
  int    firstPos, lastPos, midPos = -1;
H
Haojun Liao 已提交
1994
  int    numOfRows;
1995 1996
  TSKEY* keyList;

H
[td-32]  
hjxilinx 已提交
1997
  if (num <= 0) return -1;
1998 1999

  keyList = (TSKEY*)pValue;
H
[td-32]  
hjxilinx 已提交
2000 2001
  firstPos = 0;
  lastPos = num - 1;
2002

2003
  if (order == TSDB_ORDER_DESC) {
H
[td-32]  
hjxilinx 已提交
2004 2005 2006 2007 2008
    // find the first position which is smaller than the key
    while (1) {
      if (key >= keyList[lastPos]) return lastPos;
      if (key == keyList[firstPos]) return firstPos;
      if (key < keyList[firstPos]) return firstPos - 1;
2009

H
Haojun Liao 已提交
2010 2011
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
2012

H
[td-32]  
hjxilinx 已提交
2013 2014 2015 2016 2017 2018 2019 2020
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
2021

H
[td-32]  
hjxilinx 已提交
2022 2023 2024 2025 2026
  } else {
    // find the first position which is bigger than the key
    while (1) {
      if (key <= keyList[firstPos]) return firstPos;
      if (key == keyList[lastPos]) return lastPos;
2027

H
[td-32]  
hjxilinx 已提交
2028 2029 2030 2031 2032 2033 2034
      if (key > keyList[lastPos]) {
        lastPos = lastPos + 1;
        if (lastPos >= num)
          return -1;
        else
          return lastPos;
      }
2035

H
Haojun Liao 已提交
2036 2037
      numOfRows = lastPos - firstPos + 1;
      midPos = (numOfRows >> 1) + firstPos;
2038

H
[td-32]  
hjxilinx 已提交
2039 2040 2041 2042 2043 2044 2045 2046 2047
      if (key < keyList[midPos]) {
        lastPos = midPos - 1;
      } else if (key > keyList[midPos]) {
        firstPos = midPos + 1;
      } else {
        break;
      }
    }
  }
2048

H
[td-32]  
hjxilinx 已提交
2049 2050 2051
  return midPos;
}

2052
static void cleanBlockOrderSupporter(SBlockOrderSupporter* pSupporter, int32_t numOfTables) {
S
TD-1848  
Shengliang Guan 已提交
2053 2054
  tfree(pSupporter->numOfBlocksPerTable);
  tfree(pSupporter->blockIndexArray);
2055 2056

  for (int32_t i = 0; i < numOfTables; ++i) {
H
Haojun Liao 已提交
2057
    STableBlockInfo* pBlockInfo = pSupporter->pDataBlockInfo[i];
S
TD-1848  
Shengliang Guan 已提交
2058
    tfree(pBlockInfo);
2059 2060
  }

S
TD-1848  
Shengliang Guan 已提交
2061
  tfree(pSupporter->pDataBlockInfo);
2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072
}

static int32_t dataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
  int32_t leftTableIndex = *(int32_t*)pLeft;
  int32_t rightTableIndex = *(int32_t*)pRight;

  SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;

  int32_t leftTableBlockIndex = pSupporter->blockIndexArray[leftTableIndex];
  int32_t rightTableBlockIndex = pSupporter->blockIndexArray[rightTableIndex];

2073
  if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftTableIndex]) {
2074 2075
    /* left block is empty */
    return 1;
2076
  } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightTableIndex]) {
2077 2078 2079 2080 2081 2082 2083
    /* right block is empty */
    return -1;
  }

  STableBlockInfo* pLeftBlockInfoEx = &pSupporter->pDataBlockInfo[leftTableIndex][leftTableBlockIndex];
  STableBlockInfo* pRightBlockInfoEx = &pSupporter->pDataBlockInfo[rightTableIndex][rightTableBlockIndex];

H
Haojun Liao 已提交
2084
  //    assert(pLeftBlockInfoEx->compBlock->offset != pRightBlockInfoEx->compBlock->offset);
2085
#if 0	// TODO: temporarily comment off requested by Dr. Liao
H
Haojun Liao 已提交
2086 2087
  if (pLeftBlockInfoEx->compBlock->offset == pRightBlockInfoEx->compBlock->offset &&
      pLeftBlockInfoEx->compBlock->last == pRightBlockInfoEx->compBlock->last) {
B
Bomin Zhang 已提交
2088
    tsdbError("error in header file, two block with same offset:%" PRId64, (int64_t)pLeftBlockInfoEx->compBlock->offset);
2089
  }
H
Haojun Liao 已提交
2090
#endif
2091

H
Haojun Liao 已提交
2092
  return pLeftBlockInfoEx->compBlock->offset > pRightBlockInfoEx->compBlock->offset ? 1 : -1;
2093 2094
}

2095
static int32_t createDataBlocksInfo(STsdbReadHandle* pTsdbReadHandle, int32_t numOfBlocks, int32_t* numOfAllocBlocks) {
H
Haojun Liao 已提交
2096 2097
  size_t size = sizeof(STableBlockInfo) * numOfBlocks;

2098 2099 2100
  if (pTsdbReadHandle->allocSize < size) {
    pTsdbReadHandle->allocSize = (int32_t)size;
    char* tmp = realloc(pTsdbReadHandle->pDataBlockInfo, pTsdbReadHandle->allocSize);
H
Haojun Liao 已提交
2101 2102 2103 2104
    if (tmp == NULL) {
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
    }

2105
    pTsdbReadHandle->pDataBlockInfo = (STableBlockInfo*) tmp;
2106 2107
  }

2108
  memset(pTsdbReadHandle->pDataBlockInfo, 0, size);
2109 2110
  *numOfAllocBlocks = numOfBlocks;

H
Haojun Liao 已提交
2111
  // access data blocks according to the offset of each block in asc/desc order.
2112
  int32_t numOfTables = (int32_t)taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2113

2114 2115
  SBlockOrderSupporter sup = {0};
  sup.numOfTables = numOfTables;
2116
  sup.numOfBlocksPerTable = calloc(1, sizeof(int32_t) * numOfTables);
2117 2118 2119
  sup.blockIndexArray = calloc(1, sizeof(int32_t) * numOfTables);
  sup.pDataBlockInfo = calloc(1, POINTER_BYTES * numOfTables);

2120
  if (sup.numOfBlocksPerTable == NULL || sup.blockIndexArray == NULL || sup.pDataBlockInfo == NULL) {
2121
    cleanBlockOrderSupporter(&sup, 0);
2122
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
2123
  }
H
Haojun Liao 已提交
2124

2125
  int32_t cnt = 0;
2126
  int32_t numOfQualTables = 0;
H
Haojun Liao 已提交
2127

2128
  for (int32_t j = 0; j < numOfTables; ++j) {
2129
    STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pTsdbReadHandle->pTableCheckInfo, j);
2130 2131 2132
    if (pTableCheck->numOfBlocks <= 0) {
      continue;
    }
H
Haojun Liao 已提交
2133

H
refact  
Hongze Cheng 已提交
2134
    SBlock* pBlock = pTableCheck->pCompInfo->blocks;
2135
    sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks;
2136

H
Haojun Liao 已提交
2137
    char* buf = malloc(sizeof(STableBlockInfo) * pTableCheck->numOfBlocks);
2138
    if (buf == NULL) {
2139
      cleanBlockOrderSupporter(&sup, numOfQualTables);
2140
      return TSDB_CODE_TDB_OUT_OF_MEMORY;
2141 2142
    }

2143
    sup.pDataBlockInfo[numOfQualTables] = (STableBlockInfo*)buf;
2144 2145

    for (int32_t k = 0; k < pTableCheck->numOfBlocks; ++k) {
H
Haojun Liao 已提交
2146
      STableBlockInfo* pBlockInfo = &sup.pDataBlockInfo[numOfQualTables][k];
2147

H
Haojun Liao 已提交
2148 2149
      pBlockInfo->compBlock = &pBlock[k];
      pBlockInfo->pTableCheckInfo = pTableCheck;
2150 2151 2152
      cnt++;
    }

2153
    numOfQualTables++;
2154 2155
  }

H
Haojun Liao 已提交
2156
  assert(numOfBlocks == cnt);
2157

H
Haojun Liao 已提交
2158 2159
  // since there is only one table qualified, blocks are not sorted
  if (numOfQualTables == 1) {
2160
    memcpy(pTsdbReadHandle->pDataBlockInfo, sup.pDataBlockInfo[0], sizeof(STableBlockInfo) * numOfBlocks);
H
Haojun Liao 已提交
2161
    cleanBlockOrderSupporter(&sup, numOfQualTables);
2162

H
Haojun Liao 已提交
2163 2164
    tsdbDebug("%p create data blocks info struct completed for 1 table, %d blocks not sorted %s", pTsdbReadHandle, cnt,
        pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2165 2166
    return TSDB_CODE_SUCCESS;
  }
2167

H
Haojun Liao 已提交
2168 2169
  tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pTsdbReadHandle, cnt,
      numOfQualTables, pTsdbReadHandle->idStr);
2170

2171
  assert(cnt <= numOfBlocks && numOfQualTables <= numOfTables);  // the pTableQueryInfo[j]->numOfBlocks may be 0
2172
  sup.numOfTables = numOfQualTables;
2173

2174 2175
  SMultiwayMergeTreeInfo* pTree = NULL;
  uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, dataBlockOrderCompar);
2176 2177
  if (ret != TSDB_CODE_SUCCESS) {
    cleanBlockOrderSupporter(&sup, numOfTables);
2178
    return TSDB_CODE_TDB_OUT_OF_MEMORY;
2179 2180 2181 2182 2183
  }

  int32_t numOfTotal = 0;

  while (numOfTotal < cnt) {
2184
    int32_t pos = tMergeTreeGetChosenIndex(pTree);
2185 2186
    int32_t index = sup.blockIndexArray[pos]++;

H
Haojun Liao 已提交
2187
    STableBlockInfo* pBlocksInfo = sup.pDataBlockInfo[pos];
2188
    pTsdbReadHandle->pDataBlockInfo[numOfTotal++] = pBlocksInfo[index];
2189 2190

    // set data block index overflow, in order to disable the offset comparator
2191 2192
    if (sup.blockIndexArray[pos] >= sup.numOfBlocksPerTable[pos]) {
      sup.blockIndexArray[pos] = sup.numOfBlocksPerTable[pos] + 1;
2193
    }
2194

2195
    tMergeTreeAdjust(pTree, tMergeTreeAdjustIndex(pTree));
2196 2197 2198 2199 2200
  }

  /*
   * available when no import exists
   * for(int32_t i = 0; i < cnt - 1; ++i) {
H
Haojun Liao 已提交
2201
   *   assert((*pDataBlockInfo)[i].compBlock->offset < (*pDataBlockInfo)[i+1].compBlock->offset);
2202 2203 2204
   * }
   */

H
Haojun Liao 已提交
2205
  tsdbDebug("%p %d data blocks sort completed, %s", pTsdbReadHandle, cnt, pTsdbReadHandle->idStr);
2206 2207 2208 2209 2210 2211
  cleanBlockOrderSupporter(&sup, numOfTables);
  free(pTree);

  return TSDB_CODE_SUCCESS;
}

2212
static int32_t getFirstFileDataBlock(STsdbReadHandle* pTsdbReadHandle, bool* exists);
H
Haojun Liao 已提交
2213

2214 2215 2216
static int32_t getDataBlockRv(STsdbReadHandle* pTsdbReadHandle, STableBlockInfo* pNext, bool *exists) {
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1 : -1;
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
2217 2218

  while(1) {
2219
    int32_t code = loadFileDataBlock(pTsdbReadHandle, pNext->compBlock, pNext->pTableCheckInfo, exists);
H
Haojun Liao 已提交
2220 2221 2222 2223
    if (code != TSDB_CODE_SUCCESS || *exists) {
      return code;
    }

2224 2225
    if ((cur->slot == pTsdbReadHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) ||
        (cur->slot == 0 && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
H
Haojun Liao 已提交
2226
      // all data blocks in current file has been checked already, try next file if exists
2227
      return getFirstFileDataBlock(pTsdbReadHandle, exists);
H
Haojun Liao 已提交
2228 2229 2230 2231
    } else {  // next block of the same file
      cur->slot += step;
      cur->mixBlock = false;
      cur->blockCompleted = false;
2232
      pNext = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
H
Haojun Liao 已提交
2233 2234 2235 2236
    }
  }
}

2237 2238 2239
static int32_t getFirstFileDataBlock(STsdbReadHandle* pTsdbReadHandle, bool* exists) {
  pTsdbReadHandle->numOfBlocks = 0;
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
2240 2241 2242

  int32_t code = TSDB_CODE_SUCCESS;

2243
  int32_t numOfBlocks = 0;
2244
  int32_t numOfTables = (int32_t)taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2245

2246
  STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
2247 2248
  STimeWindow win = TSWINDOW_INITIALIZER;

H
Hongze Cheng 已提交
2249
  while (true) {
2250
    tsdbRLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2251

2252 2253
    if ((pTsdbReadHandle->pFileGroup = tsdbFSIterNext(&pTsdbReadHandle->fileIter)) == NULL) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2254 2255 2256
      break;
    }

2257
    tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pTsdbReadHandle->pFileGroup->fid, &win.skey, &win.ekey);
2258 2259

    // current file are not overlapped with query time window, ignore remain files
2260 2261 2262
    if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.skey > pTsdbReadHandle->window.ekey) ||
        (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.ekey < pTsdbReadHandle->window.ekey)) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2263 2264
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pTsdbReadHandle,
                pTsdbReadHandle->window.skey, pTsdbReadHandle->window.ekey, pTsdbReadHandle->idStr);
2265 2266
      pTsdbReadHandle->pFileGroup = NULL;
      assert(pTsdbReadHandle->numOfBlocks == 0);
2267 2268 2269
      break;
    }

2270 2271
    if (tsdbSetAndOpenReadFSet(&pTsdbReadHandle->rhelper, pTsdbReadHandle->pFileGroup) < 0) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2272 2273 2274 2275
      code = terrno;
      break;
    }

2276
    tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Hongze Cheng 已提交
2277

2278
    if (tsdbLoadBlockIdx(&pTsdbReadHandle->rhelper) < 0) {
H
Hongze Cheng 已提交
2279 2280 2281 2282
      code = terrno;
      break;
    }

2283
    if ((code = getFileCompInfo(pTsdbReadHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
2284 2285
      break;
    }
H
Haojun Liao 已提交
2286

H
Haojun Liao 已提交
2287 2288
    tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pTsdbReadHandle, numOfBlocks, numOfTables,
              pTsdbReadHandle->pFileGroup->fid, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2289

2290 2291 2292 2293
    assert(numOfBlocks >= 0);
    if (numOfBlocks == 0) {
      continue;
    }
H
Haojun Liao 已提交
2294

2295
    // todo return error code to query engine
2296
    if ((code = createDataBlocksInfo(pTsdbReadHandle, numOfBlocks, &pTsdbReadHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) {
2297 2298
      break;
    }
H
Haojun Liao 已提交
2299

2300 2301
    assert(numOfBlocks >= pTsdbReadHandle->numOfBlocks);
    if (pTsdbReadHandle->numOfBlocks > 0) {
2302 2303 2304
      break;
    }
  }
H
Haojun Liao 已提交
2305

2306
  // no data in file anymore
2307
  if (pTsdbReadHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2308
    if (code == TSDB_CODE_SUCCESS) {
2309
      assert(pTsdbReadHandle->pFileGroup == NULL);
H
Haojun Liao 已提交
2310 2311
    }

D
dapan1121 已提交
2312
    cur->fid = INT32_MIN;  // denote that there are no data in file anymore
H
Haojun Liao 已提交
2313 2314
    *exists = false;
    return code;
2315
  }
H
Haojun Liao 已提交
2316

2317 2318 2319
  assert(pTsdbReadHandle->pFileGroup != NULL && pTsdbReadHandle->numOfBlocks > 0);
  cur->slot = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 0:pTsdbReadHandle->numOfBlocks-1;
  cur->fid = pTsdbReadHandle->pFileGroup->fid;
H
Haojun Liao 已提交
2320

2321 2322
  STableBlockInfo* pBlockInfo = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
  return getDataBlockRv(pTsdbReadHandle, pBlockInfo, exists);
H
Haojun Liao 已提交
2323 2324 2325 2326 2327 2328 2329
}

static bool isEndFileDataBlock(SQueryFilePos* cur, int32_t numOfBlocks, bool ascTrav) {
  assert(cur != NULL && numOfBlocks > 0);
  return (cur->slot == numOfBlocks - 1 && ascTrav) || (cur->slot == 0 && !ascTrav);
}

2330 2331
static void moveToNextDataBlockInCurrentFile(STsdbReadHandle* pTsdbReadHandle) {
  int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1 : -1;
H
Haojun Liao 已提交
2332

2333 2334
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
  assert(cur->slot < pTsdbReadHandle->numOfBlocks && cur->slot >= 0);
H
Haojun Liao 已提交
2335 2336 2337 2338

  cur->slot += step;
  cur->mixBlock       = false;
  cur->blockCompleted = false;
2339
}
2340
#if 0
H
Haojun Liao 已提交
2341
int32_t tsdbGetFileBlocksDistInfo(tsdbReaderT* queryHandle, STableBlockDist* pTableBlockInfo) {
2342
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) queryHandle;
H
Haojun Liao 已提交
2343

H
Haojun Liao 已提交
2344
  pTableBlockInfo->totalSize = 0;
2345
  pTableBlockInfo->totalRows = 0;
2346
  STsdbFS* pFileHandle = REPO_FS(pTsdbReadHandle->pTsdb);
H
Haojun Liao 已提交
2347 2348

  // find the start data block in file
2349 2350 2351
  pTsdbReadHandle->locateStart = true;
  STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
  int32_t   fid = getFileIdFromKey(pTsdbReadHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
H
Haojun Liao 已提交
2352 2353

  tsdbRLockFS(pFileHandle);
2354 2355
  tsdbFSIterInit(&pTsdbReadHandle->fileIter, pFileHandle, pTsdbReadHandle->order);
  tsdbFSIterSeek(&pTsdbReadHandle->fileIter, fid);
H
Haojun Liao 已提交
2356 2357
  tsdbUnLockFS(pFileHandle);

H
Haojun Liao 已提交
2358
  pTableBlockInfo->numOfFiles += 1;
H
Haojun Liao 已提交
2359

H
Haojun Liao 已提交
2360
  int32_t     code = TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
2361
  int32_t     numOfBlocks = 0;
2362
  int32_t     numOfTables = (int32_t)taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2363
  int         defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock);
H
Haojun Liao 已提交
2364 2365 2366 2367
  STimeWindow win = TSWINDOW_INITIALIZER;

  while (true) {
    numOfBlocks = 0;
2368
    tsdbRLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2369

2370 2371
    if ((pTsdbReadHandle->pFileGroup = tsdbFSIterNext(&pTsdbReadHandle->fileIter)) == NULL) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2372 2373 2374
      break;
    }

2375
    tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pTsdbReadHandle->pFileGroup->fid, &win.skey, &win.ekey);
H
Haojun Liao 已提交
2376 2377

    // current file are not overlapped with query time window, ignore remain files
2378 2379 2380
    if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.skey > pTsdbReadHandle->window.ekey) ||
    (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && win.ekey < pTsdbReadHandle->window.ekey)) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2381 2382
      tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pTsdbReadHandle,
                pTsdbReadHandle->window.skey, pTsdbReadHandle->window.ekey, pTsdbReadHandle->idStr);
2383
      pTsdbReadHandle->pFileGroup = NULL;
H
Haojun Liao 已提交
2384 2385 2386
      break;
    }

H
Haojun Liao 已提交
2387
    pTableBlockInfo->numOfFiles += 1;
2388 2389
    if (tsdbSetAndOpenReadFSet(&pTsdbReadHandle->rhelper, pTsdbReadHandle->pFileGroup) < 0) {
      tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2390 2391 2392 2393
      code = terrno;
      break;
    }

2394
    tsdbUnLockFS(REPO_FS(pTsdbReadHandle->pTsdb));
H
Haojun Liao 已提交
2395

2396
    if (tsdbLoadBlockIdx(&pTsdbReadHandle->rhelper) < 0) {
H
Haojun Liao 已提交
2397 2398 2399 2400
      code = terrno;
      break;
    }

2401
    if ((code = getFileCompInfo(pTsdbReadHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2402 2403 2404
      break;
    }

H
Haojun Liao 已提交
2405 2406
    tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pTsdbReadHandle, numOfBlocks, numOfTables,
              pTsdbReadHandle->pFileGroup->fid, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2407 2408 2409 2410 2411 2412

    if (numOfBlocks == 0) {
      continue;
    }

    for (int32_t i = 0; i < numOfTables; ++i) {
2413
      STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Haojun Liao 已提交
2414 2415 2416

      SBlock* pBlock = pCheckInfo->pCompInfo->blocks;
      for (int32_t j = 0; j < pCheckInfo->numOfBlocks; ++j) {
H
Haojun Liao 已提交
2417
        pTableBlockInfo->totalSize += pBlock[j].len;
H
Haojun Liao 已提交
2418

H
Haojun Liao 已提交
2419
        int32_t numOfRows = pBlock[j].numOfRows;
2420 2421 2422
        pTableBlockInfo->totalRows += numOfRows;
        if (numOfRows > pTableBlockInfo->maxRows) pTableBlockInfo->maxRows = numOfRows;
        if (numOfRows < pTableBlockInfo->minRows) pTableBlockInfo->minRows = numOfRows;
2423
        if (numOfRows < defaultRows) pTableBlockInfo->numOfSmallBlocks+=1;
2424 2425 2426
        int32_t  stepIndex = (numOfRows-1)/TSDB_BLOCK_DIST_STEP_ROWS;
        SFileBlockInfo *blockInfo = (SFileBlockInfo*)taosArrayGet(pTableBlockInfo->dataBlockInfos, stepIndex);
        blockInfo->numBlocksOfStep++;
H
Haojun Liao 已提交
2427 2428 2429 2430 2431 2432
      }
    }
  }

  return code;
}
2433
#endif
H
Haojun Liao 已提交
2434

2435 2436 2437
static int32_t getDataBlocksInFiles(STsdbReadHandle* pTsdbReadHandle, bool* exists) {
  STsdbFS*       pFileHandle = REPO_FS(pTsdbReadHandle->pTsdb);
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
2438 2439

  // find the start data block in file
2440 2441 2442 2443
  if (!pTsdbReadHandle->locateStart) {
    pTsdbReadHandle->locateStart = true;
    STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
    int32_t   fid = getFileIdFromKey(pTsdbReadHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
H
Haojun Liao 已提交
2444

H
Hongze Cheng 已提交
2445
    tsdbRLockFS(pFileHandle);
2446 2447
    tsdbFSIterInit(&pTsdbReadHandle->fileIter, pFileHandle, pTsdbReadHandle->order);
    tsdbFSIterSeek(&pTsdbReadHandle->fileIter, fid);
H
Hongze Cheng 已提交
2448
    tsdbUnLockFS(pFileHandle);
2449

2450
    return getFirstFileDataBlock(pTsdbReadHandle, exists);
2451
  } else {
2452
    // check if current file block is all consumed
2453
    STableBlockInfo* pBlockInfo = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
2454
    STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
H
Haojun Liao 已提交
2455

2456
    // current block is done, try next
H
Haojun Liao 已提交
2457
    if ((!cur->mixBlock) || cur->blockCompleted) {
H
Haojun Liao 已提交
2458
      // all data blocks in current file has been checked already, try next file if exists
2459
    } else {
H
Haojun Liao 已提交
2460 2461
      tsdbDebug("%p continue in current data block, index:%d, pos:%d, %s", pTsdbReadHandle, cur->slot, cur->pos,
                pTsdbReadHandle->idStr);
2462 2463
      int32_t code = handleDataMergeIfNeeded(pTsdbReadHandle, pBlockInfo->compBlock, pCheckInfo);
      *exists = (pTsdbReadHandle->realNumOfRows > 0);
H
Haojun Liao 已提交
2464

H
Haojun Liao 已提交
2465 2466 2467 2468 2469 2470 2471
      if (code != TSDB_CODE_SUCCESS || *exists) {
        return code;
      }
    }

    // current block is empty, try next block in file
    // all data blocks in current file has been checked already, try next file if exists
2472 2473
    if (isEndFileDataBlock(cur, pTsdbReadHandle->numOfBlocks, ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
      return getFirstFileDataBlock(pTsdbReadHandle, exists);
H
Haojun Liao 已提交
2474
    } else {
2475 2476 2477
      moveToNextDataBlockInCurrentFile(pTsdbReadHandle);
      STableBlockInfo* pNext = &pTsdbReadHandle->pDataBlockInfo[cur->slot];
      return getDataBlockRv(pTsdbReadHandle, pNext, exists);
2478 2479
    }
  }
2480 2481
}

2482 2483
static bool doHasDataInBuffer(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
2484
  
2485 2486
  while (pTsdbReadHandle->activeIndex < numOfTables) {
    if (hasMoreDataInCache(pTsdbReadHandle)) {
2487 2488
      return true;
    }
H
Haojun Liao 已提交
2489

2490
    pTsdbReadHandle->activeIndex += 1;
2491
  }
H
Haojun Liao 已提交
2492

2493 2494 2495
  return false;
}

2496
//todo not unref yet, since it is not support multi-group interpolation query
H
Haojun Liao 已提交
2497
static UNUSED_FUNC void changeQueryHandleForInterpQuery(tsdbReaderT pHandle) {
H
Haojun Liao 已提交
2498
  // filter the queried time stamp in the first place
2499
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
H
Haojun Liao 已提交
2500 2501

  // starts from the buffer in case of descending timestamp order check data blocks
2502
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2503 2504 2505

  int32_t i = 0;
  while(i < numOfTables) {
2506
    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
H
Haojun Liao 已提交
2507 2508

    // the first qualified table for interpolation query
2509 2510 2511 2512
//    if ((pTsdbReadHandle->window.skey <= pCheckInfo->pTableObj->lastKey) &&
//        (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) {
//      break;
//    }
H
Haojun Liao 已提交
2513 2514 2515 2516 2517 2518 2519 2520 2521

    i++;
  }

  // there are no data in all the tables
  if (i == numOfTables) {
    return;
  }

2522 2523
  STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i);
  taosArrayClear(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2524

2525 2526
  info.lastKey = pTsdbReadHandle->window.skey;
  taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info);
H
Haojun Liao 已提交
2527 2528 2529
}

static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* win,
2530
                                 STsdbReadHandle* pTsdbReadHandle) {
H
Haojun Liao 已提交
2531
  int     numOfRows = 0;
2532 2533
  int32_t numOfCols = (int32_t)taosArrayGetSize(pTsdbReadHandle->pColumns);
  STsdbCfg* pCfg = &pTsdbReadHandle->pTsdb->config;
H
Haojun Liao 已提交
2534 2535 2536
  win->skey = TSKEY_INITIAL_VAL;

  int64_t st = taosGetTimestampUs();
D
fix bug  
dapan1121 已提交
2537 2538
  int16_t rv = -1;
  STSchema* pSchema = NULL;
H
Haojun Liao 已提交
2539 2540

  do {
2541
    SMemRow row = getSMemRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, NULL);
H
Haojun Liao 已提交
2542 2543 2544 2545
    if (row == NULL) {
      break;
    }

C
Cary Xu 已提交
2546
    TSKEY key = memRowKey(row);
2547 2548 2549
    if ((key > maxKey && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pTsdbReadHandle->order))) {
      tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pTsdbReadHandle, key, pTsdbReadHandle->window.skey,
                pTsdbReadHandle->window.ekey);
H
Haojun Liao 已提交
2550 2551 2552 2553 2554 2555 2556 2557 2558

      break;
    }

    if (win->skey == INT64_MIN) {
      win->skey = key;
    }

    win->ekey = key;
C
Cary Xu 已提交
2559
    if (rv != memRowVersion(row)) {
2560
      pSchema = metaGetTbTSchema(pTsdbReadHandle->pTsdb->pMeta, pCheckInfo->tableId, 0);
C
Cary Xu 已提交
2561
      rv = memRowVersion(row);
D
fix bug  
dapan1121 已提交
2562
    }
2563
    mergeTwoRowFromMem(pTsdbReadHandle, maxRowsToRead, numOfRows, row, NULL, numOfCols, pCheckInfo->tableId, pSchema, NULL, true);
H
Haojun Liao 已提交
2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574

    if (++numOfRows >= maxRowsToRead) {
      moveToNextRowInMem(pCheckInfo);
      break;
    }

  } while(moveToNextRowInMem(pCheckInfo));

  assert(numOfRows <= maxRowsToRead);

  // if the buffer is not full in case of descending order query, move the data in the front of the buffer
2575
  if (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && numOfRows < maxRowsToRead) {
H
Haojun Liao 已提交
2576 2577 2578
    int32_t emptySize = maxRowsToRead - numOfRows;

    for(int32_t i = 0; i < numOfCols; ++i) {
2579
      SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
H
Haojun Liao 已提交
2580 2581 2582 2583 2584
      memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
    }
  }

  int64_t elapsedTime = taosGetTimestampUs() - st;
H
Haojun Liao 已提交
2585 2586
  tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, %s", pTsdbReadHandle,
            elapsedTime, numOfRows, numOfCols, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
2587 2588 2589 2590

  return numOfRows;
}

2591 2592
static int32_t getAllTableList(SMeta* pMeta, uint64_t uid, SArray* list) {
  SMCtbCursor* pCur = metaOpenCtbCursor(pMeta, uid);
H
Haojun Liao 已提交
2593

2594 2595 2596 2597 2598
  while (1) {
    tb_uid_t id = metaCtbCursorNext(pCur);
    if (id == 0) {
      break;
    }
H
Haojun Liao 已提交
2599

2600
    STableKeyInfo info = {.lastKey = TSKEY_INITIAL_VAL, uid = id};
H
Haojun Liao 已提交
2601 2602 2603
    taosArrayPush(list, &info);
  }

2604
  metaCloseCtbCurosr(pCur);
H
Haojun Liao 已提交
2605 2606 2607 2608 2609 2610 2611 2612
  return TSDB_CODE_SUCCESS;
}

static void destroyHelper(void* param) {
  if (param == NULL) {
    return;
  }

2613 2614 2615 2616 2617 2618
//  tQueryInfo* pInfo = (tQueryInfo*)param;
//  if (pInfo->optr != TSDB_RELATION_IN) {
//    tfree(pInfo->q);
//  } else {
//    taosHashCleanup((SHashObj *)(pInfo->q));
//  }
H
Haojun Liao 已提交
2619 2620 2621 2622

  free(param);
}

2623 2624 2625 2626 2627
#define TSDB_PREV_ROW  0x1
#define TSDB_NEXT_ROW  0x2

static bool  loadBlockOfActiveTable(STsdbReadHandle* pTsdbReadHandle) {
  if (pTsdbReadHandle->checkFiles) {
H
Haojun Liao 已提交
2628 2629
    // check if the query range overlaps with the file data block
    bool exists = true;
H
Haojun Liao 已提交
2630

2631
    int32_t code = getDataBlocksInFiles(pTsdbReadHandle, &exists);
H
Haojun Liao 已提交
2632
    if (code != TSDB_CODE_SUCCESS) {
2633
      pTsdbReadHandle->checkFiles = false;
H
Haojun Liao 已提交
2634 2635
      return false;
    }
H
Haojun Liao 已提交
2636

H
Haojun Liao 已提交
2637
    if (exists) {
H
Haojun Liao 已提交
2638
      tsdbRetrieveDataBlock((tsdbReaderT*) pTsdbReadHandle, NULL);
2639 2640 2641
      if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey) {
        SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, 0);
        assert(*(int64_t*)pColInfo->pData == pTsdbReadHandle->window.skey);
H
Haojun Liao 已提交
2642 2643
      }

2644
      pTsdbReadHandle->currentLoadExternalRows = false; // clear the flag, since the exact matched row is found.
H
Haojun Liao 已提交
2645 2646
      return exists;
    }
H
Haojun Liao 已提交
2647

2648
    pTsdbReadHandle->checkFiles = false;
H
Haojun Liao 已提交
2649
  }
H
Haojun Liao 已提交
2650

2651 2652
  if (hasMoreDataInCache(pTsdbReadHandle)) {
    pTsdbReadHandle->currentLoadExternalRows = false;
H
Haojun Liao 已提交
2653 2654
    return true;
  }
H
Haojun Liao 已提交
2655

H
Haojun Liao 已提交
2656
  // current result is empty
2657 2658
  if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey && pTsdbReadHandle->cur.rows == 0) {
//    STsdbMemTable* pMemRef = pTsdbReadHandle->pMemTable;
H
Haojun Liao 已提交
2659

2660 2661
//    doGetExternalRow(pTsdbReadHandle, TSDB_PREV_ROW, pMemRef);
//    doGetExternalRow(pTsdbReadHandle, TSDB_NEXT_ROW, pMemRef);
H
Haojun Liao 已提交
2662

2663
    bool result = tsdbGetExternalRow(pTsdbReadHandle);
H
Haojun Liao 已提交
2664

2665 2666 2667
//    pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//    pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
    pTsdbReadHandle->currentLoadExternalRows = false;
H
Haojun Liao 已提交
2668 2669

    return result;
2670
  }
H
Haojun Liao 已提交
2671

H
Haojun Liao 已提交
2672 2673
  return false;
}
2674

2675
static bool loadCachedLastRow(STsdbReadHandle* pTsdbReadHandle) {
H
Haojun Liao 已提交
2676
  // the last row is cached in buffer, return it directly.
2677 2678 2679
  // here note that the pTsdbReadHandle->window must be the TS_INITIALIZER
  int32_t numOfCols  = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle));
  size_t  numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2680 2681
  assert(numOfTables > 0 && numOfCols > 0);

2682
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
2683

C
Cary Xu 已提交
2684
  SMemRow  pRow = NULL;
H
Haojun Liao 已提交
2685
  TSKEY    key  = TSKEY_INITIAL_VAL;
2686 2687 2688 2689 2690 2691 2692 2693
  int32_t  step = ASCENDING_TRAVERSE(pTsdbReadHandle->order)? 1:-1;

  if (++pTsdbReadHandle->activeIndex < numOfTables) {
    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
//    int32_t ret = tsdbGetCachedLastRow(pCheckInfo->pTableObj, &pRow, &key);
//    if (ret != TSDB_CODE_SUCCESS) {
//      return false;
//    }
H
Haojun Liao 已提交
2694
    mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, pRow, NULL, numOfCols, pCheckInfo->tableId, NULL, NULL, true);
H
Haojun Liao 已提交
2695
    tfree(pRow);
H
Haojun Liao 已提交
2696

H
Haojun Liao 已提交
2697 2698 2699 2700 2701 2702 2703 2704 2705 2706
    // update the last key value
    pCheckInfo->lastKey = key + step;

    cur->rows     = 1;  // only one row
    cur->lastKey  = key + step;
    cur->mixBlock = true;
    cur->win.skey = key;
    cur->win.ekey = key;

    return true;
2707
  }
H
Haojun Liao 已提交
2708

H
Haojun Liao 已提交
2709 2710 2711
  return false;
}

D
init  
dapan1121 已提交
2712

D
update  
dapan1121 已提交
2713

2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865
//static bool loadCachedLast(STsdbReadHandle* pTsdbReadHandle) {
//  // the last row is cached in buffer, return it directly.
//  // here note that the pTsdbReadHandle->window must be the TS_INITIALIZER
//  int32_t tgNumOfCols = (int32_t)QH_GET_NUM_OF_COLS(pTsdbReadHandle);
//  size_t  numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
//  int32_t numOfRows = 0;
//  assert(numOfTables > 0 && tgNumOfCols > 0);
//  SQueryFilePos* cur = &pTsdbReadHandle->cur;
//  TSKEY priKey = TSKEY_INITIAL_VAL;
//  int32_t priIdx = -1;
//  SColumnInfoData* pColInfo = NULL;
//
//  while (++pTsdbReadHandle->activeIndex < numOfTables) {
//    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
//    STable* pTable = pCheckInfo->pTableObj;
//    char* pData = NULL;
//
//    int32_t numOfCols = pTable->maxColNum;
//
//    if (pTable->lastCols == NULL || pTable->maxColNum <= 0) {
//      tsdbWarn("no last cached for table %s, uid:%" PRIu64 ",tid:%d", pTable->name->data, pTable->uid, pTable->tableId);
//      continue;
//    }
//
//    int32_t i = 0, j = 0;
//    while(i < tgNumOfCols && j < numOfCols) {
//      pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i);
//      if (pTable->lastCols[j].colId < pColInfo->info.colId) {
//        j++;
//        continue;
//      } else if (pTable->lastCols[j].colId > pColInfo->info.colId) {
//        i++;
//        continue;
//      }
//
//      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
//
//      if (pTable->lastCols[j].bytes > 0) {
//        void* value = pTable->lastCols[j].pData;
//        switch (pColInfo->info.type) {
//          case TSDB_DATA_TYPE_BINARY:
//          case TSDB_DATA_TYPE_NCHAR:
//            memcpy(pData, value, varDataTLen(value));
//            break;
//          case TSDB_DATA_TYPE_NULL:
//          case TSDB_DATA_TYPE_BOOL:
//          case TSDB_DATA_TYPE_TINYINT:
//          case TSDB_DATA_TYPE_UTINYINT:
//            *(uint8_t *)pData = *(uint8_t *)value;
//            break;
//          case TSDB_DATA_TYPE_SMALLINT:
//          case TSDB_DATA_TYPE_USMALLINT:
//            *(uint16_t *)pData = *(uint16_t *)value;
//            break;
//          case TSDB_DATA_TYPE_INT:
//          case TSDB_DATA_TYPE_UINT:
//            *(uint32_t *)pData = *(uint32_t *)value;
//            break;
//          case TSDB_DATA_TYPE_BIGINT:
//          case TSDB_DATA_TYPE_UBIGINT:
//            *(uint64_t *)pData = *(uint64_t *)value;
//            break;
//          case TSDB_DATA_TYPE_FLOAT:
//            SET_FLOAT_PTR(pData, value);
//            break;
//          case TSDB_DATA_TYPE_DOUBLE:
//            SET_DOUBLE_PTR(pData, value);
//            break;
//          case TSDB_DATA_TYPE_TIMESTAMP:
//            if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
//              priKey = tdGetKey(*(TKEY *)value);
//              priIdx = i;
//
//              i++;
//              j++;
//              continue;
//            } else {
//              *(TSKEY *)pData = *(TSKEY *)value;
//            }
//            break;
//          default:
//            memcpy(pData, value, pColInfo->info.bytes);
//        }
//
//        for (int32_t n = 0; n < tgNumOfCols; ++n) {
//          if (n == i) {
//            continue;
//          }
//
//          pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, n);
//          pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;;
//
//          if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
////            *(TSKEY *)pData = pTable->lastCols[j].ts;
//            continue;
//          }
//
//          if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
//            setVardataNull(pData, pColInfo->info.type);
//          } else {
//            setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
//          }
//        }
//
//        numOfRows++;
//        assert(numOfRows < pTsdbReadHandle->outputCapacity);
//      }
//
//      i++;
//      j++;
//    }
//
//    // leave the real ts column as the last row, because last function only (not stable) use the last row as res
//    if (priKey != TSKEY_INITIAL_VAL) {
//      pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, priIdx);
//      pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
//
//      *(TSKEY *)pData = priKey;
//
//      for (int32_t n = 0; n < tgNumOfCols; ++n) {
//        if (n == priIdx) {
//          continue;
//        }
//
//        pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, n);
//        pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;;
//
//        assert (pColInfo->info.colId != PRIMARYKEY_TIMESTAMP_COL_ID);
//
//        if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
//          setVardataNull(pData, pColInfo->info.type);
//        } else {
//          setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
//        }
//      }
//
//      numOfRows++;
//    }
//
//    if (numOfRows > 0) {
//      cur->rows     = numOfRows;
//      cur->mixBlock = true;
//
//      return true;
//    }
//  }
//
//  return false;
//}

static bool loadDataBlockFromTableSeq(STsdbReadHandle* pTsdbReadHandle) {
  size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo);
H
Haojun Liao 已提交
2866 2867 2868
  assert(numOfTables > 0);

  int64_t stime = taosGetTimestampUs();
H
Haojun Liao 已提交
2869

2870 2871
  while(pTsdbReadHandle->activeIndex < numOfTables) {
    if (loadBlockOfActiveTable(pTsdbReadHandle)) {
H
Haojun Liao 已提交
2872 2873 2874
      return true;
    }

2875
    STableCheckInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
H
Haojun Liao 已提交
2876 2877
    pCheckInfo->numOfBlocks = 0;

2878 2879 2880 2881 2882
    pTsdbReadHandle->activeIndex += 1;
    pTsdbReadHandle->locateStart = false;
    pTsdbReadHandle->checkFiles  = true;
    pTsdbReadHandle->cur.rows    = 0;
    pTsdbReadHandle->currentLoadExternalRows = pTsdbReadHandle->loadExternalRow;
H
Haojun Liao 已提交
2883 2884 2885 2886

    terrno = TSDB_CODE_SUCCESS;

    int64_t elapsedTime = taosGetTimestampUs() - stime;
2887
    pTsdbReadHandle->cost.checkForNextTime += elapsedTime;
H
Haojun Liao 已提交
2888 2889 2890
  }

  return false;
2891 2892
}

H
Haojun Liao 已提交
2893
// handle data in cache situation
H
Haojun Liao 已提交
2894
bool tsdbNextDataBlock(tsdbReaderT pHandle) {
2895
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
Y
yihaoDeng 已提交
2896

2897
  if (emptyQueryTimewindow(pTsdbReadHandle)) {
H
Haojun Liao 已提交
2898
    tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pTsdbReadHandle, pTsdbReadHandle->idStr);
2899 2900 2901
    return false;
  }

Y
yihaoDeng 已提交
2902 2903 2904
  int64_t stime = taosGetTimestampUs();
  int64_t elapsedTime = stime;

2905
  // TODO refactor: remove "type"
2906 2907 2908 2909 2910
  if (pTsdbReadHandle->type == TSDB_QUERY_TYPE_LAST) {
    if (pTsdbReadHandle->cachelastrow == TSDB_CACHED_TYPE_LASTROW) {
//      return loadCachedLastRow(pTsdbReadHandle);
    } else if (pTsdbReadHandle->cachelastrow == TSDB_CACHED_TYPE_LAST) {
//      return loadCachedLast(pTsdbReadHandle);
D
init  
dapan1121 已提交
2911
    }
H
Haojun Liao 已提交
2912
  }
Y
yihaoDeng 已提交
2913

2914 2915
  if (pTsdbReadHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) {
    return loadDataBlockFromTableSeq(pTsdbReadHandle);
H
Haojun Liao 已提交
2916
  } else { // loadType == RR and Offset Order
2917
    if (pTsdbReadHandle->checkFiles) {
H
Haojun Liao 已提交
2918 2919 2920
      // check if the query range overlaps with the file data block
      bool exists = true;

2921
      int32_t code = getDataBlocksInFiles(pTsdbReadHandle, &exists);
H
Haojun Liao 已提交
2922
      if (code != TSDB_CODE_SUCCESS) {
2923 2924
        pTsdbReadHandle->activeIndex = 0;
        pTsdbReadHandle->checkFiles = false;
H
Haojun Liao 已提交
2925 2926 2927 2928 2929

        return false;
      }

      if (exists) {
2930
        pTsdbReadHandle->cost.checkForNextTime += (taosGetTimestampUs() - stime);
H
Haojun Liao 已提交
2931 2932
        return exists;
      }
Y
yihaoDeng 已提交
2933

2934 2935
      pTsdbReadHandle->activeIndex = 0;
      pTsdbReadHandle->checkFiles = false;
Y
yihaoDeng 已提交
2936 2937
    }

H
Haojun Liao 已提交
2938
    // TODO: opt by consider the scan order
2939
    bool ret = doHasDataInBuffer(pTsdbReadHandle);
H
Haojun Liao 已提交
2940
    terrno = TSDB_CODE_SUCCESS;
Y
yihaoDeng 已提交
2941

H
Haojun Liao 已提交
2942
    elapsedTime = taosGetTimestampUs() - stime;
2943
    pTsdbReadHandle->cost.checkForNextTime += elapsedTime;
H
Haojun Liao 已提交
2944
    return ret;
Y
yihaoDeng 已提交
2945 2946
  }
}
2947

2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012
//static int32_t doGetExternalRow(STsdbReadHandle* pTsdbReadHandle, int16_t type, STsdbMemTable* pMemRef) {
//  STsdbReadHandle* pSecQueryHandle = NULL;
//
//  if (type == TSDB_PREV_ROW && pTsdbReadHandle->prev) {
//    return TSDB_CODE_SUCCESS;
//  }
//
//  if (type == TSDB_NEXT_ROW && pTsdbReadHandle->next) {
//    return TSDB_CODE_SUCCESS;
//  }
//
//  // prepare the structure
//  int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pTsdbReadHandle);
//
//  if (type == TSDB_PREV_ROW) {
//    pTsdbReadHandle->prev = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
//    if (pTsdbReadHandle->prev == NULL) {
//      terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//      goto out_of_memory;
//    }
//  } else {
//    pTsdbReadHandle->next = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
//    if (pTsdbReadHandle->next == NULL) {
//      terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//      goto out_of_memory;
//    }
//  }
//
//  SArray* row = (type == TSDB_PREV_ROW)? pTsdbReadHandle->prev : pTsdbReadHandle->next;
//
//  for (int32_t i = 0; i < numOfCols; ++i) {
//    SColumnInfoData* pCol = taosArrayGet(pTsdbReadHandle->pColumns, i);
//
//    SColumnInfoData colInfo = {{0}, 0};
//    colInfo.info = pCol->info;
//    colInfo.pData = calloc(1, pCol->info.bytes);
//    if (colInfo.pData == NULL) {
//      terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//      goto out_of_memory;
//    }
//
//    taosArrayPush(row, &colInfo);
//  }
//
//  // load the previous row
//  STsdbQueryCond cond = {.numOfCols = numOfCols, .loadExternalRows = false, .type = BLOCK_LOAD_OFFSET_SEQ_ORDER};
//  if (type == TSDB_PREV_ROW) {
//    cond.order = TSDB_ORDER_DESC;
//    cond.twindow = (STimeWindow){pTsdbReadHandle->window.skey, INT64_MIN};
//  } else {
//    cond.order = TSDB_ORDER_ASC;
//    cond.twindow = (STimeWindow){pTsdbReadHandle->window.skey, INT64_MAX};
//  }
//
//  cond.colList = calloc(cond.numOfCols, sizeof(SColumnInfo));
//  if (cond.colList == NULL) {
//    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//    goto out_of_memory;
//  }
//
//  for (int32_t i = 0; i < cond.numOfCols; ++i) {
//    SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, i);
//    memcpy(&cond.colList[i], &pColInfoData->info, sizeof(SColumnInfo));
//  }
//
H
Haojun Liao 已提交
3013
//  pSecQueryHandle = tsdbQueryTablesImpl(pTsdbReadHandle->pTsdb, &cond, pTsdbReadHandle->idStr, pMemRef);
3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053
//  tfree(cond.colList);
//
//  // current table, only one table
//  STableCheckInfo* pCurrent = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex);
//
//  SArray* psTable = NULL;
//  pSecQueryHandle->pTableCheckInfo = createCheckInfoFromCheckInfo(pCurrent, pSecQueryHandle->window.skey, &psTable);
//  if (pSecQueryHandle->pTableCheckInfo == NULL) {
//    taosArrayDestroy(psTable);
//    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
//    goto out_of_memory;
//  }
//
//
//  tsdbMayTakeMemSnapshot(pSecQueryHandle, psTable);
//  if (!tsdbNextDataBlock((void*)pSecQueryHandle)) {
//    // no result in current query, free the corresponding result rows structure
//    if (type == TSDB_PREV_ROW) {
//      pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
//    } else {
//      pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
//    }
//
//    goto out_of_memory;
//  }
//
//  SDataBlockInfo blockInfo = {{0}, 0};
//  tsdbRetrieveDataBlockInfo((void*)pSecQueryHandle, &blockInfo);
//  tsdbRetrieveDataBlock((void*)pSecQueryHandle, pSecQueryHandle->defaultLoadColumn);
//
//  row = (type == TSDB_PREV_ROW)? pTsdbReadHandle->prev:pTsdbReadHandle->next;
//  int32_t pos = (type == TSDB_PREV_ROW)?pSecQueryHandle->cur.rows - 1:0;
//
//  for (int32_t i = 0; i < numOfCols; ++i) {
//    SColumnInfoData* pCol = taosArrayGet(row, i);
//    SColumnInfoData* s = taosArrayGet(pSecQueryHandle->pColumns, i);
//    memcpy((char*)pCol->pData, (char*)s->pData + s->info.bytes * pos, pCol->info.bytes);
//  }
//
//out_of_memory:
3054
//  tsdbCleanupReadHandle(pSecQueryHandle);
3055 3056 3057
//  return terrno;
//}

H
Haojun Liao 已提交
3058
bool tsdbGetExternalRow(tsdbReaderT pHandle) {
3059 3060
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*) pHandle;
  SQueryFilePos* cur = &pTsdbReadHandle->cur;
H
Haojun Liao 已提交
3061

H
Haojun Liao 已提交
3062 3063
  cur->fid = INT32_MIN;
  cur->mixBlock = true;
3064
  if (pTsdbReadHandle->prev == NULL || pTsdbReadHandle->next == NULL) {
H
Haojun Liao 已提交
3065 3066
    cur->rows = 0;
    return false;
H
Haojun Liao 已提交
3067 3068
  }

3069
  int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pTsdbReadHandle);
H
Haojun Liao 已提交
3070
  for (int32_t i = 0; i < numOfCols; ++i) {
3071 3072
    SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, i);
    SColumnInfoData* first = taosArrayGet(pTsdbReadHandle->prev, i);
H
Haojun Liao 已提交
3073 3074 3075

    memcpy(pColInfoData->pData, first->pData, pColInfoData->info.bytes);

3076
    SColumnInfoData* sec = taosArrayGet(pTsdbReadHandle->next, i);
sangshuduo's avatar
sangshuduo 已提交
3077
    memcpy(((char*)pColInfoData->pData) + pColInfoData->info.bytes, sec->pData, pColInfoData->info.bytes);
H
Haojun Liao 已提交
3078 3079

    if (i == 0 && pColInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
H
Haojun Liao 已提交
3080
      cur->win.skey = *(TSKEY*)pColInfoData->pData;
sangshuduo's avatar
sangshuduo 已提交
3081
      cur->win.ekey = *(TSKEY*)(((char*)pColInfoData->pData) + TSDB_KEYSIZE);
H
Haojun Liao 已提交
3082 3083 3084
    }
  }

H
Haojun Liao 已提交
3085 3086
  cur->rows = 2;
  return true;
3087 3088
}

3089
/*
3090
 * if lastRow == NULL, return TSDB_CODE_TDB_NO_CACHE_LAST_ROW
3091
 * else set pRes and return TSDB_CODE_SUCCESS and save lastKey
3092
 */
3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114
//int32_t tsdbGetCachedLastRow(STable* pTable, SMemRow* pRes, TSKEY* lastKey) {
//  int32_t code = TSDB_CODE_SUCCESS;
//
//  TSDB_RLOCK_TABLE(pTable);
//
//  if (!pTable->lastRow) {
//    code = TSDB_CODE_TDB_NO_CACHE_LAST_ROW;
//    goto out;
//  }
//
//  if (pRes) {
//    *pRes = tdMemRowDup(pTable->lastRow);
//    if (*pRes == NULL) {
//      code = TSDB_CODE_TDB_OUT_OF_MEMORY;
//    }
//  }
//
//out:
//  TSDB_RUNLOCK_TABLE(pTable);
//  return code;
//}

H
Haojun Liao 已提交
3115
bool isTsdbCacheLastRow(tsdbReaderT* pTsdbReadHandle) {
3116
  return ((STsdbReadHandle *)pTsdbReadHandle)->cachelastrow > TSDB_CACHED_TYPE_NONE;
D
fix bug  
dapan1121 已提交
3117 3118
}

3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145
int32_t checkForCachedLastRow(STsdbReadHandle* pTsdbReadHandle, STableGroupInfo *groupList) {
  assert(pTsdbReadHandle != NULL && groupList != NULL);

//  TSKEY    key = TSKEY_INITIAL_VAL;
//
//  SArray* group = taosArrayGetP(groupList->pGroupList, 0);
//  assert(group != NULL);
//
//  STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(group, 0);
//
//  int32_t code = 0;
//
//  if (((STable*)pInfo->pTable)->lastRow) {
//    code = tsdbGetCachedLastRow(pInfo->pTable, NULL, &key);
//    if (code != TSDB_CODE_SUCCESS) {
//      pTsdbReadHandle->cachelastrow = TSDB_CACHED_TYPE_NONE;
//    } else {
//      pTsdbReadHandle->cachelastrow = TSDB_CACHED_TYPE_LASTROW;
//    }
//  }
//
//  // update the tsdb query time range
//  if (pTsdbReadHandle->cachelastrow != TSDB_CACHED_TYPE_NONE) {
//    pTsdbReadHandle->window      = TSWINDOW_INITIALIZER;
//    pTsdbReadHandle->checkFiles  = false;
//    pTsdbReadHandle->activeIndex = -1;  // start from -1
//  }
H
Haojun Liao 已提交
3146

3147
  return TSDB_CODE_SUCCESS;
3148 3149
}

3150 3151
int32_t checkForCachedLast(STsdbReadHandle* pTsdbReadHandle) {
  assert(pTsdbReadHandle != NULL);
D
update  
dapan1121 已提交
3152 3153

  int32_t code = 0;
3154 3155 3156
//  if (pTsdbReadHandle->pTsdb && atomic_load_8(&pTsdbReadHandle->pTsdb->hasCachedLastColumn)){
//    pTsdbReadHandle->cachelastrow = TSDB_CACHED_TYPE_LAST;
//  }
D
update  
dapan1121 已提交
3157 3158

  // update the tsdb query time range
3159 3160 3161
  if (pTsdbReadHandle->cachelastrow) {
    pTsdbReadHandle->checkFiles  = false;
    pTsdbReadHandle->activeIndex = -1;  // start from -1
D
update  
dapan1121 已提交
3162 3163 3164 3165 3166 3167
  }

  return code;
}


3168
STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList) {
H
Haojun Liao 已提交
3169
  STimeWindow window = {INT64_MAX, INT64_MIN};
H
Haojun Liao 已提交
3170

H
Haojun Liao 已提交
3171
  int32_t totalNumOfTable = 0;
3172
  SArray* emptyGroup = taosArrayInit(16, sizeof(int32_t));
H
Haojun Liao 已提交
3173

H
Haojun Liao 已提交
3174 3175 3176 3177 3178
  // NOTE: starts from the buffer in case of descending timestamp order check data blocks
  size_t numOfGroups = taosArrayGetSize(groupList->pGroupList);
  for(int32_t j = 0; j < numOfGroups; ++j) {
    SArray* pGroup = taosArrayGetP(groupList->pGroupList, j);
    TSKEY   key = TSKEY_INITIAL_VAL;
H
Haojun Liao 已提交
3179

H
Haojun Liao 已提交
3180
    STableKeyInfo keyInfo = {0};
H
Haojun Liao 已提交
3181

H
Haojun Liao 已提交
3182 3183
    size_t numOfTables = taosArrayGetSize(pGroup);
    for(int32_t i = 0; i < numOfTables; ++i) {
3184
      STableKeyInfo* pInfo = (STableKeyInfo*) taosArrayGet(pGroup, i);
H
Haojun Liao 已提交
3185

H
Haojun Liao 已提交
3186
      // if the lastKey equals to INT64_MIN, there is no data in this table
3187
      TSKEY lastKey = 0;//((STable*)(pInfo->pTable))->lastKey;
H
Haojun Liao 已提交
3188 3189
      if (key < lastKey) {
        key = lastKey;
H
Haojun Liao 已提交
3190

3191
//        keyInfo.pTable  = pInfo->pTable;
H
Haojun Liao 已提交
3192
        keyInfo.lastKey = key;
3193
        pInfo->lastKey  = key;
H
Haojun Liao 已提交
3194

H
Haojun Liao 已提交
3195 3196 3197
        if (key < window.skey) {
          window.skey = key;
        }
3198

H
Haojun Liao 已提交
3199 3200 3201 3202
        if (key > window.ekey) {
          window.ekey = key;
        }
      }
3203
    }
H
Haojun Liao 已提交
3204

H
Haojun Liao 已提交
3205
    // more than one table in each group, only one table left for each group
3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217
//    if (keyInfo.pTable != NULL) {
//      totalNumOfTable++;
//      if (taosArrayGetSize(pGroup) == 1) {
//        // do nothing
//      } else {
//        taosArrayClear(pGroup);
//        taosArrayPush(pGroup, &keyInfo);
//      }
//    } else {  // mark all the empty groups, and remove it later
//      taosArrayDestroy(pGroup);
//      taosArrayPush(emptyGroup, &j);
//    }
3218
  }
H
Haojun Liao 已提交
3219

H
Haojun Liao 已提交
3220 3221 3222
  // window does not being updated, so set the original
  if (window.skey == INT64_MAX && window.ekey == INT64_MIN) {
    window = TSWINDOW_INITIALIZER;
H
Haojun Liao 已提交
3223
    assert(totalNumOfTable == 0 && taosArrayGetSize(groupList->pGroupList) == numOfGroups);
H
Haojun Liao 已提交
3224 3225
  }

H
Haojun Liao 已提交
3226
  taosArrayRemoveBatch(groupList->pGroupList, TARRAY_GET_START(emptyGroup), (int32_t) taosArrayGetSize(emptyGroup));
3227 3228
  taosArrayDestroy(emptyGroup);

H
Haojun Liao 已提交
3229
  groupList->numOfTables = totalNumOfTable;
H
Haojun Liao 已提交
3230
  return window;
H
hjxilinx 已提交
3231 3232
}

H
Haojun Liao 已提交
3233
void tsdbRetrieveDataBlockInfo(tsdbReaderT* pTsdbReadHandle, SDataBlockInfo* pDataBlockInfo) {
3234
  STsdbReadHandle* pHandle = (STsdbReadHandle*)pTsdbReadHandle;
3235
  SQueryFilePos* cur = &pHandle->cur;
3236 3237

  uint64_t uid = 0;
H
Haojun Liao 已提交
3238

3239
  // there are data in file
D
dapan1121 已提交
3240
  if (pHandle->cur.fid != INT32_MIN) {
3241
    STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot];
3242
    uid = pBlockInfo->pTableCheckInfo->tableId;
H
[td-32]  
hjxilinx 已提交
3243
  } else {
3244
    STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
3245
    uid = pCheckInfo->tableId;
3246
  }
3247

3248 3249
  pDataBlockInfo->uid    = uid;
  pDataBlockInfo->rows   = cur->rows;
H
Haojun Liao 已提交
3250
  pDataBlockInfo->window = cur->win;
S
TD-1057  
Shengliang Guan 已提交
3251
  pDataBlockInfo->numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pHandle));
3252
}
H
hjxilinx 已提交
3253

H
Haojun Liao 已提交
3254 3255 3256
/*
 * return null for mixed data block, if not a complete file data block, the statistics value will always return NULL
 */
H
Haojun Liao 已提交
3257
int32_t tsdbRetrieveDataBlockStatisInfo(tsdbReaderT* pTsdbReadHandle, SDataStatis** pBlockStatis) {
3258
  STsdbReadHandle* pHandle = (STsdbReadHandle*) pTsdbReadHandle;
H
Haojun Liao 已提交
3259

H
Haojun Liao 已提交
3260 3261
  SQueryFilePos* c = &pHandle->cur;
  if (c->mixBlock) {
H
Haojun Liao 已提交
3262 3263 3264
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }
H
Haojun Liao 已提交
3265

H
Haojun Liao 已提交
3266 3267 3268 3269
  STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot];
  assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0)));

  // file block with sub-blocks has no statistics data
H
Haojun Liao 已提交
3270 3271 3272 3273
  if (pBlockInfo->compBlock->numOfSubBlocks > 1) {
    *pBlockStatis = NULL;
    return TSDB_CODE_SUCCESS;
  }
H
Haojun Liao 已提交
3274 3275

  int64_t stime = taosGetTimestampUs();
H
Hongze Cheng 已提交
3276 3277 3278
  if (tsdbLoadBlockStatis(&pHandle->rhelper, pBlockInfo->compBlock) < 0) {
    return terrno;
  }
H
Haojun Liao 已提交
3279

H
Haojun Liao 已提交
3280 3281
  int16_t* colIds = pHandle->defaultLoadColumn->pData;

H
Haojun Liao 已提交
3282
  size_t numOfCols = QH_GET_NUM_OF_COLS(pHandle);
H
Haojun Liao 已提交
3283
  memset(pHandle->statis, 0, numOfCols * sizeof(SDataStatis));
3284
  for(int32_t i = 0; i < numOfCols; ++i) {
H
Haojun Liao 已提交
3285
    pHandle->statis[i].colId = colIds[i];
3286
  }
H
Haojun Liao 已提交
3287

H
Hongze Cheng 已提交
3288
  tsdbGetBlockStatis(&pHandle->rhelper, pHandle->statis, (int)numOfCols);
H
Haojun Liao 已提交
3289 3290 3291

  // always load the first primary timestamp column data
  SDataStatis* pPrimaryColStatis = &pHandle->statis[0];
3292
  assert(pPrimaryColStatis->colId == PRIMARYKEY_TIMESTAMP_COL_ID);
H
Haojun Liao 已提交
3293 3294 3295 3296 3297

  pPrimaryColStatis->numOfNull = 0;
  pPrimaryColStatis->min = pBlockInfo->compBlock->keyFirst;
  pPrimaryColStatis->max = pBlockInfo->compBlock->keyLast;

H
Haojun Liao 已提交
3298
  //update the number of NULL data rows
H
Haojun Liao 已提交
3299
  for(int32_t i = 1; i < numOfCols; ++i) {
3300
    if (pHandle->statis[i].numOfNull == -1) { // set the column data are all NULL
H
Haojun Liao 已提交
3301 3302 3303
      pHandle->statis[i].numOfNull = pBlockInfo->compBlock->numOfRows;
    }
  }
H
Haojun Liao 已提交
3304 3305 3306 3307

  int64_t elapsed = taosGetTimestampUs() - stime;
  pHandle->cost.statisInfoLoadTime += elapsed;

H
Haojun Liao 已提交
3308
  *pBlockStatis = pHandle->statis;
3309
  return TSDB_CODE_SUCCESS;
H
hjxilinx 已提交
3310 3311
}

H
Haojun Liao 已提交
3312
SArray* tsdbRetrieveDataBlock(tsdbReaderT* pTsdbReadHandle, SArray* pIdList) {
H
[td-32]  
hjxilinx 已提交
3313
  /**
H
hjxilinx 已提交
3314
   * In the following two cases, the data has been loaded to SColumnInfoData.
H
[td-32]  
hjxilinx 已提交
3315 3316
   * 1. data is from cache, 2. data block is not completed qualified to query time range
   */
3317
  STsdbReadHandle* pHandle = (STsdbReadHandle*)pTsdbReadHandle;
3318

D
dapan1121 已提交
3319
  if (pHandle->cur.fid == INT32_MIN) {
H
[td-32]  
hjxilinx 已提交
3320 3321
    return pHandle->pColumns;
  } else {
H
Haojun Liao 已提交
3322 3323
    STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[pHandle->cur.slot];
    STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
3324

3325
    if (pHandle->cur.mixBlock) {
H
[td-32]  
hjxilinx 已提交
3326 3327
      return pHandle->pColumns;
    } else {
H
Haojun Liao 已提交
3328
      SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock);
3329
      assert(pHandle->realNumOfRows <= binfo.rows);
H
Haojun Liao 已提交
3330

H
hjxilinx 已提交
3331 3332
      // data block has been loaded, todo extract method
      SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo;
H
Haojun Liao 已提交
3333

H
Hongze Cheng 已提交
3334
      if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fid == pHandle->cur.fid &&
H
Haojun Liao 已提交
3335
          pBlockLoadInfo->uid == pCheckInfo->tableId) {
H
hjxilinx 已提交
3336
        return pHandle->pColumns;
H
Haojun Liao 已提交
3337
      } else {  // only load the file block
H
refact  
Hongze Cheng 已提交
3338
        SBlock* pBlock = pBlockInfo->compBlock;
H
Haojun Liao 已提交
3339
        if (doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot) != TSDB_CODE_SUCCESS) {
3340 3341
          return NULL;
        }
H
Haojun Liao 已提交
3342

H
Haojun Liao 已提交
3343
        // todo refactor
H
Haojun Liao 已提交
3344
        int32_t numOfRows = doCopyRowsFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1);
H
Haojun Liao 已提交
3345

H
Haojun Liao 已提交
3346
        // if the buffer is not full in case of descending order query, move the data in the front of the buffer
3347
        if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) {
H
Haojun Liao 已提交
3348
          int32_t emptySize = pHandle->outputCapacity - numOfRows;
S
TD-1057  
Shengliang Guan 已提交
3349
          int32_t reqNumOfCols = (int32_t)taosArrayGetSize(pHandle->pColumns);
H
Haojun Liao 已提交
3350

H
Haojun Liao 已提交
3351 3352
          for(int32_t i = 0; i < reqNumOfCols; ++i) {
            SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i);
S
TD-1057  
Shengliang Guan 已提交
3353
            memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
H
Haojun Liao 已提交
3354 3355
          }
        }
H
Haojun Liao 已提交
3356

H
hjxilinx 已提交
3357 3358
        return pHandle->pColumns;
      }
H
[td-32]  
hjxilinx 已提交
3359 3360
    }
  }
H
hjxilinx 已提交
3361
}
3362
#if 0
3363
void filterPrepare(void* expr, void* param) {
3364
  tExprNode* pExpr = (tExprNode*)expr;
H
[td-32]  
hjxilinx 已提交
3365
  if (pExpr->_node.info != NULL) {
3366 3367
    return;
  }
3368

H
[td-32]  
hjxilinx 已提交
3369
  pExpr->_node.info = calloc(1, sizeof(tQueryInfo));
H
Haojun Liao 已提交
3370

3371
  STSchema*   pTSSchema = (STSchema*) param;
H
hjxilinx 已提交
3372 3373 3374
  tQueryInfo* pInfo = pExpr->_node.info;
  tVariant*   pCond = pExpr->_node.pRight->pVal;
  SSchema*    pSchema = pExpr->_node.pLeft->pSchema;
3375

3376 3377
  pInfo->sch      = *pSchema;
  pInfo->optr     = pExpr->_node.optr;
Y
yihaoDeng 已提交
3378
  pInfo->compare  = getComparFunc(pInfo->sch.type, pInfo->optr);
H
Haojun Liao 已提交
3379
  pInfo->indexed  = pTSSchema->columns->colId == pInfo->sch.colId;
H
Haojun Liao 已提交
3380

H
hjxilinx 已提交
3381
  if (pInfo->optr == TSDB_RELATION_IN) {
Y
yihaoDeng 已提交
3382
     int dummy = -1;
3383
     SHashObj *pObj = NULL;
Y
yihaoDeng 已提交
3384 3385 3386 3387
     if (pInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
        pObj = taosHashInit(256, taosGetDefaultHashFunction(pInfo->sch.type), true, false);
        SArray *arr = (SArray *)(pCond->arr);
        for (size_t i = 0; i < taosArrayGetSize(arr); i++) {
Y
yihaoDeng 已提交
3388
          char* p = taosArrayGetP(arr, i);
3389 3390
          strntolower_s(varDataVal(p), varDataVal(p), varDataLen(p));
          taosHashPut(pObj, varDataVal(p), varDataLen(p), &dummy, sizeof(dummy));
Y
yihaoDeng 已提交
3391 3392 3393 3394
        }
     } else {
       buildFilterSetFromBinary((void **)&pObj, pCond->pz, pCond->nLen);
     }
3395
     pInfo->q = (char *)pObj;
H
Haojun Liao 已提交
3396
  } else if (pCond != NULL) {
3397 3398 3399 3400
    uint32_t size = pCond->nLen * TSDB_NCHAR_SIZE;
    if (size < (uint32_t)pSchema->bytes) {
      size = pSchema->bytes;
    }
3401 3402
    // to make sure tonchar does not cause invalid write, since the '\0' needs at least sizeof(wchar_t) space.
    pInfo->q = calloc(1, size + TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE);
3403
    tVariantDump(pCond, pInfo->q, pSchema->type, true);
weixin_48148422's avatar
weixin_48148422 已提交
3404
  }
3405 3406
}

3407
#endif
3408

H
Haojun Liao 已提交
3409
static int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) {
3410
#if 0
3411
  STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param;
3412 3413
  STable* pTable1 = ((STableKeyInfo*) p1)->uid;
  STable* pTable2 = ((STableKeyInfo*) p2)->uid;
H
Haojun Liao 已提交
3414

3415 3416 3417
  for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) {
    SColIndex* pColIndex = &pTableGroupSupp->pCols[i];
    int32_t colIndex = pColIndex->colIndex;
H
Haojun Liao 已提交
3418

H
Haojun Liao 已提交
3419
    assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX);
H
Haojun Liao 已提交
3420

3421 3422 3423 3424
    char *  f1 = NULL;
    char *  f2 = NULL;
    int32_t type = 0;
    int32_t bytes = 0;
H
Haojun Liao 已提交
3425

H
Haojun Liao 已提交
3426 3427 3428
    if (colIndex == TSDB_TBNAME_COLUMN_INDEX) {
      f1 = (char*) TABLE_NAME(pTable1);
      f2 = (char*) TABLE_NAME(pTable2);
3429
      type = TSDB_DATA_TYPE_BINARY;
3430
      bytes = tGetTbnameColumnSchema()->bytes;
3431
    } else {
Y
yihaoDeng 已提交
3432 3433 3434 3435 3436 3437 3438
      if (pTableGroupSupp->pTagSchema && colIndex < pTableGroupSupp->pTagSchema->numOfCols) {
        STColumn* pCol = schemaColAt(pTableGroupSupp->pTagSchema, colIndex);
        bytes = pCol->bytes;
        type = pCol->type;
        f1 = tdGetKVRowValOfCol(pTable1->tagVal, pCol->colId);
        f2 = tdGetKVRowValOfCol(pTable2->tagVal, pCol->colId);
      } 
3439
    }
H
Haojun Liao 已提交
3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453

    // this tags value may be NULL
    if (f1 == NULL && f2 == NULL) {
      continue;
    }

    if (f1 == NULL) {
      return -1;
    }

    if (f2 == NULL) {
      return 1;
    }

3454 3455 3456 3457 3458 3459 3460
    int32_t ret = doCompare(f1, f2, type, bytes);
    if (ret == 0) {
      continue;
    } else {
      return ret;
    }
  }
3461
#endif
3462 3463 3464
  return 0;
}

H
Haojun Liao 已提交
3465
static int tsdbCheckInfoCompar(const void* key1, const void* key2) {
3466
  if (((STableCheckInfo*)key1)->tableId < ((STableCheckInfo*)key2)->tableId) {
H
Haojun Liao 已提交
3467
    return -1;
3468
  } else if (((STableCheckInfo*)key1)->tableId > ((STableCheckInfo*)key2)->tableId) {
H
Haojun Liao 已提交
3469 3470 3471 3472 3473 3474 3475 3476 3477
    return 1;
  } else {
    ASSERT(false);
    return 0;
  }
}

void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, TSKEY skey,
                          STableGroupSupporter* pSupp, __ext_compar_fn_t compareFn) {
3478
  STable* pTable = taosArrayGetP(pTableList, 0);
H
Haojun Liao 已提交
3479 3480
  SArray* g = taosArrayInit(16, sizeof(STableKeyInfo));

3481
  STableKeyInfo info = {.lastKey = skey};
H
Haojun Liao 已提交
3482
  taosArrayPush(g, &info);
3483

3484
  for (int32_t i = 1; i < numOfTables; ++i) {
3485 3486
    STable** prev = taosArrayGet(pTableList, i - 1);
    STable** p = taosArrayGet(pTableList, i);
H
Haojun Liao 已提交
3487

H
hjxilinx 已提交
3488
    int32_t ret = compareFn(prev, p, pSupp);
3489
    assert(ret == 0 || ret == -1);
H
Haojun Liao 已提交
3490

3491
    if (ret == 0) {
3492
      STableKeyInfo info1 = {.lastKey = skey};
H
Haojun Liao 已提交
3493
      taosArrayPush(g, &info1);
3494 3495
    } else {
      taosArrayPush(pGroups, &g);  // current group is ended, start a new group
H
Haojun Liao 已提交
3496 3497
      g = taosArrayInit(16, sizeof(STableKeyInfo));

3498
      STableKeyInfo info1 = {.lastKey = skey};
H
Haojun Liao 已提交
3499
      taosArrayPush(g, &info1);
3500 3501
    }
  }
H
Haojun Liao 已提交
3502

3503
  taosArrayPush(pGroups, &g);
3504 3505
}

3506
SArray* createTableGroup(SArray* pTableList, SSchemaWrapper* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols, TSKEY skey) {
3507
  assert(pTableList != NULL);
3508
  SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
3509

3510 3511
  size_t size = taosArrayGetSize(pTableList);
  if (size == 0) {
S
Shengliang Guan 已提交
3512
    tsdbDebug("no qualified tables");
3513 3514
    return pTableGroup;
  }
H
Haojun Liao 已提交
3515

3516
  if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table
3517
    SArray* sa = taosArrayDup(pTableList);
H
Haojun Liao 已提交
3518 3519 3520 3521
    if (sa == NULL) {
      taosArrayDestroy(pTableGroup);
      return NULL;
    }
H
Haojun Liao 已提交
3522

3523
    taosArrayPush(pTableGroup, &sa);
S
TD-1057  
Shengliang Guan 已提交
3524
    tsdbDebug("all %" PRIzu " tables belong to one group", size);
3525
  } else {
H
Haojun Liao 已提交
3526 3527
    STableGroupSupporter sup = {0};
    sup.numOfCols = numOfOrderCols;
3528
    sup.pTagSchema = pTagSchema->pSchema;
H
Haojun Liao 已提交
3529 3530
    sup.pCols = pCols;

3531 3532
    taosqsort(pTableList->pData, size, sizeof(STableKeyInfo), &sup, tableGroupComparFn);
    createTableGroupImpl(pTableGroup, pTableList, size, skey, &sup, tableGroupComparFn);
3533
  }
H
Haojun Liao 已提交
3534

3535 3536 3537
  return pTableGroup;
}

3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621
//static bool tableFilterFp(const void* pNode, void* param) {
//  tQueryInfo* pInfo = (tQueryInfo*) param;
//
//  STable* pTable = (STable*)(SL_GET_NODE_DATA((SSkipListNode*)pNode));
//
//  char* val = NULL;
//  if (pInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
//    val = (char*) TABLE_NAME(pTable);
//  } else {
//    val = tdGetKVRowValOfCol(pTable->tagVal, pInfo->sch.colId);
//  }
//
//  if (pInfo->optr == TSDB_RELATION_ISNULL || pInfo->optr == TSDB_RELATION_NOTNULL) {
//    if (pInfo->optr == TSDB_RELATION_ISNULL) {
//      return (val == NULL) || isNull(val, pInfo->sch.type);
//    } else if (pInfo->optr == TSDB_RELATION_NOTNULL) {
//      return (val != NULL) && (!isNull(val, pInfo->sch.type));
//    }
//  } else if (pInfo->optr == TSDB_RELATION_IN) {
//     int type = pInfo->sch.type;
//     if (type == TSDB_DATA_TYPE_BOOL || IS_SIGNED_NUMERIC_TYPE(type) || type == TSDB_DATA_TYPE_TIMESTAMP) {
//       int64_t v;
//       GET_TYPED_DATA(v, int64_t, pInfo->sch.type, val);
//       return NULL != taosHashGet((SHashObj *)pInfo->q, (char *)&v, sizeof(v));
//     } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
//       uint64_t v;
//       GET_TYPED_DATA(v, uint64_t, pInfo->sch.type, val);
//       return NULL != taosHashGet((SHashObj *)pInfo->q, (char *)&v, sizeof(v));
//     }
//     else if (type == TSDB_DATA_TYPE_DOUBLE || type == TSDB_DATA_TYPE_FLOAT) {
//       double v;
//       GET_TYPED_DATA(v, double, pInfo->sch.type, val);
//       return NULL != taosHashGet((SHashObj *)pInfo->q, (char *)&v, sizeof(v));
//     } else if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR){
//       return NULL != taosHashGet((SHashObj *)pInfo->q, varDataVal(val), varDataLen(val));
//     }
//
//  }
//
//  int32_t ret = 0;
//  if (val == NULL) { //the val is possible to be null, so check it out carefully
//    ret = -1; // val is missing in table tags value pairs
//  } else {
//    ret = pInfo->compare(val, pInfo->q);
//  }
//
//  switch (pInfo->optr) {
//    case TSDB_RELATION_EQUAL: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_NOT_EQUAL: {
//      return ret != 0;
//    }
//    case TSDB_RELATION_GREATER_EQUAL: {
//      return ret >= 0;
//    }
//    case TSDB_RELATION_GREATER: {
//      return ret > 0;
//    }
//    case TSDB_RELATION_LESS_EQUAL: {
//      return ret <= 0;
//    }
//    case TSDB_RELATION_LESS: {
//      return ret < 0;
//    }
//    case TSDB_RELATION_LIKE: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_MATCH: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_NMATCH: {
//      return ret == 0;
//    }
//    case TSDB_RELATION_IN: {
//      return ret == 1;
//    }
//
//    default:
//      assert(false);
//  }
//
//  return true;
//}
H
Haojun Liao 已提交
3622

3623
//static void getTableListfromSkipList(tExprNode *pExpr, SSkipList *pSkipList, SArray *result, SExprTraverseSupp *param);
3624

3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636
//static int32_t doQueryTableList(STable* pSTable, SArray* pRes, tExprNode* pExpr) {
//  // query according to the expression tree
//  SExprTraverseSupp supp = {
//      .nodeFilterFn = (__result_filter_fn_t) tableFilterFp,
//      .setupInfoFn = filterPrepare,
//      .pExtInfo = pSTable->tagSchema,
//      };
//
//  getTableListfromSkipList(pExpr, pSTable->pIndex, pRes, &supp);
//  tExprTreeDestroy(pExpr, destroyHelper);
//  return TSDB_CODE_SUCCESS;
//}
3637

H
Haojun Liao 已提交
3638
int32_t tsdbQuerySTableByTagCond(void* pMeta, uint64_t uid, TSKEY skey, const char* pTagCond, size_t len,
3639
                                 int16_t tagNameRelType, const char* tbnameCond, STableGroupInfo* pGroupInfo,
3640
                                 SColIndex* pColIndex, int32_t numOfCols, uint64_t reqId, uint64_t taskId) {
H
Haojun Liao 已提交
3641
  STbCfg* pTbCfg = metaGetTbInfoByUid(pMeta, uid);
3642
  if (pTbCfg == NULL) {
H
Haojun Liao 已提交
3643
    tsdbError("%p failed to get stable, uid:%"PRIu64", TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta, uid, taskId, reqId);
3644 3645
    terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
    goto _error;
3646
  }
H
Haojun Liao 已提交
3647

3648
  if (pTbCfg->type != META_SUPER_TABLE) {
H
Haojun Liao 已提交
3649
    tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta, uid, taskId, reqId);
3650
    terrno = TSDB_CODE_OPS_NOT_SUPPORT; //basically, this error is caused by invalid sql issued by client
3651
    goto _error;
H
hjxilinx 已提交
3652
  }
3653 3654

  //NOTE: not add ref count for super table
H
Haojun Liao 已提交
3655
  SArray* res = taosArrayInit(8, sizeof(STableKeyInfo));
H
Haojun Liao 已提交
3656
  SSchemaWrapper* pTagSchema = metaGetTableSchema(pMeta, uid, 0, true);
H
Haojun Liao 已提交
3657

weixin_48148422's avatar
weixin_48148422 已提交
3658 3659
  // no tags and tbname condition, all child tables of this stable are involved
  if (tbnameCond == NULL && (pTagCond == NULL || len == 0)) {
H
Haojun Liao 已提交
3660
    int32_t ret = getAllTableList(pMeta, uid, res);
3661 3662
    if (ret != TSDB_CODE_SUCCESS) {
      goto _error;
3663
    }
3664

sangshuduo's avatar
sangshuduo 已提交
3665
    pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(res);
H
Haojun Liao 已提交
3666
    pGroupInfo->pGroupList  = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey);
H
Haojun Liao 已提交
3667

H
Haojun Liao 已提交
3668 3669
    tsdbDebug("%p no table name/tag condition, all tables qualified, numOfTables:%u, group:%zu, TID:0x%"PRIx64" QID:0x%"PRIx64, pMeta,
              pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList), taskId, reqId);
3670

3671
    taosArrayDestroy(res);
3672 3673
    return ret;
  }
3674

H
hjxilinx 已提交
3675
  int32_t ret = TSDB_CODE_SUCCESS;
3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719
//  tExprNode* expr = NULL;
//
//  TRY(TSDB_MAX_TAG_CONDITIONS) {
//    expr = exprTreeFromTableName(tbnameCond);
//    if (expr == NULL) {
//      expr = exprTreeFromBinary(pTagCond, len);
//    } else {
//      CLEANUP_PUSH_VOID_PTR_PTR(true, tExprTreeDestroy, expr, NULL);
//      tExprNode* tagExpr = exprTreeFromBinary(pTagCond, len);
//      if (tagExpr != NULL) {
//        CLEANUP_PUSH_VOID_PTR_PTR(true, tExprTreeDestroy, tagExpr, NULL);
//        tExprNode* tbnameExpr = expr;
//        expr = calloc(1, sizeof(tExprNode));
//        if (expr == NULL) {
//          THROW( TSDB_CODE_TDB_OUT_OF_MEMORY );
//        }
//        expr->nodeType = TSQL_NODE_EXPR;
//        expr->_node.optr = (uint8_t)tagNameRelType;
//        expr->_node.pLeft = tagExpr;
//        expr->_node.pRight = tbnameExpr;
//      }
//    }
//    CLEANUP_EXECUTE();
//
//  } CATCH( code ) {
//    CLEANUP_EXECUTE();
//    terrno = code;
//    tsdbUnlockRepoMeta(tsdb);     // unlock tsdb in any cases
//
//    goto _error;
//    // TODO: more error handling
//  } END_TRY
//
//  doQueryTableList(pTable, res, expr);
//  pGroupInfo->numOfTables = (uint32_t)taosArrayGetSize(res);
//  pGroupInfo->pGroupList  = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey);
//
//  tsdbDebug("%p stable tid:%d, uid:%"PRIu64" query, numOfTables:%u, belong to %" PRIzu " groups", tsdb, pTable->tableId,
//      pTable->uid, pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList));
//
//  taosArrayDestroy(res);
//
//  if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error;
//  return ret;
3720 3721 3722

  _error:
  return terrno;
3723
}
3724

H
Haojun Liao 已提交
3725 3726
int32_t tsdbGetOneTableGroup(void* pMeta, uint64_t uid, TSKEY startKey, STableGroupInfo* pGroupInfo) {
  STbCfg* pTbCfg = metaGetTbInfoByUid(pMeta, uid);
3727
  if (pTbCfg == NULL) {
3728 3729
    terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
    goto _error;
3730
  }
3731

3732 3733
  pGroupInfo->numOfTables = 1;
  pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
3734

H
Haojun Liao 已提交
3735 3736
  SArray* group = taosArrayInit(1, sizeof(STableKeyInfo));

3737
  STableKeyInfo info = {.lastKey = startKey, .uid = uid};
H
Haojun Liao 已提交
3738
  taosArrayPush(group, &info);
H
Haojun Liao 已提交
3739

3740
  taosArrayPush(pGroupInfo->pGroupList, &group);
3741
  return TSDB_CODE_SUCCESS;
3742 3743 3744

  _error:
  return terrno;
3745
}
3746

3747
#if 0
3748
int32_t tsdbGetTableGroupFromIdList(STsdb* tsdb, SArray* pTableIdList, STableGroupInfo* pGroupInfo) {
B
Bomin Zhang 已提交
3749 3750 3751
  if (tsdbRLockRepoMeta(tsdb) < 0) {
    return terrno;
  }
3752 3753 3754 3755

  assert(pTableIdList != NULL);
  size_t size = taosArrayGetSize(pTableIdList);
  pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
H
Haojun Liao 已提交
3756
  SArray* group = taosArrayInit(1, sizeof(STableKeyInfo));
3757

B
Bomin Zhang 已提交
3758
  for(int32_t i = 0; i < size; ++i) {
3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769
    STableIdInfo *id = taosArrayGet(pTableIdList, i);

    STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), id->uid);
    if (pTable == NULL) {
      tsdbWarn("table uid:%"PRIu64", tid:%d has been drop already", id->uid, id->tid);
      continue;
    }

    if (pTable->type == TSDB_SUPER_TABLE) {
      tsdbError("direct query on super tale is not allowed, table uid:%"PRIu64", tid:%d", id->uid, id->tid);
      terrno = TSDB_CODE_QRY_INVALID_MSG;
D
fix bug  
dapan1121 已提交
3770 3771 3772
      tsdbUnlockRepoMeta(tsdb);
      taosArrayDestroy(group);
      return terrno;
3773 3774
    }

H
Haojun Liao 已提交
3775 3776
    STableKeyInfo info = {.pTable = pTable, .lastKey = id->key};
    taosArrayPush(group, &info);
3777 3778
  }

B
Bomin Zhang 已提交
3779 3780 3781 3782
  if (tsdbUnlockRepoMeta(tsdb) < 0) {
    taosArrayDestroy(group);
    return terrno;
  }
3783

sangshuduo's avatar
sangshuduo 已提交
3784
  pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(group);
B
Bomin Zhang 已提交
3785 3786 3787 3788 3789
  if (pGroupInfo->numOfTables > 0) {
    taosArrayPush(pGroupInfo->pGroupList, &group);
  } else {
    taosArrayDestroy(group);
  }
3790 3791 3792

  return TSDB_CODE_SUCCESS;
}
3793
#endif
3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808
static void* doFreeColumnInfoData(SArray* pColumnInfoData) {
  if (pColumnInfoData == NULL) {
    return NULL;
  }

  size_t cols = taosArrayGetSize(pColumnInfoData);
  for (int32_t i = 0; i < cols; ++i) {
    SColumnInfoData* pColInfo = taosArrayGet(pColumnInfoData, i);
    tfree(pColInfo->pData);
  }

  taosArrayDestroy(pColumnInfoData);
  return NULL;
}

H
Haojun Liao 已提交
3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821
static void* destroyTableCheckInfo(SArray* pTableCheckInfo) {
  size_t size = taosArrayGetSize(pTableCheckInfo);
  for (int32_t i = 0; i < size; ++i) {
    STableCheckInfo* p = taosArrayGet(pTableCheckInfo, i);
    destroyTableMemIterator(p);

    tfree(p->pCompInfo);
  }

  taosArrayDestroy(pTableCheckInfo);
  return NULL;
}

3822

H
Haojun Liao 已提交
3823
void tsdbCleanupReadHandle(tsdbReaderT queryHandle) {
3824 3825
  STsdbReadHandle* pTsdbReadHandle = (STsdbReadHandle*)queryHandle;
  if (pTsdbReadHandle == NULL) {
3826 3827
    return;
  }
3828

3829
  pTsdbReadHandle->pColumns = doFreeColumnInfoData(pTsdbReadHandle->pColumns);
3830

3831 3832 3833
  taosArrayDestroy(pTsdbReadHandle->defaultLoadColumn);
  tfree(pTsdbReadHandle->pDataBlockInfo);
  tfree(pTsdbReadHandle->statis);
3834

3835
  if (!emptyQueryTimewindow(pTsdbReadHandle)) {
3836
//    tsdbMayUnTakeMemSnapshot(pTsdbReadHandle);
3837
  } else {
3838
    assert(pTsdbReadHandle->pTableCheckInfo == NULL);
3839 3840
  }

3841 3842
  if (pTsdbReadHandle->pTableCheckInfo != NULL) {
    pTsdbReadHandle->pTableCheckInfo = destroyTableCheckInfo(pTsdbReadHandle->pTableCheckInfo);
3843
  }
3844

3845
  tsdbDestroyReadH(&pTsdbReadHandle->rhelper);
H
Haojun Liao 已提交
3846

3847 3848
  tdFreeDataCols(pTsdbReadHandle->pDataCols);
  pTsdbReadHandle->pDataCols = NULL;
H
Haojun Liao 已提交
3849

3850 3851
  pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev);
  pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next);
3852

3853
  SIOCostSummary* pCost = &pTsdbReadHandle->cost;
3854

H
Haojun Liao 已提交
3855 3856
  tsdbDebug("%p :io-cost summary: head-file read cnt:%"PRIu64", head-file time:%"PRIu64" us, statis-info:%"PRId64" us, datablock:%" PRId64" us, check data:%"PRId64" us, %s",
      pTsdbReadHandle, pCost->headFileLoad, pCost->headFileLoadTime, pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pTsdbReadHandle->idStr);
H
Haojun Liao 已提交
3857

3858
  tfree(pTsdbReadHandle);
3859
}
3860

3861
#if 0
H
Haojun Liao 已提交
3862
void tsdbDestroyTableGroup(STableGroupInfo *pGroupList) {
3863 3864 3865 3866 3867 3868 3869 3870 3871 3872
  assert(pGroupList != NULL);

  size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList);

  for(int32_t i = 0; i < numOfGroup; ++i) {
    SArray* p = taosArrayGetP(pGroupList->pGroupList, i);

    size_t numOfTables = taosArrayGetSize(p);
    for(int32_t j = 0; j < numOfTables; ++j) {
      STable* pTable = taosArrayGetP(p, j);
3873 3874 3875 3876
      if (pTable != NULL) { // in case of handling retrieve data from tsdb
        tsdbUnRefTable(pTable);
      }
      //assert(pTable != NULL);
3877 3878 3879 3880 3881
    }

    taosArrayDestroy(p);
  }

3882
  taosHashCleanup(pGroupList->map);
3883
  taosArrayDestroy(pGroupList->pGroupList);
H
Haojun Liao 已提交
3884
  pGroupList->numOfTables = 0;
3885
}
H
Haojun Liao 已提交
3886 3887 3888 3889 3890 3891 3892

static void applyFilterToSkipListNode(SSkipList *pSkipList, tExprNode *pExpr, SArray *pResult, SExprTraverseSupp *param) {
  SSkipListIterator* iter = tSkipListCreateIter(pSkipList);

  // Scan each node in the skiplist by using iterator
  while (tSkipListIterNext(iter)) {
    SSkipListNode *pNode = tSkipListIterGet(iter);
H
Haojun Liao 已提交
3893
    if (exprTreeApplyFilter(pExpr, pNode, param)) {
H
Haojun Liao 已提交
3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916
      taosArrayPush(pResult, &(SL_GET_NODE_DATA(pNode)));
    }
  }

  tSkipListDestroyIter(iter);
}

typedef struct {
  char*    v;
  int32_t  optr;
} SEndPoint;

typedef struct {
  SEndPoint* start;
  SEndPoint* end;
} SQueryCond;

// todo check for malloc failure
static int32_t setQueryCond(tQueryInfo *queryColInfo, SQueryCond* pCond) {
  int32_t optr = queryColInfo->optr;

  if (optr == TSDB_RELATION_GREATER || optr == TSDB_RELATION_GREATER_EQUAL ||
      optr == TSDB_RELATION_EQUAL || optr == TSDB_RELATION_NOT_EQUAL) {
Y
yihaoDeng 已提交
3917
    pCond->start       = calloc(1, sizeof(SEndPoint));
H
Haojun Liao 已提交
3918
    pCond->start->optr = queryColInfo->optr;
Y
yihaoDeng 已提交
3919
    pCond->start->v    = queryColInfo->q;
H
Haojun Liao 已提交
3920
  } else if (optr == TSDB_RELATION_LESS || optr == TSDB_RELATION_LESS_EQUAL) {
Y
yihaoDeng 已提交
3921
    pCond->end       = calloc(1, sizeof(SEndPoint));
H
Haojun Liao 已提交
3922
    pCond->end->optr = queryColInfo->optr;
Y
yihaoDeng 已提交
3923 3924 3925 3926 3927 3928
    pCond->end->v    = queryColInfo->q;
  } else if (optr == TSDB_RELATION_IN) {
    pCond->start       = calloc(1, sizeof(SEndPoint));
    pCond->start->optr = queryColInfo->optr;
    pCond->start->v    = queryColInfo->q; 
  } else if (optr == TSDB_RELATION_LIKE) {
H
Haojun Liao 已提交
3929
    assert(0);
3930 3931
  } else if (optr == TSDB_RELATION_MATCH) {
    assert(0);
3932 3933
  } else if (optr == TSDB_RELATION_NMATCH) {
    assert(0);
H
Haojun Liao 已提交
3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016
  }

  return TSDB_CODE_SUCCESS;
}

static void queryIndexedColumn(SSkipList* pSkipList, tQueryInfo* pQueryInfo, SArray* result) {
  SSkipListIterator* iter = NULL;

  SQueryCond cond = {0};
  if (setQueryCond(pQueryInfo, &cond) != TSDB_CODE_SUCCESS) {
    //todo handle error
  }

  if (cond.start != NULL) {
    iter = tSkipListCreateIterFromVal(pSkipList, (char*) cond.start->v, pSkipList->type, TSDB_ORDER_ASC);
  } else {
    iter = tSkipListCreateIterFromVal(pSkipList, (char*)(cond.end ? cond.end->v: NULL), pSkipList->type, TSDB_ORDER_DESC);
  }

  if (cond.start != NULL) {
    int32_t optr = cond.start->optr;

    if (optr == TSDB_RELATION_EQUAL) {   // equals
      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);

        int32_t ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v);
        if (ret != 0) {
          break;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }
    } else if (optr == TSDB_RELATION_GREATER || optr == TSDB_RELATION_GREATER_EQUAL) { // greater equal
      bool comp = true;
      int32_t ret = 0;

      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);

        if (comp) {
          ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v);
          assert(ret >= 0);
        }

        if (ret == 0 && optr == TSDB_RELATION_GREATER) {
          continue;
        } else {
          STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
          taosArrayPush(result, &info);
          comp = false;
        }
      }
    } else if (optr == TSDB_RELATION_NOT_EQUAL) {   // not equal
      bool comp = true;

      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);
        comp = comp && (pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v) == 0);
        if (comp) {
          continue;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }

      tSkipListDestroyIter(iter);

      comp = true;
      iter = tSkipListCreateIterFromVal(pSkipList, (char*) cond.start->v, pSkipList->type, TSDB_ORDER_DESC);
      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);
        comp = comp && (pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v) == 0);
        if (comp) {
          continue;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }

Y
yihaoDeng 已提交
4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029
    } else if (optr == TSDB_RELATION_IN) {
      while(tSkipListIterNext(iter)) {
        SSkipListNode* pNode = tSkipListIterGet(iter);

        int32_t ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.start->v);
        if (ret != 0) {
          break;
        }

        STableKeyInfo info = {.pTable = (void*)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
        taosArrayPush(result, &info);
      }
      
H
Haojun Liao 已提交
4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090
    } else {
      assert(0);
    }
  } else {
    int32_t optr = cond.end ? cond.end->optr : TSDB_RELATION_INVALID;
    if (optr == TSDB_RELATION_LESS || optr == TSDB_RELATION_LESS_EQUAL) {
      bool    comp = true;
      int32_t ret = 0;

      while (tSkipListIterNext(iter)) {
        SSkipListNode *pNode = tSkipListIterGet(iter);

        if (comp) {
          ret = pQueryInfo->compare(SL_GET_NODE_KEY(pSkipList, pNode), cond.end->v);
          assert(ret <= 0);
        }

        if (ret == 0 && optr == TSDB_RELATION_LESS) {
          continue;
        } else {
          STableKeyInfo info = {.pTable = (void *)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
          taosArrayPush(result, &info);
          comp = false;  // no need to compare anymore
        }
      }
    } else {
      assert(pQueryInfo->optr == TSDB_RELATION_ISNULL || pQueryInfo->optr == TSDB_RELATION_NOTNULL);

      while (tSkipListIterNext(iter)) {
        SSkipListNode *pNode = tSkipListIterGet(iter);

        bool isnull = isNull(SL_GET_NODE_KEY(pSkipList, pNode), pQueryInfo->sch.type);
        if ((pQueryInfo->optr == TSDB_RELATION_ISNULL && isnull) ||
            (pQueryInfo->optr == TSDB_RELATION_NOTNULL && (!isnull))) {
          STableKeyInfo info = {.pTable = (void *)SL_GET_NODE_DATA(pNode), .lastKey = TSKEY_INITIAL_VAL};
          taosArrayPush(result, &info);
        }
      }
    }
  }

  free(cond.start);
  free(cond.end);
  tSkipListDestroyIter(iter);
}

static void queryIndexlessColumn(SSkipList* pSkipList, tQueryInfo* pQueryInfo, SArray* res, __result_filter_fn_t filterFp) {
  SSkipListIterator* iter = tSkipListCreateIter(pSkipList);

  while (tSkipListIterNext(iter)) {
    bool addToResult = false;

    SSkipListNode *pNode = tSkipListIterGet(iter);

    char *pData = SL_GET_NODE_DATA(pNode);
    tstr *name = (tstr*) tsdbGetTableName((void*) pData);

    // todo speed up by using hash
    if (pQueryInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
      if (pQueryInfo->optr == TSDB_RELATION_IN) {
        addToResult = pQueryInfo->compare(name, pQueryInfo->q);
4091 4092 4093
      } else if (pQueryInfo->optr == TSDB_RELATION_LIKE ||
                 pQueryInfo->optr == TSDB_RELATION_MATCH ||
                 pQueryInfo->optr == TSDB_RELATION_NMATCH) {
H
Haojun Liao 已提交
4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124
        addToResult = !pQueryInfo->compare(name, pQueryInfo->q);
      }
    } else {
      addToResult = filterFp(pNode, pQueryInfo);
    }

    if (addToResult) {
      STableKeyInfo info = {.pTable = (void*)pData, .lastKey = TSKEY_INITIAL_VAL};
      taosArrayPush(res, &info);
    }
  }

  tSkipListDestroyIter(iter);
}

// Apply the filter expression to each node in the skiplist to acquire the qualified nodes in skip list
void getTableListfromSkipList(tExprNode *pExpr, SSkipList *pSkipList, SArray *result, SExprTraverseSupp *param) {
  if (pExpr == NULL) {
    return;
  }

  tExprNode *pLeft  = pExpr->_node.pLeft;
  tExprNode *pRight = pExpr->_node.pRight;

  // column project
  if (pLeft->nodeType != TSQL_NODE_EXPR && pRight->nodeType != TSQL_NODE_EXPR) {
    assert(pLeft->nodeType == TSQL_NODE_COL && (pRight->nodeType == TSQL_NODE_VALUE || pRight->nodeType == TSQL_NODE_DUMMY));

    param->setupInfoFn(pExpr, param->pExtInfo);

    tQueryInfo *pQueryInfo = pExpr->_node.info;
4125 4126
    if (pQueryInfo->indexed && (pQueryInfo->optr != TSDB_RELATION_LIKE
                                && pQueryInfo->optr != TSDB_RELATION_MATCH && pQueryInfo->optr != TSDB_RELATION_NMATCH
4127
                                && pQueryInfo->optr != TSDB_RELATION_IN)) {
H
Haojun Liao 已提交
4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142
      queryIndexedColumn(pSkipList, pQueryInfo, result);
    } else {
      queryIndexlessColumn(pSkipList, pQueryInfo, result, param->nodeFilterFn);
    }

    return;
  }

  // The value of hasPK is always 0.
  uint8_t weight = pLeft->_node.hasPK + pRight->_node.hasPK;
  assert(weight == 0 && pSkipList != NULL && taosArrayGetSize(result) == 0);

  //apply the hierarchical filter expression to every node in skiplist to find the qualified nodes
  applyFilterToSkipListNode(pSkipList, pExpr, result, param);
}
4143
#endif