tsort.c 49.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "query.h"
H
Hongze Cheng 已提交
17
#include "tcommon.h"
18

H
Hongze Cheng 已提交
19
#include "tcompare.h"
H
Haojun Liao 已提交
20
#include "tdatablock.h"
S
Shengliang Guan 已提交
21
#include "tdef.h"
22
#include "theap.h"
23 24
#include "tlosertree.h"
#include "tpagedbuf.h"
H
Haojun Liao 已提交
25
#include "tsort.h"
26
#include "tutil.h"
27
#include "tsimplehash.h"
28

29 30 31 32 33
struct STupleHandle {
  SSDataBlock* pBlock;
  int32_t      rowIndex;
};

H
Haojun Liao 已提交
34
struct SSortHandle {
H
Hongze Cheng 已提交
35 36 37 38
  int32_t        type;
  int32_t        pageSize;
  int32_t        numOfPages;
  SDiskbasedBuf* pBuf;
39 40 41 42 43 44
  SArray*        pSortInfo;
  SArray*        pOrderedSource;
  int32_t        loops;
  uint64_t       sortElapsed;
  int64_t        startTs;
  uint64_t       totalElapsed;
45

S
slzhou 已提交
46 47 48
  uint64_t         pqMaxRows;
  uint32_t         pqMaxTupleLength;
  uint32_t         pqSortBufSize;
49
  bool             forceUsePQSort;
50 51 52
  BoundedQueue*    pBoundedQueue;
  uint32_t         tmpRowIdx;

S
slzhou 已提交
53
  int64_t          mergeLimit;
54
  int64_t          currMergeLimitTs;          
S
slzhou 已提交
55

56
  int32_t           sourceId;
H
Hongze Cheng 已提交
57
  SSDataBlock*      pDataBlock;
58 59 60
  SMsortComparParam cmpParam;
  int32_t           numOfCompletedSources;
  bool              opened;
D
dapan1121 已提交
61
  int8_t            closed;
H
Hongze Cheng 已提交
62
  const char*       idStr;
63 64 65
  bool              inMemSort;
  bool              needAdjust;
  STupleHandle      tupleHandle;
H
Hongze Cheng 已提交
66
  void*             param;
67
  void (*beforeFp)(SSDataBlock* pBlock, void* param);
68 69 70

  _sort_fetch_block_fn_t  fetchfp;
  _sort_merge_compar_fn_t comparFn;
H
Hongze Cheng 已提交
71
  SMultiwayMergeTreeInfo* pMergeTree;
H
Haojun Liao 已提交
72
};
73

H
Hongze Cheng 已提交
74
static int32_t msortComparFn(const void* pLeft, const void* pRight, void* param);
75

76 77 78 79 80
// | offset[0] | offset[1] |....| nullbitmap | data |...|
static void* createTuple(uint32_t columnNum, uint32_t tupleLen) {
  uint32_t totalLen = sizeof(uint32_t) * columnNum + BitmapLen(columnNum) + tupleLen;
  return taosMemoryCalloc(1, totalLen);
}
81
static void destoryAllocatedTuple(void* t) { taosMemoryFree(t); }
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114

#define tupleOffset(tuple, colIdx) ((uint32_t*)(tuple + sizeof(uint32_t) * colIdx))
#define tupleSetOffset(tuple, colIdx, offset) (*tupleOffset(tuple, colIdx) = offset)
#define tupleSetNull(tuple, colIdx, colNum) colDataSetNull_f((char*)tuple + sizeof(uint32_t) * colNum, colIdx)
#define tupleColIsNull(tuple, colIdx, colNum) colDataIsNull_f((char*)tuple + sizeof(uint32_t) * colNum, colIdx)
#define tupleGetDataStartOffset(colNum) (sizeof(uint32_t) * colNum + BitmapLen(colNum))
#define tupleSetData(tuple, offset, data, length) memcpy(tuple + offset, data, length)

/**
 * @param t the tuple pointer addr, if realloced, *t is changed to the new addr
 * @param offset copy data into pTuple start from offset
 * @param colIndex the columnIndex, for setting null bitmap
 * @return the next offset to add field
 * */
static inline size_t tupleAddField(char** t, uint32_t colNum, uint32_t offset, uint32_t colIdx, void* data, size_t length,
                            bool isNull, uint32_t tupleLen) {
  tupleSetOffset(*t, colIdx, offset);
  if (isNull) {
    tupleSetNull(*t, colIdx, colNum);
  } else {
    if (offset + length > tupleLen + tupleGetDataStartOffset(colNum)) {
      *t = taosMemoryRealloc(*t, offset + length);
    }
    tupleSetData(*t, offset, data, length);
  }
  return offset + length;
}

static void* tupleGetField(char* t, uint32_t colIdx, uint32_t colNum) {
  if (tupleColIsNull(t, colIdx, colNum)) return NULL;
  return t + *tupleOffset(t, colIdx);
}

115 116
SSDataBlock* tsortGetSortedDataBlock(const SSortHandle* pSortHandle) {
  return createOneDataBlock(pSortHandle->pDataBlock, false);
117 118
}

119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
#define AllocatedTupleType 0
#define ReferencedTupleType 1 // tuple references to one row in pDataBlock
typedef struct TupleDesc {
  uint8_t type;
  char*   data; // if type is AllocatedTuple, then points to the created tuple, otherwise points to the DataBlock
} TupleDesc;

typedef struct ReferencedTuple {
  TupleDesc desc;
  size_t    rowIndex;
} ReferencedTuple;

static TupleDesc* createAllocatedTuple(SSDataBlock* pBlock, size_t colNum, uint32_t tupleLen, size_t rowIdx) {
  TupleDesc* t = taosMemoryCalloc(1, sizeof(TupleDesc));
  void*      pTuple = createTuple(colNum, tupleLen);
  if (!pTuple) {
    taosMemoryFree(t);
    return NULL;
  }
  size_t   colLen = 0;
  uint32_t offset = tupleGetDataStartOffset(colNum);
  for (size_t colIdx = 0; colIdx < colNum; ++colIdx) {
    SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, colIdx);
    if (colDataIsNull_s(pCol, rowIdx)) {
      offset = tupleAddField((char**)&pTuple, colNum, offset, colIdx, 0, 0, true, tupleLen);
    } else {
      colLen = colDataGetRowLength(pCol, rowIdx);
      offset =
          tupleAddField((char**)&pTuple, colNum, offset, colIdx, colDataGetData(pCol, rowIdx), colLen, false, tupleLen);
    }
  }
  t->type = AllocatedTupleType;
  t->data = pTuple;
  return t;
}

void* tupleDescGetField(const TupleDesc* pDesc, int32_t colIdx, uint32_t colNum) {
  if (pDesc->type == ReferencedTupleType) {
    ReferencedTuple* pRefTuple = (ReferencedTuple*)pDesc;
    SColumnInfoData* pCol = taosArrayGet(((SSDataBlock*)pDesc->data)->pDataBlock, colIdx);
    if (colDataIsNull_s(pCol, pRefTuple->rowIndex)) return NULL;
    return colDataGetData(pCol, pRefTuple->rowIndex);
  } else {
    return tupleGetField(pDesc->data, colIdx, colNum);
  }
}

void destroyTuple(void* t) {
  TupleDesc* pDesc = t;
  if (pDesc->type == AllocatedTupleType) {
    destoryAllocatedTuple(pDesc->data);
    taosMemoryFree(pDesc);
  }
}

174 175 176 177 178
/**
 *
 * @param type
 * @return
 */
H
Hongze Cheng 已提交
179
SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t pageSize, int32_t numOfPages,
S
slzhou 已提交
180 181
                                   SSDataBlock* pBlock, const char* idstr, uint64_t pqMaxRows, uint32_t pqMaxTupleLength,
                                   uint32_t pqSortBufSize) {
wafwerar's avatar
wafwerar 已提交
182
  SSortHandle* pSortHandle = taosMemoryCalloc(1, sizeof(SSortHandle));
183

H
Hongze Cheng 已提交
184 185
  pSortHandle->type = type;
  pSortHandle->pageSize = pageSize;
186
  pSortHandle->numOfPages = numOfPages;
H
Hongze Cheng 已提交
187 188
  pSortHandle->pSortInfo = pSortInfo;
  pSortHandle->loops = 0;
189

S
slzhou 已提交
190 191 192 193
  pSortHandle->pqMaxTupleLength = pqMaxTupleLength;
  if (pqMaxRows != 0) {
    pSortHandle->pqSortBufSize = pqSortBufSize;
    pSortHandle->pqMaxRows = pqMaxRows;
194 195
  }
  pSortHandle->forceUsePQSort = false;
196

197 198 199
  if (pBlock != NULL) {
    pSortHandle->pDataBlock = createOneDataBlock(pBlock, false);
  }
H
Haojun Liao 已提交
200

S
slzhou 已提交
201 202
  pSortHandle->mergeLimit = -1;

H
Hongze Cheng 已提交
203
  pSortHandle->pOrderedSource = taosArrayInit(4, POINTER_BYTES);
H
Haojun Liao 已提交
204
  pSortHandle->cmpParam.orderInfo = pSortInfo;
205
  pSortHandle->cmpParam.cmpGroupId = false;
206
  pSortHandle->cmpParam.sortType = type;
S
slzhou 已提交
207
  if (type == SORT_BLOCK_TS_MERGE) {
208 209 210 211 212
    SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pSortInfo, 0);
    pSortHandle->cmpParam.tsSlotId = pOrder->slotId;
    pSortHandle->cmpParam.order = pOrder->order;
    pSortHandle->cmpParam.cmpFn = (pOrder->order == TSDB_ORDER_ASC) ? compareInt64Val : compareInt64ValDesc;
  }
H
Haojun Liao 已提交
213
  tsortSetComparFp(pSortHandle, msortComparFn);
214 215

  if (idstr != NULL) {
216
    pSortHandle->idStr = taosStrdup(idstr);
217 218 219 220 221
  }

  return pSortHandle;
}

222
static int32_t sortComparCleanup(SMsortComparParam* cmpParam) {
223
  // NOTICE: pSource may be, if it is SORT_MULTISOURCE_MERGE
H
Hongze Cheng 已提交
224
  for (int32_t i = 0; i < cmpParam->numOfSources; ++i) {
225
    SSortSource* pSource = cmpParam->pSources[i];
wmmhello's avatar
wmmhello 已提交
226
    blockDataDestroy(pSource->src.pBlock);
S
slzhou 已提交
227 228 229
    if (pSource->pageIdList) {
      taosArrayDestroy(pSource->pageIdList);
    }
wmmhello's avatar
wmmhello 已提交
230
    taosMemoryFreeClear(pSource);
S
slzhou 已提交
231
    cmpParam->pSources[i] = NULL;
wmmhello's avatar
wmmhello 已提交
232 233 234 235 236 237
  }

  cmpParam->numOfSources = 0;
  return TSDB_CODE_SUCCESS;
}

D
dapan1121 已提交
238
void tsortClearOrderdSource(SArray* pOrderedSource, int64_t *fetchUs, int64_t *fetchNum) {
239 240 241 242 243
  for (size_t i = 0; i < taosArrayGetSize(pOrderedSource); i++) {
    SSortSource** pSource = taosArrayGet(pOrderedSource, i);
    if (NULL == *pSource) {
      continue;
    }
D
dapan1121 已提交
244 245 246 247 248 249

    if (fetchUs) {
      *fetchUs += (*pSource)->fetchUs;
      *fetchNum += (*pSource)->fetchNum;
    }
    
250 251 252
    // release pageIdList
    if ((*pSource)->pageIdList) {
      taosArrayDestroy((*pSource)->pageIdList);
S
slzhou 已提交
253
      (*pSource)->pageIdList = NULL;
254
    }
255
    if ((*pSource)->param && !(*pSource)->onlyRef) {
256
      taosMemoryFree((*pSource)->param);
S
slzhou 已提交
257
      (*pSource)->param = NULL;
258
    }
dengyihao's avatar
dengyihao 已提交
259 260

    if (!(*pSource)->onlyRef && (*pSource)->src.pBlock) {
dengyihao's avatar
dengyihao 已提交
261 262
      blockDataDestroy((*pSource)->src.pBlock);
      (*pSource)->src.pBlock = NULL;
dengyihao's avatar
dengyihao 已提交
263
    }
dengyihao's avatar
dengyihao 已提交
264

265 266 267 268 269 270
    taosMemoryFreeClear(*pSource);
  }

  taosArrayClear(pOrderedSource);
}

H
Haojun Liao 已提交
271
void tsortDestroySortHandle(SSortHandle* pSortHandle) {
272 273 274
  if (pSortHandle == NULL) {
    return;
  }
H
Haojun Liao 已提交
275
  tsortClose(pSortHandle);
276
  if (pSortHandle->pMergeTree != NULL) {
D
dapan1121 已提交
277
    tMergeTreeDestroy(&pSortHandle->pMergeTree);
278 279
  }

H
Haojun Liao 已提交
280
  destroyDiskbasedBuf(pSortHandle->pBuf);
wafwerar's avatar
wafwerar 已提交
281
  taosMemoryFreeClear(pSortHandle->idStr);
wmmhello's avatar
wmmhello 已提交
282
  blockDataDestroy(pSortHandle->pDataBlock);
283
  if (pSortHandle->pBoundedQueue) destroyBoundedQueue(pSortHandle->pBoundedQueue);
284

D
dapan1121 已提交
285 286
  int64_t fetchUs = 0, fetchNum = 0;
  tsortClearOrderdSource(pSortHandle->pOrderedSource, &fetchUs, &fetchNum);
287
  qDebug("all source fetch time: %" PRId64 "us num:%" PRId64 " %s", fetchUs, fetchNum, pSortHandle->idStr);
D
dapan1121 已提交
288
  
wmmhello's avatar
wmmhello 已提交
289
  taosArrayDestroy(pSortHandle->pOrderedSource);
wafwerar's avatar
wafwerar 已提交
290
  taosMemoryFreeClear(pSortHandle);
291 292
}

H
Haojun Liao 已提交
293
int32_t tsortAddSource(SSortHandle* pSortHandle, void* pSource) {
H
Haojun Liao 已提交
294
  taosArrayPush(pSortHandle->pOrderedSource, &pSource);
295
  return TSDB_CODE_SUCCESS;
296 297
}

H
Hongze Cheng 已提交
298 299
static int32_t doAddNewExternalMemSource(SDiskbasedBuf* pBuf, SArray* pAllSources, SSDataBlock* pBlock,
                                         int32_t* sourceId, SArray* pPageIdList) {
wmmhello's avatar
wmmhello 已提交
300
  SSortSource* pSource = taosMemoryCalloc(1, sizeof(SSortSource));
301
  if (pSource == NULL) {
H
Haojun Liao 已提交
302
    taosArrayDestroy(pPageIdList);
S
Shengliang Guan 已提交
303
    return TSDB_CODE_OUT_OF_MEMORY;
304 305 306
  }

  pSource->src.pBlock = pBlock;
307
  pSource->pageIdList = pPageIdList;
308 309 310 311 312
  taosArrayPush(pAllSources, &pSource);

  (*sourceId) += 1;

  int32_t rowSize = blockDataGetSerialRowSize(pSource->src.pBlock);
313 314

  // The value of numOfRows must be greater than 0, which is guaranteed by the previous memory allocation
H
Hongze Cheng 已提交
315 316
  int32_t numOfRows =
      (getBufPageSize(pBuf) - blockDataGetSerialMetaSize(taosArrayGetSize(pBlock->pDataBlock))) / rowSize;
wmmhello's avatar
wmmhello 已提交
317
  ASSERT(numOfRows > 0);
H
Haojun Liao 已提交
318

319 320 321 322 323 324 325
  return blockDataEnsureCapacity(pSource->src.pBlock, numOfRows);
}

static int32_t doAddToBuf(SSDataBlock* pDataBlock, SSortHandle* pHandle) {
  int32_t start = 0;

  if (pHandle->pBuf == NULL) {
wafwerar's avatar
wafwerar 已提交
326
    if (!osTempSpaceAvailable()) {
327 328
      terrno = TSDB_CODE_NO_DISKSPACE;
      qError("Add to buf failed since %s, tempDir:%s", terrstr(), tsTempDir);
wafwerar's avatar
wafwerar 已提交
329 330
      return terrno;
    }
331

H
Hongze Cheng 已提交
332
    int32_t code = createDiskbasedBuf(&pHandle->pBuf, pHandle->pageSize, pHandle->numOfPages * pHandle->pageSize,
333
                                      "sortExternalBuf", tsTempDir);
H
Haojun Liao 已提交
334
    dBufSetPrintInfo(pHandle->pBuf);
335 336 337 338 339
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

340
  SArray* pPageIdList = taosArrayInit(4, sizeof(int32_t));
H
Hongze Cheng 已提交
341
  while (start < pDataBlock->info.rows) {
342
    int32_t stop = 0;
343
    blockDataSplitRows(pDataBlock, pDataBlock->info.hasVarCol, start, &stop, pHandle->pageSize);
344 345
    SSDataBlock* p = blockDataExtractBlock(pDataBlock, start, stop - start + 1);
    if (p == NULL) {
H
Haojun Liao 已提交
346
      taosArrayDestroy(pPageIdList);
347 348 349 350
      return terrno;
    }

    int32_t pageId = -1;
H
Hongze Cheng 已提交
351
    void*   pPage = getNewBufPage(pHandle->pBuf, &pageId);
352
    if (pPage == NULL) {
H
Haojun Liao 已提交
353
      taosArrayDestroy(pPageIdList);
wmmhello's avatar
wmmhello 已提交
354
      blockDataDestroy(p);
355 356 357
      return terrno;
    }

358 359
    taosArrayPush(pPageIdList, &pageId);

H
Hongze Cheng 已提交
360
    int32_t size = blockDataGetSize(p) + sizeof(int32_t) + taosArrayGetSize(p->pDataBlock) * sizeof(int32_t);
H
Haojun Liao 已提交
361
    ASSERT(size <= getBufPageSize(pHandle->pBuf));
362

363
    blockDataToBuf(pPage, p);
364 365 366 367 368 369 370 371

    setBufPageDirty(pPage, true);
    releaseBufPage(pHandle->pBuf, pPage);

    blockDataDestroy(p);
    start = stop + 1;
  }

372
  blockDataCleanup(pDataBlock);
373

374
  SSDataBlock* pBlock = createOneDataBlock(pDataBlock, false);
375
  return doAddNewExternalMemSource(pHandle->pBuf, pHandle->pOrderedSource, pBlock, &pHandle->sourceId, pPageIdList);
376 377
}

378
static void setCurrentSourceDone(SSortSource* pSource, SSortHandle* pHandle) {
379 380 381 382
  pSource->src.rowIndex = -1;
  ++pHandle->numOfCompletedSources;
}

383
static int32_t sortComparInit(SMsortComparParam* pParam, SArray* pSources, int32_t startIndex, int32_t endIndex,
H
Hongze Cheng 已提交
384
                              SSortHandle* pHandle) {
385 386
  pParam->pSources = taosArrayGet(pSources, startIndex);
  pParam->numOfSources = (endIndex - startIndex + 1);
387 388 389

  int32_t code = 0;

H
Haojun Liao 已提交
390 391 392
  // multi-pass internal merge sort is required
  if (pHandle->pBuf == NULL) {
    if (!osTempSpaceAvailable()) {
393 394
      code = terrno = TSDB_CODE_NO_DISKSPACE;
      qError("Sort compare init failed since %s, tempDir:%s, idStr:%s", terrstr(), tsTempDir, pHandle->idStr);
H
Haojun Liao 已提交
395 396 397 398 399 400 401
      return code;
    }

    code = createDiskbasedBuf(&pHandle->pBuf, pHandle->pageSize, pHandle->numOfPages * pHandle->pageSize,
                              "sortComparInit", tsTempDir);
    dBufSetPrintInfo(pHandle->pBuf);
    if (code != TSDB_CODE_SUCCESS) {
D
dapan1121 已提交
402
      terrno = code;
H
Haojun Liao 已提交
403 404 405 406
      return code;
    }
  }

H
Haojun Liao 已提交
407
  if (pHandle->type == SORT_SINGLESOURCE_SORT) {
408 409
    for (int32_t i = 0; i < pParam->numOfSources; ++i) {
      SSortSource* pSource = pParam->pSources[i];
410

411
      // set current source is done
412
      if (taosArrayGetSize(pSource->pageIdList) == 0) {
413
        setCurrentSourceDone(pSource, pHandle);
414
        continue;
415 416
      }

D
dapan1121 已提交
417
      int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex);
418

D
dapan1121 已提交
419
      void* pPage = getBufPage(pHandle->pBuf, *pPgId);
420 421 422 423
      if (NULL == pPage) {
        return terrno;
      }
      
424
      code = blockDataFromBuf(pSource->src.pBlock, pPage);
425
      if (code != TSDB_CODE_SUCCESS) {
D
dapan1121 已提交
426
        terrno = code;
427 428 429 430 431 432
        return code;
      }

      releaseBufPage(pHandle->pBuf, pPage);
    }
  } else {
433 434 435 436 437
    qDebug("start init for the multiway merge sort, %s", pHandle->idStr);
    int64_t st = taosGetTimestampUs();

    for (int32_t i = 0; i < pParam->numOfSources; ++i) {
      SSortSource* pSource = pParam->pSources[i];
438
      pSource->src.pBlock = pHandle->fetchfp(pSource->param);
439

440
      // set current source is done
441
      if (pSource->src.pBlock == NULL) {
442
        setCurrentSourceDone(pSource, pHandle);
443
      }
444
    }
445 446

    int64_t et = taosGetTimestampUs();
447
    qDebug("init for merge sort completed, elapsed time:%.2f ms, %s", (et - st) / 1000.0, pHandle->idStr);
448 449 450 451 452
  }

  return code;
}

H
Hongze Cheng 已提交
453
static void appendOneRowToDataBlock(SSDataBlock* pBlock, const SSDataBlock* pSource, int32_t* rowIndex) {
454
  for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) {
455 456 457
    SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i);

    SColumnInfoData* pSrcColInfo = taosArrayGet(pSource->pDataBlock, i);
H
Hongze Cheng 已提交
458
    bool             isNull = colDataIsNull(pSrcColInfo, pSource->info.rows, *rowIndex, NULL);
459 460

    if (isNull) {
461
      colDataSetVal(pColInfo, pBlock->info.rows, NULL, true);
462
    } else {
463
      char* pData = colDataGetData(pSrcColInfo, *rowIndex);
464
      colDataSetVal(pColInfo, pBlock->info.rows, pData, false);
465 466 467 468 469 470 471
    }
  }

  pBlock->info.rows += 1;
  *rowIndex += 1;
}

H
Hongze Cheng 已提交
472 473
static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeTreeInfo* pTree, SSortHandle* pHandle,
                                           int32_t* numOfCompleted) {
474 475 476 477 478 479 480
  /*
   * load a new SDataBlock into memory of a given intermediate data-set source,
   * since it's last record in buffer has been chosen to be processed, as the winner of loser-tree
   */
  if (pSource->src.rowIndex >= pSource->src.pBlock->info.rows) {
    pSource->src.rowIndex = 0;

481
    if (pHandle->type == SORT_SINGLESOURCE_SORT) {
H
Hongze Cheng 已提交
482
      pSource->pageIndex++;
483
      if (pSource->pageIndex >= taosArrayGetSize(pSource->pageIdList)) {
484
        qDebug("adjust merge tree. %d source completed %d", *numOfCompleted, pSource->pageIndex);
485 486 487 488 489
        (*numOfCompleted) += 1;
        pSource->src.rowIndex = -1;
        pSource->pageIndex = -1;
        pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock);
      } else {
490
        if (pSource->pageIndex % 512 == 0) qDebug("begin source %p page %d", pSource, pSource->pageIndex);
S
slzhou 已提交
491

D
dapan1121 已提交
492
        int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex);
493

H
Hongze Cheng 已提交
494
        void*   pPage = getBufPage(pHandle->pBuf, *pPgId);
495 496 497 498 499
        if (pPage == NULL) {
          qError("failed to get buffer, code:%s", tstrerror(terrno));
          return terrno;
        }

H
Hongze Cheng 已提交
500
        int32_t code = blockDataFromBuf(pSource->src.pBlock, pPage);
501 502 503 504
        if (code != TSDB_CODE_SUCCESS) {
          return code;
        }
        releaseBufPage(pHandle->pBuf, pPage);
505 506
      }
    } else {
D
dapan1121 已提交
507
      int64_t st = taosGetTimestampUs();      
wmmhello's avatar
wmmhello 已提交
508
      pSource->src.pBlock = pHandle->fetchfp(((SSortSource*)pSource)->param);
D
dapan1121 已提交
509 510
      pSource->fetchUs += taosGetTimestampUs() - st;
      pSource->fetchNum++;
511 512 513
      if (pSource->src.pBlock == NULL) {
        (*numOfCompleted) += 1;
        pSource->src.rowIndex = -1;
514
        qDebug("adjust merge tree. %d source completed", *numOfCompleted);
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535
      }
    }
  }

  /*
   * Adjust loser tree otherwise, according to new candidate data
   * if the loser tree is rebuild completed, we do not need to adjust
   */
  int32_t leafNodeIndex = tMergeTreeGetAdjustIndex(pTree);

#ifdef _DEBUG_VIEW
  printf("before adjust:\t");
  tMergeTreePrint(pTree);
#endif

  tMergeTreeAdjust(pTree, leafNodeIndex);

#ifdef _DEBUG_VIEW
  printf("\nafter adjust:\t");
  tMergeTreePrint(pTree);
#endif
536
  return TSDB_CODE_SUCCESS;
537 538
}

wmmhello's avatar
wmmhello 已提交
539
static SSDataBlock* getSortedBlockDataInner(SSortHandle* pHandle, SMsortComparParam* cmpParam, int32_t capacity) {
540
  blockDataCleanup(pHandle->pDataBlock);
541

H
Hongze Cheng 已提交
542
  while (1) {
543 544 545 546 547 548
    if (cmpParam->numOfSources == pHandle->numOfCompletedSources) {
      break;
    }

    int32_t index = tMergeTreeGetChosenIndex(pHandle->pMergeTree);

H
Hongze Cheng 已提交
549
    SSortSource* pSource = (*cmpParam).pSources[index];
550 551 552 553 554 555 556 557 558 559 560 561 562
    appendOneRowToDataBlock(pHandle->pDataBlock, pSource->src.pBlock, &pSource->src.rowIndex);

    int32_t code = adjustMergeTreeForNextTuple(pSource, pHandle->pMergeTree, pHandle, &pHandle->numOfCompletedSources);
    if (code != TSDB_CODE_SUCCESS) {
      terrno = code;
      return NULL;
    }

    if (pHandle->pDataBlock->info.rows >= capacity) {
      return pHandle->pDataBlock;
    }
  }

H
Hongze Cheng 已提交
563
  return (pHandle->pDataBlock->info.rows > 0) ? pHandle->pDataBlock : NULL;
564 565
}

H
Hongze Cheng 已提交
566 567 568
int32_t msortComparFn(const void* pLeft, const void* pRight, void* param) {
  int32_t pLeftIdx = *(int32_t*)pLeft;
  int32_t pRightIdx = *(int32_t*)pRight;
569

H
Hongze Cheng 已提交
570
  SMsortComparParam* pParam = (SMsortComparParam*)param;
571

H
Hongze Cheng 已提交
572
  SArray* pInfo = pParam->orderInfo;
573

H
Hongze Cheng 已提交
574
  SSortSource* pLeftSource = pParam->pSources[pLeftIdx];
wmmhello's avatar
wmmhello 已提交
575
  SSortSource* pRightSource = pParam->pSources[pRightIdx];
576 577 578 579 580 581 582 583 584 585 586 587 588

  // this input is exhausted, set the special value to denote this
  if (pLeftSource->src.rowIndex == -1) {
    return 1;
  }

  if (pRightSource->src.rowIndex == -1) {
    return -1;
  }

  SSDataBlock* pLeftBlock = pLeftSource->src.pBlock;
  SSDataBlock* pRightBlock = pRightSource->src.pBlock;

589
  if (pParam->cmpGroupId) {
H
Haojun Liao 已提交
590 591
    if (pLeftBlock->info.id.groupId != pRightBlock->info.id.groupId) {
      return pLeftBlock->info.id.groupId < pRightBlock->info.id.groupId ? -1 : 1;
592
    }
593 594
  }

S
slzhou 已提交
595
  if (pParam->sortType == SORT_BLOCK_TS_MERGE) {
596 597 598 599
    SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pParam->tsSlotId);
    SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pParam->tsSlotId);
    int64_t*            left1 = (int64_t*)(pLeftColInfoData->pData) + pLeftSource->src.rowIndex;
    int64_t*            right1 =  (int64_t*)(pRightColInfoData->pData) + pRightSource->src.rowIndex;
600

601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
    int ret = pParam->cmpFn(left1, right1);
    return ret;
  } else {
    for (int32_t i = 0; i < pInfo->size; ++i) {
      SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i);
      SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->slotId);
      SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->slotId);

      bool leftNull = false;
      if (pLeftColInfoData->hasNull) {
        if (pLeftBlock->pBlockAgg == NULL) {
          leftNull = colDataIsNull_s(pLeftColInfoData, pLeftSource->src.rowIndex);
        } else {
          leftNull = colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->src.rowIndex,
                                   pLeftBlock->pBlockAgg[i]);
        }
617
      }
618

619 620 621 622 623 624 625 626
      bool rightNull = false;
      if (pRightColInfoData->hasNull) {
        if (pRightBlock->pBlockAgg == NULL) {
          rightNull = colDataIsNull_s(pRightColInfoData, pRightSource->src.rowIndex);
        } else {
          rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->src.rowIndex,
                                    pRightBlock->pBlockAgg[i]);
        }
627
      }
628

629 630 631
      if (leftNull && rightNull) {
        continue;  // continue to next slot
      }
632

633 634 635
      if (rightNull) {
        return pOrder->nullFirst ? 1 : -1;
      }
636

637 638 639
      if (leftNull) {
        return pOrder->nullFirst ? -1 : 1;
      }
640

641 642
      void* left1 = colDataGetData(pLeftColInfoData, pLeftSource->src.rowIndex);
      void* right1 = colDataGetData(pRightColInfoData, pRightSource->src.rowIndex);
643

644
      __compar_fn_t fn = getKeyComparFunc(pLeftColInfoData->info.type, pOrder->order);
645

646 647 648 649 650 651
      int ret = fn(left1, right1);
      if (ret == 0) {
        continue;
      } else {
        return ret;
      }
652 653
    }
  }
654
  return 0;
655 656 657 658
}

static int32_t doInternalMergeSort(SSortHandle* pHandle) {
  size_t numOfSources = taosArrayGetSize(pHandle->pOrderedSource);
H
Haojun Liao 已提交
659 660 661
  if (numOfSources == 0) {
    return 0;
  }
662 663 664 665 666

  // Calculate the I/O counts to complete the data sort.
  double sortPass = floorl(log2(numOfSources) / log2(pHandle->numOfPages));

  pHandle->totalElapsed = taosGetTimestampUs() - pHandle->startTs;
667 668 669 670 671 672 673

  if (sortPass > 0) {
    size_t s = pHandle->pBuf ? getTotalBufSize(pHandle->pBuf) : 0;
    qDebug("%s %d rounds mergesort required to complete the sort, first-round sorted data size:%" PRIzu
           ", sort elapsed:%" PRId64 ", total elapsed:%" PRId64,
           pHandle->idStr, (int32_t)(sortPass + 1), s, pHandle->sortElapsed, pHandle->totalElapsed);
  } else {
H
Hongze Cheng 已提交
674 675
    qDebug("%s ordered source:%" PRIzu ", available buf:%d, no need internal sort", pHandle->idStr, numOfSources,
           pHandle->numOfPages);
676
  }
677

G
Ganlin Zhao 已提交
678 679
  int32_t numOfRows = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize,
                                                blockDataGetSerialMetaSize(taosArrayGetSize(pHandle->pDataBlock->pDataBlock)));
wmmhello's avatar
wmmhello 已提交
680
  blockDataEnsureCapacity(pHandle->pDataBlock, numOfRows);
681

682 683 684
  // the initial pass + sortPass + final mergePass
  pHandle->loops = sortPass + 2;

685
  size_t numOfSorted = taosArrayGetSize(pHandle->pOrderedSource);
H
Hongze Cheng 已提交
686
  for (int32_t t = 0; t < sortPass; ++t) {
687 688 689 690 691 692 693 694
    int64_t st = taosGetTimestampUs();

    SArray* pResList = taosArrayInit(4, POINTER_BYTES);

    int32_t numOfInputSources = pHandle->numOfPages;
    int32_t sortGroup = (numOfSorted + numOfInputSources - 1) / numOfInputSources;

    // Only *numOfInputSources* can be loaded into buffer to perform the external sort.
H
Hongze Cheng 已提交
695
    for (int32_t i = 0; i < sortGroup; ++i) {
696
      qDebug("internal merge sort pass %d group %d. num input sources %d ", t, i, numOfInputSources);
697 698 699 700 701 702 703 704 705 706 707
      pHandle->sourceId += 1;

      int32_t end = (i + 1) * numOfInputSources - 1;
      if (end > numOfSorted - 1) {
        end = numOfSorted - 1;
      }

      pHandle->cmpParam.numOfSources = end - i * numOfInputSources + 1;

      int32_t code = sortComparInit(&pHandle->cmpParam, pHandle->pOrderedSource, i * numOfInputSources, end, pHandle);
      if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
708
        taosArrayDestroy(pResList);
709 710 711
        return code;
      }

H
Hongze Cheng 已提交
712 713
      code =
          tMergeTreeCreate(&pHandle->pMergeTree, pHandle->cmpParam.numOfSources, &pHandle->cmpParam, pHandle->comparFn);
714
      if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
715
        taosArrayDestroy(pResList);
716 717
        return code;
      }
S
slzhou 已提交
718 719

      int nMergedRows = 0;
720

721
      SArray* pPageIdList = taosArrayInit(4, sizeof(int32_t));
722
      while (1) {
D
dapan1121 已提交
723 724 725 726
        if (tsortIsClosed(pHandle)) {
          code = terrno = TSDB_CODE_TSC_QUERY_CANCELLED;
          return code;
        }
S
slzhou 已提交
727

wmmhello's avatar
wmmhello 已提交
728
        SSDataBlock* pDataBlock = getSortedBlockDataInner(pHandle, &pHandle->cmpParam, numOfRows);
729 730 731 732 733
        if (pDataBlock == NULL) {
          break;
        }

        int32_t pageId = -1;
H
Hongze Cheng 已提交
734
        void*   pPage = getNewBufPage(pHandle->pBuf, &pageId);
735
        if (pPage == NULL) {
H
Haojun Liao 已提交
736 737
          taosArrayDestroy(pResList);
          taosArrayDestroy(pPageIdList);
738 739 740
          return terrno;
        }

741 742
        taosArrayPush(pPageIdList, &pageId);

H
Hongze Cheng 已提交
743 744
        int32_t size =
            blockDataGetSize(pDataBlock) + sizeof(int32_t) + taosArrayGetSize(pDataBlock->pDataBlock) * sizeof(int32_t);
H
Haojun Liao 已提交
745
        ASSERT(size <= getBufPageSize(pHandle->pBuf));
746

747
        blockDataToBuf(pPage, pDataBlock);
748 749 750

        setBufPageDirty(pPage, true);
        releaseBufPage(pHandle->pBuf, pPage);
S
slzhou 已提交
751 752
        nMergedRows += pDataBlock->info.rows;

753
        blockDataCleanup(pDataBlock);
S
slzhou 已提交
754 755 756
        if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) {
          break;
        }        
757 758
      }

759
      sortComparCleanup(&pHandle->cmpParam);
D
dapan1121 已提交
760
      tMergeTreeDestroy(&pHandle->pMergeTree);
761 762
      pHandle->numOfCompletedSources = 0;

763
      SSDataBlock* pBlock = createOneDataBlock(pHandle->pDataBlock, false);
764
      code = doAddNewExternalMemSource(pHandle->pBuf, pResList, pBlock, &pHandle->sourceId, pPageIdList);
H
Haojun Liao 已提交
765 766
      if (code != TSDB_CODE_SUCCESS) {
        taosArrayDestroy(pResList);
767 768 769 770
        return code;
      }
    }

D
dapan1121 已提交
771
    tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL);
772 773 774 775 776 777 778 779 780
    taosArrayAddAll(pHandle->pOrderedSource, pResList);
    taosArrayDestroy(pResList);

    numOfSorted = taosArrayGetSize(pHandle->pOrderedSource);

    int64_t el = taosGetTimestampUs() - st;
    pHandle->totalElapsed += el;

    SDiskbasedBufStatis statis = getDBufStatis(pHandle->pBuf);
H
Hongze Cheng 已提交
781 782
    qDebug("%s %d round mergesort, elapsed:%" PRId64 " readDisk:%.2f Kb, flushDisk:%.2f Kb", pHandle->idStr, t + 1, el,
           statis.loadBytes / 1024.0, statis.flushBytes / 1024.0);
783

H
Haojun Liao 已提交
784 785
    if (pHandle->type == SORT_MULTISOURCE_MERGE) {
      pHandle->type = SORT_SINGLESOURCE_SORT;
786 787 788 789 790 791 792 793
      pHandle->comparFn = msortComparFn;
    }
  }

  pHandle->cmpParam.numOfSources = taosArrayGetSize(pHandle->pOrderedSource);
  return 0;
}

794 795 796 797 798
// get sort page size
int32_t getProperSortPageSize(size_t rowSize, uint32_t numOfCols) {
  uint32_t pgSize = rowSize * 4 + blockDataGetSerialMetaSize(numOfCols);
  if (pgSize < DEFAULT_PAGESIZE) {
    return DEFAULT_PAGESIZE;
799 800 801 802 803
  }

  return pgSize;
}

804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821
static int32_t createPageBuf(SSortHandle* pHandle) {
  if (pHandle->pBuf == NULL) {
    if (!osTempSpaceAvailable()) {
      terrno = TSDB_CODE_NO_DISKSPACE;
      qError("create page buf failed since %s, tempDir:%s", terrstr(), tsTempDir);
      return terrno;
    }

    int32_t code = createDiskbasedBuf(&pHandle->pBuf, pHandle->pageSize, pHandle->numOfPages * pHandle->pageSize,
                                      "tableBlocksBuf", tsTempDir);
    dBufSetPrintInfo(pHandle->pBuf);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }
  return 0;
}

S
slzhou 已提交
822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837
typedef struct SBlkMergeSupport {
  int64_t** aTs;
  int32_t* aRowIdx;
  int32_t order;
} SBlkMergeSupport;

static int32_t blockCompareTsFn(const void* pLeft, const void* pRight, void* param) {
  int32_t left = *(int32_t*)pLeft;
  int32_t right = *(int32_t*)pRight;

  SBlkMergeSupport* pSup = (SBlkMergeSupport*)param;
  if (pSup->aRowIdx[left] == -1) {
    return 1;
  } else if (pSup->aRowIdx[right] == -1) {
    return -1;
  }
838

S
slzhou 已提交
839 840
  int64_t leftTs = pSup->aTs[left][pSup->aRowIdx[left]];
  int64_t rightTs = pSup->aTs[right][pSup->aRowIdx[right]];
841

S
slzhou 已提交
842 843 844 845 846 847
  int32_t ret = leftTs>rightTs ? 1 : ((leftTs < rightTs) ? -1 : 0);
  if (pSup->order == TSDB_ORDER_DESC) {
    ret = -1 * ret;
  }
  return ret;
}
848

S
slzhou 已提交
849 850 851 852
static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, SArray* aPgId) {
  int32_t pageId = -1;
  void*   pPage = getNewBufPage(pHandle->pBuf, &pageId);
  taosArrayPush(aPgId, &pageId);
853 854 855 856

  int32_t size = blockDataGetSize(blk) + sizeof(int32_t) + taosArrayGetSize(blk->pDataBlock) * sizeof(int32_t);
  ASSERT(size <= getBufPageSize(pHandle->pBuf));

S
slzhou 已提交
857
  blockDataToBuf(pPage, blk);
858

S
slzhou 已提交
859 860
  setBufPageDirty(pPage, true);
  releaseBufPage(pHandle->pBuf, pPage);
861

S
slzhou 已提交
862 863
  return 0;
}
864

865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891
static int32_t getPageBufIncForRow(SSDataBlock* blk, int32_t row, int32_t rowIdxInPage) {
  int sz = 0;
  int numCols = taosArrayGetSize(blk->pDataBlock);
  if (!blk->info.hasVarCol) {
    sz += numCols * ((rowIdxInPage & 0x7) == 0 ? 1: 0);
    sz += blockDataGetRowSize(blk);
  } else {
    for (int32_t i = 0; i < numCols; ++i) {
      SColumnInfoData* pColInfoData = TARRAY_GET_ELEM(blk->pDataBlock, i);
      if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) {
        if (pColInfoData->varmeta.offset[row] != -1) {
          char* p = colDataGetData(pColInfoData, row);
          sz += varDataTLen(p);
        }

        sz += sizeof(pColInfoData->varmeta.offset[0]);
      } else {
        sz += pColInfoData->info.bytes;

        if (((rowIdxInPage) & 0x07) == 0) {
          sz += 1; // bitmap
        }
      }
    }    
  }
  return sz;
}
S
slzhou 已提交
892 893

static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockOrderInfo* order, SArray* aExtSrc) {
894 895
  int pgHeaderSz = sizeof(int32_t) + sizeof(int32_t) * taosArrayGetSize(pHandle->pDataBlock->pDataBlock);
  int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, pgHeaderSz);
S
slzhou 已提交
896 897 898 899 900 901 902 903 904 905 906 907 908
  blockDataEnsureCapacity(pHandle->pDataBlock, rowCap);
  blockDataCleanup(pHandle->pDataBlock);
  int32_t numBlks = taosArrayGetSize(aBlk);

  SBlkMergeSupport sup;
  sup.aRowIdx = taosMemoryCalloc(numBlks, sizeof(int32_t));
  sup.aTs = taosMemoryCalloc(numBlks, sizeof(int64_t*));
  sup.order = order->order;
  for (int i = 0; i < numBlks; ++i) {
    SSDataBlock* blk = taosArrayGetP(aBlk, i);
    SColumnInfoData* col = taosArrayGet(blk->pDataBlock, order->slotId);
    sup.aTs[i] = (int64_t*)col->pData;
    sup.aRowIdx[i] = 0;
909 910
  }

S
slzhou 已提交
911 912 913 914 915 916 917 918 919 920 921
  int32_t totalRows = 0;
  for (int i = 0; i < numBlks; ++i) {
    SSDataBlock* blk = taosArrayGetP(aBlk, i);
    totalRows += blk->info.rows;
  }

  SArray* aPgId = taosArrayInit(8, sizeof(int32_t));

  SMultiwayMergeTreeInfo* pTree = NULL;        
  tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn);
  int32_t nRows = 0;
S
slzhou 已提交
922 923
  int32_t nMergedRows = 0;
  bool mergeLimitReached = false;
924
  size_t blkPgSz = pgHeaderSz;
925 926
  int64_t lastPageBufTs = (order->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN;
  int64_t currTs = (order->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN;
S
slzhou 已提交
927 928 929 930
  while (nRows < totalRows) {
    int32_t minIdx = tMergeTreeGetChosenIndex(pTree);
    SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx);
    int32_t minRow = sup.aRowIdx[minIdx];
931
    int32_t bufInc = getPageBufIncForRow(minBlk, minRow, pHandle->pDataBlock->info.rows);
932

933
    if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) {
934 935
        SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId);
        lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1];
936
        appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId);
S
slzhou 已提交
937 938
        nMergedRows += pHandle->pDataBlock->info.rows;

S
slzhou 已提交
939
        blockDataCleanup(pHandle->pDataBlock);
940 941
        blkPgSz = pgHeaderSz;
        bufInc = getPageBufIncForRow(minBlk, minRow, 0);
942
        
S
slzhou 已提交
943 944
        if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) {
          mergeLimitReached = true;
945 946 947 948
          if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) ||
              (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) {
                pHandle->currMergeLimitTs = lastPageBufTs;
          }
S
slzhou 已提交
949 950
          break;
        }        
951 952
    }
    blockDataEnsureCapacity(pHandle->pDataBlock, pHandle->pDataBlock->info.rows + 1);
S
slzhou 已提交
953
    appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow);
954 955
    blkPgSz += bufInc;

S
slzhou 已提交
956
    ++nRows;
957

S
slzhou 已提交
958 959 960 961 962 963 964 965
    if (sup.aRowIdx[minIdx] == minBlk->info.rows - 1) {
      sup.aRowIdx[minIdx] = -1;
    } else {
      ++sup.aRowIdx[minIdx];
    }
    tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree));
  }
  if (pHandle->pDataBlock->info.rows > 0) {
S
slzhou 已提交
966
    if (!mergeLimitReached) {
967 968
      SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId);
      lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1];
S
slzhou 已提交
969 970
      appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId);
      nMergedRows += pHandle->pDataBlock->info.rows;
971 972 973 974 975 976 977
      if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) {
          mergeLimitReached = true;
          if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) ||
              (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) {
                pHandle->currMergeLimitTs = lastPageBufTs;
          }
      }
S
slzhou 已提交
978
    }
S
slzhou 已提交
979
    blockDataCleanup(pHandle->pDataBlock);
S
slzhou 已提交
980 981 982 983 984 985 986 987 988
  }
  SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false);
  doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId);

  taosMemoryFree(sup.aRowIdx);
  taosMemoryFree(sup.aTs);

  tMergeTreeDestroy(&pTree);

989 990 991
  return 0;
}

992 993 994 995
static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) {
  SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0);
  size_t           nSrc = taosArrayGetSize(pHandle->pOrderedSource);
  SArray*          aExtSrc = taosArrayInit(nSrc, POINTER_BYTES);
H
Haojun Liao 已提交
996

997
  size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize;
998 999 1000 1001 1002 1003

  int32_t code = createPageBuf(pHandle);
  if (code != TSDB_CODE_SUCCESS) {
    taosArrayDestroy(aExtSrc);
    return code;
  }
dengyihao's avatar
dengyihao 已提交
1004

1005 1006
  SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0);
  int32_t      szSort = 0;
1007

1008 1009 1010 1011 1012 1013
  if (pOrder->order == TSDB_ORDER_ASC) {
    pHandle->currMergeLimitTs = INT64_MAX;
  } else {
    pHandle->currMergeLimitTs = INT64_MIN;
  }

1014
  SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES);
1015
  SSHashObj* mUidBlk = tSimpleHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT));
1016 1017
  while (1) {
    SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param);
1018 1019 1020 1021 1022 1023 1024 1025
    if (pBlk != NULL) {
      SColumnInfoData* tsCol = taosArrayGet(pBlk->pDataBlock, pOrder->slotId);
      int64_t firstRowTs = *(int64_t*)tsCol->pData;
      if ((pOrder->order == TSDB_ORDER_ASC && firstRowTs > pHandle->currMergeLimitTs)  ||
          (pOrder->order == TSDB_ORDER_DESC && firstRowTs < pHandle->currMergeLimitTs)) {
            continue;
          }
    }
1026 1027
    if (pBlk != NULL) {
      szSort += blockDataGetSize(pBlk);
1028 1029 1030 1031 1032 1033 1034 1035 1036 1037

      void* ppBlk = tSimpleHashGet(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid));
      if (ppBlk != NULL) {
        SSDataBlock* tBlk = *(SSDataBlock**)(ppBlk);
        blockDataMerge(tBlk, pBlk);
      } else {
        SSDataBlock* tBlk = createOneDataBlock(pBlk, true);
        tSimpleHashPut(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid), &tBlk, POINTER_BYTES);
        taosArrayPush(aBlkSort, &tBlk);
      }
1038
    }
1039

1040
    if ((pBlk != NULL && szSort > maxBufSize) || (pBlk == NULL && szSort > 0)) {
1041 1042
      tSimpleHashClear(mUidBlk);

1043 1044 1045 1046
      int64_t p = taosGetTimestampUs();
      sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc);
      int64_t el = taosGetTimestampUs() - p;
      pHandle->sortElapsed += el;
1047

1048 1049
      for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) {
        blockDataDestroy(taosArrayGetP(aBlkSort, i));
1050
      }
1051 1052
      taosArrayClear(aBlkSort);
      szSort = 0;
1053
      qDebug("source %zu created", taosArrayGetSize(aExtSrc));
1054 1055 1056
    }
    if (pBlk == NULL) {
      break;
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066
    }

    if (tsortIsClosed(pHandle)) {
      tSimpleHashClear(mUidBlk);
      for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) {
        blockDataDestroy(taosArrayGetP(aBlkSort, i));
      }
      taosArrayClear(aBlkSort);
      break;
    }
1067
  }
1068

1069
  tSimpleHashCleanup(mUidBlk);
1070 1071
  taosArrayDestroy(aBlkSort);
  tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL);
1072 1073 1074
  if (!tsortIsClosed(pHandle)) {
    taosArrayAddAll(pHandle->pOrderedSource, aExtSrc);
  }
1075 1076 1077
  taosArrayDestroy(aExtSrc);

  pHandle->type = SORT_SINGLESOURCE_SORT;
1078
  return TSDB_CODE_SUCCESS;
1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114
}

static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) {
  int32_t code = 0;
  size_t  sortBufSize = pHandle->numOfPages * pHandle->pageSize;

  SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0);
  SSortSource*  source = *pSource;
  *pSource = NULL;

  tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL);

  while (1) {
    SSDataBlock* pBlock = pHandle->fetchfp(source->param);
    if (pBlock == NULL) {
      break;
    }

    if (pHandle->pDataBlock == NULL) {
      uint32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock);
      pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock), numOfCols);

      // todo, number of pages are set according to the total available sort buffer
      pHandle->numOfPages = 1024;
      sortBufSize = pHandle->numOfPages * pHandle->pageSize;
      pHandle->pDataBlock = createOneDataBlock(pBlock, false);
    }

    if (pHandle->beforeFp != NULL) {
      pHandle->beforeFp(pBlock, pHandle->param);
    }

    code = blockDataMerge(pHandle->pDataBlock, pBlock);
    if (code != TSDB_CODE_SUCCESS) {
      if (source->param && !source->onlyRef) {
        taosMemoryFree(source->param);
1115
      }
1116 1117 1118 1119 1120 1121 1122
      if (!source->onlyRef && source->src.pBlock) {
        blockDataDestroy(source->src.pBlock);
        source->src.pBlock = NULL;
      }
      taosMemoryFree(source);
      return code;
    }
1123

1124 1125 1126 1127 1128 1129
    size_t size = blockDataGetSize(pHandle->pDataBlock);
    if (size > sortBufSize) {
      // Perform the in-memory sort and then flush data in the buffer into disk.
      int64_t p = taosGetTimestampUs();
      code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo);
      if (code != 0) {
1130 1131 1132
        if (source->param && !source->onlyRef) {
          taosMemoryFree(source->param);
        }
dengyihao's avatar
dengyihao 已提交
1133 1134 1135 1136
        if (!source->onlyRef && source->src.pBlock) {
          blockDataDestroy(source->src.pBlock);
          source->src.pBlock = NULL;
        }
1137

1138
        taosMemoryFree(source);
1139 1140
        return code;
      }
1141

1142 1143
      int64_t el = taosGetTimestampUs() - p;
      pHandle->sortElapsed += el;
S
slzhou 已提交
1144
      if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows);
1145 1146 1147
      code = doAddToBuf(pHandle->pDataBlock, pHandle);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
1148 1149
      }
    }
1150
  }
1151

1152 1153 1154
  if (source->param && !source->onlyRef) {
    taosMemoryFree(source->param);
  }
1155

1156
  taosMemoryFree(source);
1157

1158 1159
  if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) {
    size_t size = blockDataGetSize(pHandle->pDataBlock);
1160

1161 1162
    // Perform the in-memory sort and then flush data in the buffer into disk.
    int64_t p = taosGetTimestampUs();
1163

1164 1165 1166 1167
    code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo);
    if (code != 0) {
      return code;
    }
1168

S
slzhou 已提交
1169
    if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows);
1170 1171
    int64_t el = taosGetTimestampUs() - p;
    pHandle->sortElapsed += el;
1172

1173 1174 1175 1176
    // All sorted data can fit in memory, external memory sort is not needed. Return to directly
    if (size <= sortBufSize && pHandle->pBuf == NULL) {
      pHandle->cmpParam.numOfSources = 1;
      pHandle->inMemSort = true;
1177

1178 1179 1180 1181 1182 1183
      pHandle->loops = 1;
      pHandle->tupleHandle.rowIndex = -1;
      pHandle->tupleHandle.pBlock = pHandle->pDataBlock;
      return 0;
    } else {
      code = doAddToBuf(pHandle->pDataBlock, pHandle);
1184
    }
1185 1186 1187
  }
  return code;
}
1188

1189 1190
static int32_t createInitialSources(SSortHandle* pHandle) {
  int32_t code = 0;
1191

1192 1193
  if (pHandle->type == SORT_SINGLESOURCE_SORT) {
    code = createBlocksQuickSortInitialSources(pHandle);
S
slzhou 已提交
1194
  } else if (pHandle->type == SORT_BLOCK_TS_MERGE) {
1195 1196
    code = createBlocksMergeSortInitialSources(pHandle);
  }
1197
  qDebug("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource));
1198
  return code;
H
Haojun Liao 已提交
1199 1200
}

1201
static bool tsortOpenForBufMergeSort(SSortHandle* pHandle) {
1202
  int32_t code = createInitialSources(pHandle);
H
Haojun Liao 已提交
1203 1204
  if (code != TSDB_CODE_SUCCESS) {
    return code;
1205 1206 1207
  }

  // do internal sort
H
Haojun Liao 已提交
1208
  code = doInternalMergeSort(pHandle);
1209 1210 1211 1212 1213
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t numOfSources = taosArrayGetSize(pHandle->pOrderedSource);
H
Haojun Liao 已提交
1214 1215 1216 1217
  if (pHandle->pBuf != NULL) {
    ASSERT(numOfSources <= getNumOfInMemBufPages(pHandle->pBuf));
  }

H
Haojun Liao 已提交
1218 1219 1220 1221
  if (numOfSources == 0) {
    return 0;
  }

1222 1223 1224 1225 1226
  code = sortComparInit(&pHandle->cmpParam, pHandle->pOrderedSource, 0, numOfSources - 1, pHandle);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

1227
  return tMergeTreeCreate(&pHandle->pMergeTree, pHandle->cmpParam.numOfSources, &pHandle->cmpParam, pHandle->comparFn);
1228 1229
}

H
Haojun Liao 已提交
1230
int32_t tsortClose(SSortHandle* pHandle) {
D
dapan1121 已提交
1231 1232
  atomic_val_compare_exchange_8(&pHandle->closed, 0, 1);
  taosMsleep(10);
1233
  return TSDB_CODE_SUCCESS;
1234 1235
}

D
dapan1121 已提交
1236 1237 1238 1239 1240 1241 1242 1243
bool tsortIsClosed(SSortHandle* pHandle) {
  return atomic_val_compare_exchange_8(&pHandle->closed, 1, 2);
}

void tsortSetClosed(SSortHandle* pHandle) {
  atomic_store_8(&pHandle->closed, 2);
}

S
slzhou 已提交
1244 1245 1246 1247
void tsortSetMergeLimit(SSortHandle* pHandle, int64_t mergeLimit) {
  pHandle->mergeLimit = mergeLimit;
}

H
Hongze Cheng 已提交
1248 1249
int32_t tsortSetFetchRawDataFp(SSortHandle* pHandle, _sort_fetch_block_fn_t fetchFp, void (*fp)(SSDataBlock*, void*),
                               void* param) {
1250 1251 1252
  pHandle->fetchfp = fetchFp;
  pHandle->beforeFp = fp;
  pHandle->param = param;
1253
  return TSDB_CODE_SUCCESS;
1254 1255
}

H
Haojun Liao 已提交
1256
int32_t tsortSetComparFp(SSortHandle* pHandle, _sort_merge_compar_fn_t fp) {
1257
  pHandle->comparFn = fp;
1258
  return TSDB_CODE_SUCCESS;
1259 1260
}

1261 1262 1263 1264 1265
int32_t tsortSetCompareGroupId(SSortHandle* pHandle, bool compareGroupId) {
  pHandle->cmpParam.cmpGroupId = compareGroupId;
  return TSDB_CODE_SUCCESS;
}

1266
static STupleHandle* tsortBufMergeSortNextTuple(SSortHandle* pHandle) {
D
dapan1121 已提交
1267 1268 1269
  if (tsortIsClosed(pHandle)) {
    return NULL;
  }
1270 1271 1272 1273
  if (pHandle->cmpParam.numOfSources == pHandle->numOfCompletedSources) {
    return NULL;
  }

1274
  // All the data are hold in the buffer, no external sort is invoked.
1275 1276 1277 1278 1279 1280 1281 1282 1283 1284
  if (pHandle->inMemSort) {
    pHandle->tupleHandle.rowIndex += 1;
    if (pHandle->tupleHandle.rowIndex == pHandle->pDataBlock->info.rows) {
      pHandle->numOfCompletedSources = 1;
      return NULL;
    }

    return &pHandle->tupleHandle;
  }

H
Hongze Cheng 已提交
1285 1286
  int32_t      index = tMergeTreeGetChosenIndex(pHandle->pMergeTree);
  SSortSource* pSource = pHandle->cmpParam.pSources[index];
1287 1288 1289 1290 1291 1292 1293 1294 1295

  if (pHandle->needAdjust) {
    int32_t code = adjustMergeTreeForNextTuple(pSource, pHandle->pMergeTree, pHandle, &pHandle->numOfCompletedSources);
    if (code != TSDB_CODE_SUCCESS) {
      terrno = code;
      return NULL;
    }
  }

H
Haojun Liao 已提交
1296
  // all sources are completed.
1297 1298 1299 1300
  if (pHandle->cmpParam.numOfSources == pHandle->numOfCompletedSources) {
    return NULL;
  }

1301
  // Get the adjusted value after the loser tree is updated.
1302 1303 1304
  index = tMergeTreeGetChosenIndex(pHandle->pMergeTree);
  pSource = pHandle->cmpParam.pSources[index];

1305
  ASSERT(pSource->src.pBlock != NULL);
1306 1307 1308 1309 1310 1311 1312 1313 1314 1315

  pHandle->tupleHandle.rowIndex = pSource->src.rowIndex;
  pHandle->tupleHandle.pBlock = pSource->src.pBlock;

  pHandle->needAdjust = true;
  pSource->src.rowIndex += 1;

  return &pHandle->tupleHandle;
}

1316 1317 1318 1319 1320 1321 1322 1323
static bool tsortIsForceUsePQSort(SSortHandle* pHandle) {
  return pHandle->forceUsePQSort == true;
}

void tsortSetForceUsePQSort(SSortHandle* pHandle) {
  pHandle->forceUsePQSort = true;
}

1324 1325
static bool tsortIsPQSortApplicable(SSortHandle* pHandle) {
  if (pHandle->type != SORT_SINGLESOURCE_SORT) return false;
1326
  if (tsortIsForceUsePQSort(pHandle)) return true;
S
slzhou 已提交
1327 1328
  uint64_t maxRowsFitInMemory = pHandle->pqSortBufSize / (pHandle->pqMaxTupleLength + sizeof(char*));
  return maxRowsFitInMemory > pHandle->pqMaxRows;
1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344
}

static bool tsortPQCompFn(void* a, void* b, void* param) {
  SSortHandle* pHandle = param;
  int32_t res = pHandle->comparFn(a, b, param);
  if (res < 0) return 1;
  return 0;
}

static bool tsortPQComFnReverse(void*a, void* b, void* param) {
  SSortHandle* pHandle = param;
  int32_t res = pHandle->comparFn(a, b, param);
  if (res > 0) return 1;
  return 0;
}

1345 1346 1347 1348
static int32_t tupleComparFn(const void* pLeft, const void* pRight, void* param) {
  TupleDesc* pLeftDesc = (TupleDesc*)pLeft;
  TupleDesc* pRightDesc = (TupleDesc*)pRight;

1349 1350 1351 1352 1353
  SSortHandle* pHandle = (SSortHandle*)param;
  SArray* orderInfo = (SArray*)pHandle->pSortInfo;
  uint32_t colNum = blockDataGetNumOfCols(pHandle->pDataBlock);
  for (int32_t i = 0; i < orderInfo->size; ++i) {
    SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(orderInfo, i);
1354 1355
    void *lData = tupleDescGetField(pLeftDesc, pOrder->slotId, colNum);
    void *rData = tupleDescGetField(pRightDesc, pOrder->slotId, colNum);
1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373
    if (!lData && !rData) continue;
    if (!lData) return pOrder->nullFirst ? -1 : 1;
    if (!rData) return pOrder->nullFirst ? 1 : -1;

    int           type = ((SColumnInfoData*)taosArrayGet(pHandle->pDataBlock->pDataBlock, pOrder->slotId))->info.type;
    __compar_fn_t fn = getKeyComparFunc(type, pOrder->order);

    int ret = fn(lData, rData);
    if (ret == 0) {
      continue;
    } else {
      return ret;
    }
  }
  return 0;
}

static int32_t tsortOpenForPQSort(SSortHandle* pHandle) {
S
slzhou 已提交
1374
  pHandle->pBoundedQueue = createBoundedQueue(pHandle->pqMaxRows, tsortPQCompFn, destroyTuple, pHandle);
1375
  if (NULL == pHandle->pBoundedQueue) return TSDB_CODE_OUT_OF_MEMORY;
1376
  tsortSetComparFp(pHandle, tupleComparFn);
1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407

  SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0);
  SSortSource*  source = *pSource;

  pHandle->pDataBlock = NULL;
  uint32_t tupleLen = 0;
  PriorityQueueNode pqNode;
  while (1) {
    // fetch data
    SSDataBlock* pBlock = pHandle->fetchfp(source->param);
    if (NULL == pBlock) break;

    if (pHandle->beforeFp != NULL) {
      pHandle->beforeFp(pBlock, pHandle->param);
    }
    if (pHandle->pDataBlock == NULL) {
      pHandle->pDataBlock = createOneDataBlock(pBlock, false);
    }
    if (pHandle->pDataBlock == NULL) return TSDB_CODE_OUT_OF_MEMORY;

    size_t colNum = blockDataGetNumOfCols(pBlock);

    if (tupleLen == 0) {
      for (size_t colIdx = 0; colIdx < colNum; ++colIdx) {
        SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, colIdx);
        tupleLen += pCol->info.bytes;
        if (IS_VAR_DATA_TYPE(pCol->info.type)) {
          tupleLen += sizeof(VarDataLenT);
        }
      }
    }
1408
    ReferencedTuple refTuple = {.desc.data = (char*)pBlock, .desc.type = ReferencedTupleType, .rowIndex = 0};
1409
    for (size_t rowIdx = 0; rowIdx < pBlock->info.rows; ++rowIdx) {
1410 1411 1412 1413 1414 1415 1416 1417
      refTuple.rowIndex = rowIdx;
      pqNode.data = &refTuple;
      PriorityQueueNode* pPushedNode = taosBQPush(pHandle->pBoundedQueue, &pqNode);
      if (!pPushedNode) {
        // do nothing if push failed
      } else {
        pPushedNode->data = createAllocatedTuple(pBlock, colNum, tupleLen, rowIdx);
        if (pPushedNode->data == NULL) return TSDB_CODE_OUT_OF_MEMORY;
1418 1419 1420 1421 1422 1423 1424
      }
    }
  }
  return TSDB_CODE_SUCCESS;
}

static STupleHandle* tsortPQSortNextTuple(SSortHandle* pHandle) {
1425 1426 1427
  if (pHandle->pDataBlock == NULL) { // when no input stream datablock
    return NULL;
  }
1428 1429
  blockDataCleanup(pHandle->pDataBlock);
  blockDataEnsureCapacity(pHandle->pDataBlock, 1);
1430
  // abandon the top tuple if queue size bigger than max size
1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441
  if (taosBQSize(pHandle->pBoundedQueue) == taosBQMaxSize(pHandle->pBoundedQueue) + 1) {
    taosBQPop(pHandle->pBoundedQueue);
  }
  if (pHandle->tmpRowIdx == 0) {
    // sort the results
    taosBQSetFn(pHandle->pBoundedQueue, tsortPQComFnReverse);
    taosBQBuildHeap(pHandle->pBoundedQueue);
  }
  if (taosBQSize(pHandle->pBoundedQueue) > 0) {
    uint32_t           colNum = blockDataGetNumOfCols(pHandle->pDataBlock);
    PriorityQueueNode* node = taosBQTop(pHandle->pBoundedQueue);
1442
    char*              pTuple = ((TupleDesc*)node->data)->data;
1443 1444 1445 1446 1447 1448

    for (uint32_t i = 0; i < colNum; ++i) {
      void* pData = tupleGetField(pTuple, i, colNum);
      if (!pData) {
        colDataSetNULL(bdGetColumnInfoData(pHandle->pDataBlock, i), 0);
      } else {
H
Haojun Liao 已提交
1449
        colDataSetVal(bdGetColumnInfoData(pHandle->pDataBlock, i), 0, pData, false);
1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
      }
    }
    pHandle->pDataBlock->info.rows++;
    pHandle->tmpRowIdx++;
    taosBQPop(pHandle->pBoundedQueue);
  }
  if (pHandle->pDataBlock->info.rows == 0) return NULL;
  pHandle->tupleHandle.pBlock = pHandle->pDataBlock;
  return &pHandle->tupleHandle;
}

int32_t tsortOpen(SSortHandle* pHandle) {
  if (pHandle->opened) {
    return 0;
  }

  if (pHandle->fetchfp == NULL || pHandle->comparFn == NULL) {
    return -1;
  }

  pHandle->opened = true;
  if (tsortIsPQSortApplicable(pHandle))
    return tsortOpenForPQSort(pHandle);
  else
    return tsortOpenForBufMergeSort(pHandle);
}

STupleHandle* tsortNextTuple(SSortHandle* pHandle) {
  if (pHandle->pBoundedQueue)
    return tsortPQSortNextTuple(pHandle);
  else
    return tsortBufMergeSortNextTuple(pHandle);
}

H
Haojun Liao 已提交
1484
bool tsortIsNullVal(STupleHandle* pVHandle, int32_t colIndex) {
1485
  SColumnInfoData* pColInfoSrc = taosArrayGet(pVHandle->pBlock->pDataBlock, colIndex);
1486
  return colDataIsNull_s(pColInfoSrc, pVHandle->rowIndex);
1487 1488
}

H
Haojun Liao 已提交
1489
void* tsortGetValue(STupleHandle* pVHandle, int32_t colIndex) {
1490
  SColumnInfoData* pColInfo = TARRAY_GET_ELEM(pVHandle->pBlock->pDataBlock, colIndex);
1491 1492 1493 1494 1495
  if (pColInfo->pData == NULL) {
    return NULL;
  } else {
    return colDataGetData(pColInfo, pVHandle->rowIndex);
  }
1496
}
1497

H
Haojun Liao 已提交
1498
uint64_t tsortGetGroupId(STupleHandle* pVHandle) { return pVHandle->pBlock->info.id.groupId; }
1499
void*    tsortGetBlockInfo(STupleHandle* pVHandle) { return &pVHandle->pBlock->info; }
S
slzhou 已提交
1500

1501 1502 1503
SSortExecInfo tsortGetSortExecInfo(SSortHandle* pHandle) {
  SSortExecInfo info = {0};

1504
  if (pHandle == NULL) {
dengyihao's avatar
dengyihao 已提交
1505 1506
    info.sortMethod = SORT_QSORT_T;  // by default
    info.sortBuffer = 2 * 1048576;   // 2mb by default
1507 1508 1509 1510 1511 1512 1513 1514 1515 1516
  } else {
    info.sortBuffer = pHandle->pageSize * pHandle->numOfPages;
    info.sortMethod = pHandle->inMemSort ? SORT_QSORT_T : SORT_SPILLED_MERGE_SORT_T;
    info.loops = pHandle->loops;

    if (pHandle->pBuf != NULL) {
      SDiskbasedBufStatis st = getDBufStatis(pHandle->pBuf);
      info.writeBytes = st.flushBytes;
      info.readBytes = st.loadBytes;
    }
1517 1518 1519 1520
  }

  return info;
}