tscLocalMerge.c 55.2 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

16
#include "os.h"
S
slguan 已提交
17
#include "tlosertree.h"
H
hzcheng 已提交
18
#include "tscUtil.h"
H
hjxilinx 已提交
19
#include "tschemautil.h"
S
slguan 已提交
20
#include "tsclient.h"
H
hzcheng 已提交
21
#include "tutil.h"
S
slguan 已提交
22
#include "tscLog.h"
23
#include "tscLocalMerge.h"
H
hzcheng 已提交
24 25

typedef struct SCompareParam {
S
slguan 已提交
26 27
  SLocalDataSource **pLocalData;
  tOrderDescriptor * pDesc;
28
  int32_t            num;
S
slguan 已提交
29
  int32_t            groupOrderType;
H
hzcheng 已提交
30 31 32 33 34 35
} SCompareParam;

int32_t treeComparator(const void *pLeft, const void *pRight, void *param) {
  int32_t pLeftIdx = *(int32_t *)pLeft;
  int32_t pRightIdx = *(int32_t *)pRight;

S
slguan 已提交
36 37 38
  SCompareParam *    pParam = (SCompareParam *)param;
  tOrderDescriptor * pDesc = pParam->pDesc;
  SLocalDataSource **pLocalData = pParam->pLocalData;
H
hzcheng 已提交
39 40 41 42 43 44 45 46 47 48

  /* this input is exhausted, set the special value to denote this */
  if (pLocalData[pLeftIdx]->rowIdx == -1) {
    return 1;
  }

  if (pLocalData[pRightIdx]->rowIdx == -1) {
    return -1;
  }

49
  if (pParam->groupOrderType == TSDB_ORDER_DESC) {  // desc
50 51
    return compare_d(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
52
  } else {
53 54
    return compare_a(pDesc, pParam->num, pLocalData[pLeftIdx]->rowIdx, pLocalData[pLeftIdx]->filePage.data,
                     pParam->num, pLocalData[pRightIdx]->rowIdx, pLocalData[pRightIdx]->filePage.data);
H
hzcheng 已提交
55 56 57
  }
}

H
hjLiao 已提交
58
/*
 * Initialise the per-expression function contexts (pReducer->pCtx) and result
 * info records (pReducer->pResInfo) used during the merge stage.
 *
 * The input layout of every context is taken from pDesc->pColumnModel, while
 * the output layout (type, bytes, offset) is taken from the expressions of the
 * current query clause in pCmd, because the two may differ.
 *
 * @param pCmd      command whose current clause (pCmd->clauseIndex) is used
 * @param pReducer  reducer whose pCtx, pResInfo, pResultBuf, pTempBuffer and
 *                  resColModel have already been set up by the caller
 * @param pDesc     order descriptor that carries the input column model
 */
static void tscInitSqlContext(SSqlCmd *pCmd, SLocalReducer *pReducer, tOrderDescriptor *pDesc) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  int32_t     numOfExprs = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t i = 0; i < numOfExprs; ++i) {
    SQLFunctionCtx *pCtx = &pReducer->pCtx[i];
    SSqlExpr *      pExpr = tscSqlExprGet(pQueryInfo, i);

    pCtx->aOutputBuf = pReducer->pResultBuf->data + pExpr->offset * pReducer->resColModel->capacity;
    pCtx->order = pQueryInfo->order.order;
    pCtx->functionId = pExpr->functionId;

    // input buffer hold only one point data
    int16_t  offset = getColumnModelOffset(pDesc->pColumnModel, i);
    SSchema *pSchema = getColumnModelSchema(pDesc->pColumnModel, i);

    pCtx->aInputElemBuf = pReducer->pTempBuffer->data + offset;

    // input data format comes from pModel
    pCtx->inputType = pSchema->type;
    pCtx->inputBytes = pSchema->bytes;

    // output data format yet comes from pCmd.
    pCtx->outputBytes = pExpr->resBytes;
    pCtx->outputType = pExpr->resType;

    pCtx->startOffset = 0;
    pCtx->size = 1;
    pCtx->hasNull = true;
    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    // for top/bottom function, the output of timestamp is the first column
    int32_t functionId = pExpr->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pReducer->pCtx[0].aOutputBuf;
      pCtx->param[2].i64Key = pQueryInfo->order.order;
      pCtx->param[2].nType  = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[1].i64Key = pQueryInfo->order.orderColId;
    }

    SResultInfo *pResInfo = &pReducer->pResInfo[i];
    pResInfo->bufLen = pExpr->interBytes;
    // NOTE(review): this allocation is not checked; the function has no way to report a failure
    pResInfo->interResultBuf = calloc(1, (size_t) pResInfo->bufLen);

    pCtx->resultInfo = &pReducer->pResInfo[i];
    pCtx->resultInfo->superTableQ = true;
  }

  int16_t          n = 0;
  int16_t          tagLen = 0;
  SQLFunctionCtx **pTagCtx = calloc(pQueryInfo->fieldsInfo.numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {
    // without the list no tag context can be recorded; leave tagInfo untouched instead of writing through NULL
    return;
  }

  // collect the tag/ts dummy columns and remember the last selectivity function context
  SQLFunctionCtx *pCtx = NULL;
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
    if (pExpr->functionId == TSDB_FUNC_TAG_DUMMY || pExpr->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pExpr->resBytes;
      pTagCtx[n++] = &pReducer->pCtx[i];
    } else if ((aAggs[pExpr->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx = &pReducer->pCtx[i];
    }
  }

  if (n == 0 || pCtx == NULL) {
    free(pTagCtx);
  } else {
    // ownership of the list moves to the selectivity function context
    pCtx->tagInfo.pTagCtxList = pTagCtx;
    pCtx->tagInfo.numOfTagCols = n;
    pCtx->tagInfo.tagsLen = tagLen;
  }
}

134
/*
 * Build one SFillColInfo entry per expression of the query. Each entry copies
 * the result type/bytes, column id, flag, function id and fill value of the
 * expression; col.offset is the running sum of the preceding result widths.
 *
 * @param pQueryInfo  query whose expressions and fillVal array are read
 * @return a calloc'ed array with one entry per expression, or NULL when the
 *         allocation fails; the caller owns the array
 */
static SFillColInfo* createFillColInfo(SQueryInfo* pQueryInfo) {
  int32_t numOfCols = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    return NULL;
  }

  int32_t offset = 0;
  for(int32_t i = 0; i < numOfCols; ++i) {
    SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i);

    pFillCol[i].col.bytes  = pExpr->resBytes;
    pFillCol[i].col.type   = (int8_t)pExpr->resType;
    pFillCol[i].col.colId  = pExpr->colInfo.colId;
    pFillCol[i].flag       = pExpr->colInfo.flag;
    pFillCol[i].col.offset = offset;
    pFillCol[i].functionId = pExpr->functionId;
    pFillCol[i].fillVal.i  = pQueryInfo->fillVal[i];
    offset += pExpr->resBytes;
  }

  return pFillCol;
}

H
hzcheng 已提交
155
void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc,
H
hjLiao 已提交
156 157 158 159
                           SColumnModel *finalmodel, SSqlObj* pSql) {
  SSqlCmd* pCmd = &pSql->cmd;
  SSqlRes* pRes = &pSql->res;
  
160
  if (pMemBuffer == NULL) {
H
hjLiao 已提交
161 162
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
    tscError("%p pMemBuffer is NULL", pMemBuffer);
163
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
164 165 166 167
    return;
  }
 
  if (pDesc->pColumnModel == NULL) {
H
hzcheng 已提交
168
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
H
hjLiao 已提交
169
    tscError("%p no local buffer or intermediate result format model", pSql);
170
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
171 172 173 174 175 176 177
    return;
  }

  int32_t numOfFlush = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t len = pMemBuffer[i]->fileMeta.flushoutData.nLength;
    if (len == 0) {
178
      tscDebug("%p no data retrieved from orderOfVnode:%d", pSql, i + 1);
H
hzcheng 已提交
179 180 181 182 183 184 185 186
      continue;
    }

    numOfFlush += len;
  }

  if (numOfFlush == 0 || numOfBuffer == 0) {
    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
187
    tscDebug("%p retrieved no data", pSql);
H
hzcheng 已提交
188 189 190
    return;
  }

H
hjxilinx 已提交
191
  if (pDesc->pColumnModel->capacity >= pMemBuffer[0]->pageSize) {
H
hjLiao 已提交
192
    tscError("%p Invalid value of buffer capacity %d and page size %d ", pSql, pDesc->pColumnModel->capacity,
H
hjxilinx 已提交
193
             pMemBuffer[0]->pageSize);
S
slguan 已提交
194 195

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
196
    pRes->code = TSDB_CODE_TSC_APP_ERROR;
H
hzcheng 已提交
197 198 199
    return;
  }

H
hjLiao 已提交
200 201 202
  size_t size = sizeof(SLocalReducer) + POINTER_BYTES * numOfFlush;
  
  SLocalReducer *pReducer = (SLocalReducer *) calloc(1, size);
H
hzcheng 已提交
203
  if (pReducer == NULL) {
H
hjLiao 已提交
204
    tscError("%p failed to create local merge structure, out of memory", pSql);
S
slguan 已提交
205 206

    tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer);
207
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
208 209 210 211
    return;
  }

  pReducer->pExtMemBuffer = pMemBuffer;
S
slguan 已提交
212
  pReducer->pLocalDataSrc = (SLocalDataSource **)&pReducer[1];
H
hzcheng 已提交
213 214 215 216
  assert(pReducer->pLocalDataSrc != NULL);

  pReducer->numOfBuffer = numOfFlush;
  pReducer->numOfVnode = numOfBuffer;
217

H
hzcheng 已提交
218
  pReducer->pDesc = pDesc;
219
  tscDebug("%p the number of merged leaves is: %d", pSql, pReducer->numOfBuffer);
H
hzcheng 已提交
220 221 222 223 224 225

  int32_t idx = 0;
  for (int32_t i = 0; i < numOfBuffer; ++i) {
    int32_t numOfFlushoutInFile = pMemBuffer[i]->fileMeta.flushoutData.nLength;

    for (int32_t j = 0; j < numOfFlushoutInFile; ++j) {
H
hjLiao 已提交
226 227 228
      SLocalDataSource *ds = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->pageSize);
      if (ds == NULL) {
        tscError("%p failed to create merge structure", pSql);
229
        pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
230
        taosTFree(pReducer);
H
hzcheng 已提交
231 232
        return;
      }
H
hjLiao 已提交
233 234
      
      pReducer->pLocalDataSrc[idx] = ds;
H
hzcheng 已提交
235

H
hjLiao 已提交
236 237
      ds->pMemBuffer = pMemBuffer[i];
      ds->flushoutIdx = j;
238
      ds->filePage.num = 0;
H
hjLiao 已提交
239 240
      ds->pageId = 0;
      ds->rowIdx = 0;
H
hzcheng 已提交
241

242
      tscDebug("%p load data from disk into memory, orderOfVnode:%d, total:%d", pSql, i + 1, idx + 1);
H
hjLiao 已提交
243
      tExtMemBufferLoadData(pMemBuffer[i], &(ds->filePage), j, 0);
H
hzcheng 已提交
244
#ifdef _DEBUG_VIEW
245
      printf("load data page into mem for build loser tree: %" PRIu64 " rows\n", ds->filePage.num);
H
hzcheng 已提交
246
      SSrcColumnInfo colInfo[256] = {0};
H
hjxilinx 已提交
247
      SQueryInfo *   pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
248 249

      tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
250

251
      tColModelDisplayEx(pDesc->pColumnModel, ds->filePage.data, ds->filePage.num,
H
hjxilinx 已提交
252
                         pMemBuffer[0]->numOfElemsPerPage, colInfo);
H
hzcheng 已提交
253
#endif
H
hjLiao 已提交
254
      
255
      if (ds->filePage.num == 0) {  // no data in this flush, the index does not increase
256
        tscDebug("%p flush data is empty, ignore %d flush record", pSql, idx);
S
Shengliang Guan 已提交
257
        taosTFree(ds);
H
hzcheng 已提交
258 259
        continue;
      }
H
hjLiao 已提交
260
      
H
hzcheng 已提交
261 262 263
      idx += 1;
    }
  }
H
hjLiao 已提交
264 265
  
  // no data actually, no need to merge result.
H
hzcheng 已提交
266
  if (idx == 0) {
S
Shengliang Guan 已提交
267
    taosTFree(pReducer);
H
hzcheng 已提交
268 269 270 271 272 273
    return;
  }

  pReducer->numOfBuffer = idx;

  SCompareParam *param = malloc(sizeof(SCompareParam));
B
Bomin Zhang 已提交
274
  if (param == NULL) {
S
Shengliang Guan 已提交
275
    taosTFree(pReducer);
B
Bomin Zhang 已提交
276 277
    return;
  }
H
Haojun Liao 已提交
278

H
hzcheng 已提交
279 280
  param->pLocalData = pReducer->pLocalDataSrc;
  param->pDesc = pReducer->pDesc;
281
  param->num = pReducer->pLocalDataSrc[0]->pMemBuffer->numOfElemsPerPage;
H
hjxilinx 已提交
282 283
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

284
  param->groupOrderType = pQueryInfo->groupbyExpr.orderType;
H
Haojun Liao 已提交
285
  pReducer->orderPrjOnSTable = tscOrderedProjectionQueryOnSTable(pQueryInfo, 0);
H
hzcheng 已提交
286 287 288

  pRes->code = tLoserTreeCreate(&pReducer->pLoserTree, pReducer->numOfBuffer, param, treeComparator);
  if (pReducer->pLoserTree == NULL || pRes->code != 0) {
S
Shengliang Guan 已提交
289 290
    taosTFree(param);
    taosTFree(pReducer);
H
hzcheng 已提交
291 292 293 294 295
    return;
  }

  // the input data format follows the old format, but output in a new format.
  // so, all the input must be parsed as old format
H
hjLiao 已提交
296
  pReducer->pCtx = (SQLFunctionCtx *)calloc(tscSqlExprNumOfExprs(pQueryInfo), sizeof(SQLFunctionCtx));
H
hzcheng 已提交
297 298
  pReducer->rowSize = pMemBuffer[0]->nElemSize;

H
hjxilinx 已提交
299 300
  tscRestoreSQLFuncForSTableQuery(pQueryInfo);
  tscFieldInfoUpdateOffset(pQueryInfo);
H
hzcheng 已提交
301

H
hjxilinx 已提交
302
  if (pReducer->rowSize > pMemBuffer[0]->pageSize) {
H
hzcheng 已提交
303 304 305 306 307 308 309 310
    assert(false);  // todo fixed row size is larger than the minimum page size;
  }

  pReducer->hasPrevRow = false;
  pReducer->hasUnprocessedRow = false;

  pReducer->prevRowOfInput = (char *)calloc(1, pReducer->rowSize);

S
slguan 已提交
311
  // used to keep the latest input row
H
hzcheng 已提交
312 313 314 315
  pReducer->pTempBuffer = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discardData = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage));
  pReducer->discard = false;

H
hjxilinx 已提交
316
  pReducer->nResultBufSize = pMemBuffer[0]->pageSize * 16;
H
hzcheng 已提交
317
  pReducer->pResultBuf = (tFilePage *)calloc(1, pReducer->nResultBufSize + sizeof(tFilePage));
H
hjxilinx 已提交
318

H
Haojun Liao 已提交
319
  pReducer->finalRowSize = tscGetResRowLength(pQueryInfo->exprList);
H
hzcheng 已提交
320
  pReducer->resColModel = finalmodel;
B
Bomin Zhang 已提交
321
  pReducer->resColModel->capacity = pReducer->nResultBufSize;
322

323
  assert(pReducer->finalRowSize > 0);
B
Bomin Zhang 已提交
324 325 326
  if (pReducer->finalRowSize > 0) {
    pReducer->resColModel->capacity /= pReducer->finalRowSize;
  }
H
Haojun Liao 已提交
327
  assert(pReducer->finalRowSize <= pReducer->rowSize);
H
hzcheng 已提交
328

H
hjxilinx 已提交
329
  pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->capacity);
H
hzcheng 已提交
330

H
hjxilinx 已提交
331
  if (pReducer->pTempBuffer == NULL || pReducer->discardData == NULL || pReducer->pResultBuf == NULL ||
332
      /*pReducer->pBufForInterpo == NULL || */pReducer->pFinalRes == NULL || pReducer->prevRowOfInput == NULL) {
S
Shengliang Guan 已提交
333 334 335 336 337 338 339 340
    taosTFree(pReducer->pTempBuffer);
    taosTFree(pReducer->discardData);
    taosTFree(pReducer->pResultBuf);
    taosTFree(pReducer->pFinalRes);
    taosTFree(pReducer->prevRowOfInput);
    taosTFree(pReducer->pLoserTree);
    taosTFree(param);
    taosTFree(pReducer);
341
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
342 343
    return;
  }
H
hjLiao 已提交
344
  
345 346 347 348
  size_t numOfCols = tscSqlExprNumOfExprs(pQueryInfo);
  
  pReducer->pTempBuffer->num = 0;
  pReducer->pResInfo = calloc(numOfCols, sizeof(SResultInfo));
H
hzcheng 已提交
349

350
  tscCreateResPointerInfo(pRes, pQueryInfo);
H
hjLiao 已提交
351
  tscInitSqlContext(pCmd, pReducer, pDesc);
H
hzcheng 已提交
352

H
hjxilinx 已提交
353 354
  // we change the capacity of schema to denote that there is only one row in temp buffer
  pReducer->pDesc->pColumnModel->capacity = 1;
H
hjxilinx 已提交
355 356

  // restore the limitation value at the last stage
357 358 359 360
  if (tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    pQueryInfo->limit.limit = pQueryInfo->clauseLimit;
    pQueryInfo->limit.offset = pQueryInfo->prjOffset;
  }
H
hjxilinx 已提交
361

S
TD-1057  
Shengliang Guan 已提交
362
  pReducer->offset = (int32_t)pQueryInfo->limit.offset;
H
hjxilinx 已提交
363

H
hzcheng 已提交
364 365 366
  pRes->pLocalReducer = pReducer;
  pRes->numOfGroups = 0;

367
  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
368
  STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);
H
hjxilinx 已提交
369
  
370
  TSKEY stime = (pQueryInfo->order.order == TSDB_ORDER_ASC)? pQueryInfo->window.skey : pQueryInfo->window.ekey;
371
  int64_t revisedSTime = taosTimeTruncate(stime, &pQueryInfo->interval, tinfo.precision);
372 373 374 375
  
  if (pQueryInfo->fillType != TSDB_FILL_NONE) {
    SFillColInfo* pFillCol = createFillColInfo(pQueryInfo);
    pReducer->pFillInfo = taosInitFillInfo(pQueryInfo->order.order, revisedSTime, pQueryInfo->groupbyExpr.numOfGroupCols,
376
                                           4096, (int32_t)numOfCols, pQueryInfo->interval.sliding, pQueryInfo->interval.slidingUnit,
H
Haojun Liao 已提交
377
                                           tinfo.precision, pQueryInfo->fillType, pFillCol);
378
  }
H
hzcheng 已提交
379 380 381 382
}

/*
 * Sort the rows held in pPage (only when the descriptor has order columns) and
 * append them to the external memory buffer.
 *
 * @return 0 when the page is empty or the rows were stored (the page is then
 *         reset to zero rows), -1 when tExtMemBufferPut() reports a failure
 */
static int32_t tscFlushTmpBufferImpl(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage,
                                     int32_t orderType) {
  // nothing buffered, nothing to do
  if (pPage->num == 0) {
    return 0;
  }

  assert(pPage->num <= pDesc->pColumnModel->capacity);

  int32_t numOfRows = (int32_t)pPage->num;

  // sort before flush to disk, the data must be consecutively put on tFilePage.
  if (pDesc->orderInfo.numOfCols > 0) {
    tColDataQSort(pDesc, numOfRows, 0, numOfRows - 1, pPage->data, orderType);
  }

#ifdef _DEBUG_VIEW
  printf("%" PRIu64 " rows data flushed to disk after been sorted:\n", pPage->num);
  tColModelDisplay(pDesc->pColumnModel, pPage->data, pPage->num, pPage->num);
#endif

  // write to cache after being sorted
  int32_t stored = tExtMemBufferPut(pMemoryBuf, pPage->data, numOfRows);
  if (stored < 0) {
    tscError("failed to save data in temporary buffer");
    return -1;
  }

  pPage->num = 0;
  return 0;
}

/*
 * Store the rows of pPage in the external memory buffer through
 * tscFlushTmpBufferImpl() and then flush that buffer with tExtMemBufferFlush().
 *
 * @return 0 on success, otherwise the non-zero code of the step that failed
 */
int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, int32_t orderType) {
  int32_t code = tscFlushTmpBufferImpl(pMemoryBuf, pDesc, pPage, orderType);

  // the buffer is flushed only when the page content has been stored
  if (code == 0) {
    code = tExtMemBufferFlush(pMemoryBuf);
  }

  return code;
}

/*
 * Append numOfRows rows from data to pPage, flushing the page to the external
 * memory buffer every time it reaches the capacity of the column model.
 *
 * @return 0 on success, otherwise the code returned by tscFlushTmpBuffer()
 */
int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data,
                     int32_t numOfRows, int32_t orderType) {
  SColumnModel *pModel = pDesc->pColumnModel;

  // all rows fit into the free space of the page
  if (pPage->num + numOfRows <= pModel->capacity) {
    tColModelAppend(pModel, pPage, data, 0, numOfRows, numOfRows);
    return 0;
  }

  // fill up the page with the leading rows, then flush it
  int32_t numOfLeading = pModel->capacity - (int32_t)pPage->num;
  tColModelAppend(pModel, pPage, data, 0, numOfLeading, numOfRows);

  assert(pPage->num == pModel->capacity);

  int32_t code = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType);
  if (code != 0) {
    return code;
  }

  // copy the rest page by page; a partially filled last page stays in memory
  for (int32_t remain = numOfRows - numOfLeading; remain > 0;) {
    int32_t batch = remain;
    if (remain > pModel->capacity) {
      batch = pModel->capacity;
    }

    tColModelAppend(pModel, pPage, data, numOfRows - remain, batch, numOfRows);

    if (pPage->num != pModel->capacity) {
      pPage->num = batch;
    } else {
      code = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }

    remain -= batch;
  }

  return 0;
}

/*
 * Destroy the local reducer attached to pSql->res, if any.
 *
 * The reducer pointer is detached from the result object with an atomic
 * exchange, so only the caller that obtains the non-NULL pointer releases it.
 * Before releasing, the status is switched from TSC_LOCALREDUCE_READY to
 * TSC_LOCALREDUCE_TOBE_FREED; while the status is TSC_LOCALREDUCE_IN_PROGRESS
 * the function sleeps and retries. All buffers owned by the reducer, the merge
 * environment and the per-leaf data sources are then freed.
 */
void tscDestroyLocalReducer(SSqlObj *pSql) {
  if (pSql == NULL) {
    return;
  }

  SSqlRes *pRes = &(pSql->res);
  if (pRes->pLocalReducer == NULL) {
    return;
  }

  SSqlCmd *   pCmd = &pSql->cmd;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // there is no more result, so we release all allocated resource
  SLocalReducer *pLocalReducer = (SLocalReducer *)atomic_exchange_ptr(&pRes->pLocalReducer, NULL);
  if (pLocalReducer != NULL) {
    int32_t status = 0;
    while ((status = atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY,
                                                    TSC_LOCALREDUCE_TOBE_FREED)) == TSC_LOCALREDUCE_IN_PROGRESS) {
      taosMsleep(100);
      tscDebug("%p waiting for delete procedure, status: %d", pSql, status);
    }

    pLocalReducer->pFillInfo = taosDestoryFillInfo(pLocalReducer->pFillInfo);

    // pCtx and pResInfo both hold one entry per expression of the query
    int32_t numOfExprs = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);

    if (pLocalReducer->pCtx != NULL) {
      for (int32_t i = 0; i < numOfExprs; ++i) {
        SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[i];

        tVariantDestroy(&pCtx->tag);
        taosTFree(pCtx->tagInfo.pTagCtxList);
      }

      taosTFree(pLocalReducer->pCtx);
    }

    taosTFree(pLocalReducer->prevRowOfInput);

    taosTFree(pLocalReducer->pTempBuffer);
    taosTFree(pLocalReducer->pResultBuf);

    if (pLocalReducer->pResInfo != NULL) {
      for (int32_t i = 0; i < numOfExprs; ++i) {
        taosTFree(pLocalReducer->pResInfo[i].interResultBuf);
      }

      taosTFree(pLocalReducer->pResInfo);
    }

    if (pLocalReducer->pLoserTree) {
      taosTFree(pLocalReducer->pLoserTree->param);
      taosTFree(pLocalReducer->pLoserTree);
    }

    taosTFree(pLocalReducer->pFinalRes);
    taosTFree(pLocalReducer->discardData);

    tscLocalReducerEnvDestroy(pLocalReducer->pExtMemBuffer, pLocalReducer->pDesc, pLocalReducer->resColModel,
                              pLocalReducer->numOfVnode);
    for (int32_t i = 0; i < pLocalReducer->numOfBuffer; ++i) {
      taosTFree(pLocalReducer->pLocalDataSrc[i]);
    }

    pLocalReducer->numOfBuffer = 0;
    pLocalReducer->numOfCompleted = 0;
    free(pLocalReducer);
  } else {
    tscDebug("%p already freed or another free function is invoked", pSql);
  }

  tscDebug("%p free local reducer finished", pSql);
}

H
hjxilinx 已提交
545
/*
 * Build the order descriptor used to sort the intermediate result.
 *
 * The order columns are the group-by columns (the last numOfGroupCols output
 * columns), plus the primary timestamp column when the query has an interval
 * or is an ordered projection on a super table.
 *
 * @param pOrderDesc  receives the descriptor created by tOrderDesCreate()
 * @param pCmd        command whose current clause is inspected
 * @param pModel      column model handed to tOrderDesCreate()
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_TSC_OUT_OF_MEMORY when the index
 *         list or the descriptor cannot be allocated
 */
static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, SColumnModel *pModel) {
  int32_t     numOfGroupByCols = 0;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) {
    numOfGroupByCols = pQueryInfo->groupbyExpr.numOfGroupCols;
  }

  // primary timestamp column is involved in final result
  if (pQueryInfo->interval.interval != 0 || tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) {
    numOfGroupByCols++;
  }

  // a zero-sized calloc() may return NULL, which must not be mistaken for an allocation failure,
  // so at least one slot is always requested
  size_t   numOfSlots = (numOfGroupByCols > 0) ? (size_t)numOfGroupByCols : 1;
  int32_t *orderColIndexList = (int32_t *)calloc(numOfSlots, sizeof(int32_t));
  if (orderColIndexList == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

  if (numOfGroupByCols > 0) {

    if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) {
      int32_t startCols = pQueryInfo->fieldsInfo.numOfOutput - pQueryInfo->groupbyExpr.numOfGroupCols;

      // the last "pQueryInfo->groupbyExpr.numOfGroupCols" columns are order-by columns
      for (int32_t i = 0; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) {
        orderColIndexList[i] = startCols++;
      }

      if (pQueryInfo->interval.interval != 0) {
        // the first column is the timestamp, handles queries like "interval(10m) group by tags"
        orderColIndexList[numOfGroupByCols - 1] = PRIMARYKEY_TIMESTAMP_COL_INDEX; //TODO ???
      }
    } else {
      /*
       * 1. the orderby ts asc/desc projection query for the super table
       * 2. interval query without groupby clause
       */
      if (pQueryInfo->interval.interval != 0) {
        orderColIndexList[0] = PRIMARYKEY_TIMESTAMP_COL_INDEX;
      } else {
        // use the projected primary timestamp column; the last match wins
        int32_t numOfExprs = (int32_t)tscSqlExprNumOfExprs(pQueryInfo);
        for (int32_t i = 0; i < numOfExprs; ++i) {
          SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
          if (pExpr->functionId == TSDB_FUNC_PRJ && pExpr->colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
            orderColIndexList[0] = i;
          }
        }
      }

      assert(pQueryInfo->order.orderColId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    }
  }

  *pOrderDesc = tOrderDesCreate(orderColIndexList, numOfGroupByCols, pModel, pQueryInfo->order.order);
  taosTFree(orderColIndexList);

  if (*pOrderDesc == NULL) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  } else {
    return TSDB_CODE_SUCCESS;
  }
}

S
slguan 已提交
608
/*
 * Decide whether the row in tmpBuffer belongs to the same group as the
 * previous row pPrev.
 *
 * - ordered projection on a super table: always the same group
 * - first function is a plain projection or arithmetic expression: never
 * - no order columns, or the primary timestamp is the only order column:
 *   always the same group
 * - otherwise the bytes of both rows are compared, starting at the offset of
 *   the first order column and running to the end of the row
 */
bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage *tmpBuffer) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  // disable merge procedure for column projection query
  int16_t firstFuncId = pReducer->pCtx[0].functionId;
  if (pReducer->orderPrjOnSTable) {
    return true;
  }

  if (firstFuncId == TSDB_FUNC_PRJ || firstFuncId == TSDB_FUNC_ARITHM) {
    return false;
  }

  tOrderDescriptor *pDesc = pReducer->pDesc;
  SColumnOrderInfo *pOrder = &pDesc->orderInfo;

  // no group by columns, all data belongs to one group
  int32_t numOfOrderCols = pOrder->numOfCols;
  if (numOfOrderCols <= 0) {
    return true;
  }

  bool tsIsLastOrderCol = (pOrder->colIndex[numOfOrderCols - 1] == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  if (!tsIsLastOrderCol) {
    // simple group by query
    assert(pQueryInfo->interval.interval == 0);
  } else {
    /*
     * super table interval query
     * if the order columns is the primary timestamp, all result data belongs to one group
     */
    assert(pQueryInfo->interval.interval > 0);
    if (numOfOrderCols == 1) {
      return true;
    }
  }

  // compare everything from the first order column to the end of the row
  int32_t firstOrderCol = pOrder->colIndex[0];
  int32_t offset = (pDesc->pColumnModel)->pFields[firstOrderCol].offset;

  return memcmp(pPrev + offset, tmpBuffer->data + offset, pDesc->pColumnModel->rowSize - offset) == 0;
}

int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pOrderDesc,
H
hjxilinx 已提交
652
                                 SColumnModel **pFinalModel, uint32_t nBufferSizes) {
H
hzcheng 已提交
653 654 655
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

H
hjxilinx 已提交
656
  SSchema *     pSchema = NULL;
H
hjxilinx 已提交
657
  SColumnModel *pModel = NULL;
H
hzcheng 已提交
658 659
  *pFinalModel = NULL;

H
hjxilinx 已提交
660
  SQueryInfo *    pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
H
hjxilinx 已提交
661
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
S
slguan 已提交
662

H
Haojun Liao 已提交
663
  (*pMemBuffer) = (tExtMemBuffer **)malloc(POINTER_BYTES * pSql->subState.numOfSub);
H
hzcheng 已提交
664 665
  if (*pMemBuffer == NULL) {
    tscError("%p failed to allocate memory", pSql);
666
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
667 668
    return pRes->code;
  }
H
hjxilinx 已提交
669 670 671 672
  
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);
  
  pSchema = (SSchema *)calloc(1, sizeof(SSchema) * size);
H
hzcheng 已提交
673 674
  if (pSchema == NULL) {
    tscError("%p failed to allocate memory", pSql);
675
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
H
hzcheng 已提交
676 677 678 679
    return pRes->code;
  }

  int32_t rlen = 0;
H
hjxilinx 已提交
680
  for (int32_t i = 0; i < size; ++i) {
681
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);
H
hzcheng 已提交
682 683

    pSchema[i].bytes = pExpr->resBytes;
S
TD-1057  
Shengliang Guan 已提交
684
    pSchema[i].type = (int8_t)pExpr->resType;
H
hzcheng 已提交
685 686 687
    rlen += pExpr->resBytes;
  }

L
lihui 已提交
688
  int32_t capacity = 0;
H
hjxilinx 已提交
689 690 691
  if (rlen != 0) {
    capacity = nBufferSizes / rlen;
  }
H
hjxilinx 已提交
692
  
S
TD-1057  
Shengliang Guan 已提交
693
  pModel = createColumnModel(pSchema, (int32_t)size, capacity);
H
hzcheng 已提交
694

H
Haojun Liao 已提交
695 696 697 698 699 700
  int32_t pg = DEFAULT_PAGE_SIZE;
  int32_t overhead = sizeof(tFilePage);
  while((pg - overhead) < pModel->rowSize * 2) {
    pg *= 2;
  }

H
hjxilinx 已提交
701
  size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups;
702
  for (int32_t i = 0; i < numOfSubs; ++i) {
H
Haojun Liao 已提交
703
    (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pg, pModel);
704 705
    (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL;
  }
H
hzcheng 已提交
706 707

  if (createOrderDescriptor(pOrderDesc, pCmd, pModel) != TSDB_CODE_SUCCESS) {
708
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
S
Shengliang Guan 已提交
709
    taosTFree(pSchema);
H
hzcheng 已提交
710 711 712
    return pRes->code;
  }

H
hjxilinx 已提交
713
  // final result depends on the fields number
H
hjxilinx 已提交
714 715
  memset(pSchema, 0, sizeof(SSchema) * size);
  for (int32_t i = 0; i < size; ++i) {
H
hjxilinx 已提交
716 717
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i);

718
    SSchema *p1 = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIndex);
H
hjxilinx 已提交
719

720
    int32_t inter = 0;
H
hjxilinx 已提交
721 722
    int16_t type = -1;
    int16_t bytes = 0;
H
hjxilinx 已提交
723 724 725 726 727

    // the final result size and type in the same as query on single table.
    // so here, set the flag to be false;
    int32_t functionId = pExpr->functionId;
    if (functionId >= TSDB_FUNC_TS && functionId <= TSDB_FUNC_DIFF) {
H
hjxilinx 已提交
728 729
      type = pModel->pFields[i].field.type;
      bytes = pModel->pFields[i].field.bytes;
H
hjxilinx 已提交
730 731 732 733 734 735 736 737
    } else {
      if (functionId == TSDB_FUNC_FIRST_DST) {
        functionId = TSDB_FUNC_FIRST;
      } else if (functionId == TSDB_FUNC_LAST_DST) {
        functionId = TSDB_FUNC_LAST;
      }

      getResultDataInfo(p1->type, p1->bytes, functionId, 0, &type, &bytes, &inter, 0, false);
H
hjxilinx 已提交
738
    }
H
hzcheng 已提交
739

S
TD-1057  
Shengliang Guan 已提交
740
    pSchema[i].type = (uint8_t)type;
H
hjxilinx 已提交
741 742
    pSchema[i].bytes = bytes;
    strcpy(pSchema[i].name, pModel->pFields[i].field.name);
H
hzcheng 已提交
743
  }
H
hjxilinx 已提交
744
  
S
TD-1057  
Shengliang Guan 已提交
745
  *pFinalModel = createColumnModel(pSchema, (int32_t)size, capacity);
S
Shengliang Guan 已提交
746
  taosTFree(pSchema);
H
hzcheng 已提交
747 748 749 750 751 752 753 754 755 756

  return TSDB_CODE_SUCCESS;
}

/**
 * @param pMemBuffer
 * @param pDesc
 * @param pFinalModel
 * @param numOfVnodes
 */
H
hjxilinx 已提交
757
void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel,
H
hzcheng 已提交
758
                               int32_t numOfVnodes) {
H
hjxilinx 已提交
759
  destroyColumnModel(pFinalModel);
H
hzcheng 已提交
760
  tOrderDescDestroy(pDesc);
H
Haojun Liao 已提交
761

H
hzcheng 已提交
762
  for (int32_t i = 0; i < numOfVnodes; ++i) {
H
hjxilinx 已提交
763
    pMemBuffer[i] = destoryExtMemBuffer(pMemBuffer[i]);
H
hzcheng 已提交
764 765
  }

S
Shengliang Guan 已提交
766
  taosTFree(pMemBuffer);
H
hzcheng 已提交
767 768 769 770 771 772 773 774 775
}

/**
 *
 * @param pLocalReducer
 * @param pOneInterDataSrc
 * @param treeList
 * @return the number of remain input source. if ret == 0, all data has been handled
 */
S
slguan 已提交
776
int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
H
hzcheng 已提交
777 778 779 780
                               bool *needAdjustLoserTree) {
  pOneInterDataSrc->rowIdx = 0;
  pOneInterDataSrc->pageId += 1;

S
TD-1057  
Shengliang Guan 已提交
781
  if ((uint32_t)pOneInterDataSrc->pageId <
H
hzcheng 已提交
782 783 784 785 786 787
      pOneInterDataSrc->pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[pOneInterDataSrc->flushoutIdx].numOfPages) {
    tExtMemBufferLoadData(pOneInterDataSrc->pMemBuffer, &(pOneInterDataSrc->filePage), pOneInterDataSrc->flushoutIdx,
                          pOneInterDataSrc->pageId);

#if defined(_DEBUG_VIEW)
    printf("new page load to buffer\n");
H
hjxilinx 已提交
788
    tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColumnModel, pOneInterDataSrc->filePage.data,
789
                     pOneInterDataSrc->filePage.num, pOneInterDataSrc->pMemBuffer->pColumnModel->capacity);
H
hzcheng 已提交
790 791 792 793 794 795 796 797 798 799 800 801 802
#endif
    *needAdjustLoserTree = true;
  } else {
    pLocalReducer->numOfCompleted += 1;

    pOneInterDataSrc->rowIdx = -1;
    pOneInterDataSrc->pageId = -1;
    *needAdjustLoserTree = true;
  }

  return pLocalReducer->numOfBuffer;
}

S
slguan 已提交
803 804
/**
 * Refresh the loser tree after a row of pOneInterDataSrc has been consumed.
 *
 * When the row cursor has moved past the rows held in the in-memory page, the next page is
 * requested through loadNewDataFromDiskFor. The tree is then adjusted starting from the leaf
 * that belongs to the current winner (pTree->pNode[0].index), unless the load step cleared
 * the adjust flag.
 */
void adjustLoserTreeFromNewData(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc,
                                SLoserTreeInfo *pTree) {
  bool needToAdjust = true;

  // the last record of the buffered page was already chosen as the winner: bring in a new page
  if (pOneInterDataSrc->rowIdx >= pOneInterDataSrc->filePage.num) {
    loadNewDataFromDiskFor(pLocalReducer, pOneInterDataSrc, &needToAdjust);
  }

  if (!needToAdjust) {
    return;
  }

  // leaves are stored after the internal nodes, hence the numOfBuffer shift
  int32_t leafNodeIdx = pTree->pNode[0].index + pLocalReducer->numOfBuffer;

#ifdef _DEBUG_VIEW
  printf("before adjust:\t");
  tLoserTreeDisplay(pTree);
#endif

  tLoserTreeAdjust(pTree, leafNodeIdx);

#ifdef _DEBUG_VIEW
  printf("\nafter adjust:\t");
  tLoserTreeDisplay(pTree);
  printf("\n");
#endif
}

836
/**
 * Switch the reducer into "discard" mode and remember the previous input row.
 *
 * The previous input row is stored in pLocalReducer->discardData so that following rows can be
 * compared against it. When a fill descriptor is supplied, it is reset to the truncated start
 * of the query window (the smaller of window.skey and window.ekey).
 *
 * @param pLocalReducer  reducer whose discard state is updated
 * @param pQueryInfo     query description (window, interval)
 * @param pFillInfo      fill descriptor to reset; may be NULL
 */
void savePrevRecordAndSetupFillInfo(SLocalReducer *pLocalReducer, SQueryInfo *pQueryInfo, SFillInfo *pFillInfo) {
  STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);

  if (pFillInfo != NULL) {
    int64_t stime = pQueryInfo->window.ekey;
    if (pQueryInfo->window.skey < pQueryInfo->window.ekey) {
      stime = pQueryInfo->window.skey;
    }

    taosResetFillInfo(pFillInfo, taosTimeTruncate(stime, &pQueryInfo->interval, tinfo.precision));
  }

  // remaining rows of the same group are dropped from now on
  pLocalReducer->discardData->num = 0;
  pLocalReducer->discard = true;

  tColModelAppend(pLocalReducer->pDesc->pColumnModel, pLocalReducer->discardData, pLocalReducer->prevRowOfInput, 0, 1,
                  1);
}

856
/*
 * Produce the final rows for a query that needs no fill step (no interval, or fill type NONE).
 *
 * The rows held in pLocalReducer->pResultBuf are trimmed by the pending LIMIT offset and by the
 * LIMIT row count, copied into pLocalReducer->pFinalRes (published as pRes->data), and the row
 * counters of pRes are updated. The result buffer is left empty afterwards.
 */
static void genFinalResWithoutFill(SSqlRes* pRes, SLocalReducer *pLocalReducer, SQueryInfo* pQueryInfo) {
  assert(pQueryInfo->interval.interval == 0 || pQueryInfo->fillType == TSDB_FILL_NONE);

  SColumnModel *pResModel = pLocalReducer->resColModel;
  tFilePage    *pPage = pLocalReducer->pResultBuf;

  pRes->numOfRows = pPage->num;
  pRes->data = pLocalReducer->pFinalRes;

  // consume the pending offset first
  if (pQueryInfo->limit.offset > 0) {
    if (pQueryInfo->limit.offset >= pRes->numOfRows) {
      // the whole batch is skipped, keep the rest of the offset for the next batch
      pQueryInfo->limit.offset -= pRes->numOfRows;
      pRes->numOfRows = 0;
    } else {
      int32_t numOfRowsBefore = (int32_t)pPage->num;
      tColModelErase(pResModel, pPage, numOfRowsBefore, 0, (int32_t)pQueryInfo->limit.offset - 1);

      /* remove the hole in column model */
      tColModelCompact(pResModel, pPage, numOfRowsBefore);

      pRes->numOfRows -= pQueryInfo->limit.offset;
      pQueryInfo->limit.offset = 0;
    }
  }

  pRes->numOfRowsGroup += pRes->numOfRows;

  // cut the rows that exceed the LIMIT of the current group
  if (pQueryInfo->limit.limit >= 0 && pRes->numOfRowsGroup > pQueryInfo->limit.limit) {
    int32_t numOfRowsBefore = (int32_t)pPage->num;
    int32_t overflow = (int32_t)(pRes->numOfRowsGroup - pQueryInfo->limit.limit);
    assert(overflow < pRes->numOfRows);

    pPage->num -= overflow;
    pRes->numOfRows -= overflow;
    pRes->numOfRowsGroup = pQueryInfo->limit.limit;

    tColModelCompact(pResModel, pPage, numOfRowsBefore);

    // the rest of this group is to be discarded; the fill information is reset as well
    savePrevRecordAndSetupFillInfo(pLocalReducer, pQueryInfo, pLocalReducer->pFillInfo);
  }

  memcpy(pRes->data, pPage->data, (size_t)(pRes->numOfRows * pLocalReducer->finalRowSize));

  pRes->numOfClauseTotal += pRes->numOfRows;
  pPage->num = 0;
}

/*
 * Run the fill step on the merged data and publish the outcome in pSql->res.
 *
 * One temporary page per output column is allocated, taosGenerateDataBlock is called until
 * rows survive the pending LIMIT offset (or no more rows can be produced), the LIMIT row count
 * is applied, and the surviving rows are copied column by column into pLocalReducer->pFinalRes.
 * The merge result buffer is emptied before returning.
 *
 * If a temporary page cannot be allocated, no rows are returned, pRes->code is set to
 * TSDB_CODE_TSC_OUT_OF_MEMORY and the function returns after releasing what it allocated.
 *
 * Note: pRes->pLocalReducer may be null, due to the fact that "tscDestroyLocalReducer" is called
 * by "interuptHandler" function in shell
 *
 * @param pSql           sql object; its cmd selects the query, its res receives the rows
 * @param pLocalReducer  reducer holding the fill descriptor and the result buffers
 * @param doneOutput     true when the reduce procedure for the current group has completed
 */
static void doFillResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool doneOutput) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  tFilePage  *pBeforeFillData = pLocalReducer->pResultBuf;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  SFillInfo  *pFillInfo = pLocalReducer->pFillInfo;

  // todo extract function
  int64_t actualETime = (pQueryInfo->order.order == TSDB_ORDER_ASC)? pQueryInfo->window.ekey: pQueryInfo->window.skey;

  // zero-initialised pointer array, so a partially built set of pages can be released uniformly
  tFilePage **pResPages = calloc((size_t)pQueryInfo->fieldsInfo.numOfOutput, POINTER_BYTES);
  bool        allocated = (pResPages != NULL);

  for (int32_t i = 0; allocated && i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
    pResPages[i] = calloc(1, sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->capacity);
    allocated = (pResPages[i] != NULL);
  }

  if (!allocated) {
    tscError("%p failed to allocate buffers for fill result", pSql);

    if (pResPages != NULL) {
      for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
        taosTFree(pResPages[i]);
      }
    }
    taosTFree(pResPages);

    // leave the buffers in the same state as the regular exit path, but report the failure
    pBeforeFillData->num = 0;
    pRes->numOfRows = 0;
    pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
    return;
  }

  while (1) {
    int64_t newRows = taosGenerateDataBlock(pFillInfo, pResPages, pLocalReducer->resColModel->capacity);

    if (pQueryInfo->limit.offset < newRows) {
      newRows -= pQueryInfo->limit.offset;

      // drop the leading rows that are covered by the offset
      if (pQueryInfo->limit.offset > 0) {
        for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
          TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
          memmove(pResPages[i]->data, pResPages[i]->data + pField->bytes * pQueryInfo->limit.offset,
                  (size_t)(newRows * pField->bytes));
        }
      }

      pRes->data = pLocalReducer->pFinalRes;
      pRes->numOfRows = newRows;

      pQueryInfo->limit.offset = 0;
      break;
    } else {
      // the whole block is skipped by the offset
      pQueryInfo->limit.offset -= newRows;
      pRes->numOfRows = 0;

      int32_t rpoints = taosNumOfRemainRows(pFillInfo);
      if (rpoints <= 0) {
        if (!doneOutput) { // reduce procedure has not completed yet, but current results for fill are exhausted
          break;
        }

        // all output in current group are completed
        int32_t totalRemainRows = (int32_t)getFilledNumOfRes(pFillInfo, actualETime, pLocalReducer->resColModel->capacity);
        if (totalRemainRows <= 0) {
          break;
        }
      }
    }
  }

  if (pRes->numOfRows > 0) {
    int32_t currentTotal = (int32_t)(pRes->numOfRowsGroup + pRes->numOfRows);

    if (pQueryInfo->limit.limit >= 0 && currentTotal > pQueryInfo->limit.limit) {
      int32_t overflow = (int32_t)(currentTotal - pQueryInfo->limit.limit);

      pRes->numOfRows -= overflow;
      assert(pRes->numOfRows >= 0);

      /* set remain data to be discarded, and reset the interpolation information */
      savePrevRecordAndSetupFillInfo(pLocalReducer, pQueryInfo, pFillInfo);
    }

    // each column occupies one contiguous region of numOfRows values in the final result
    for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
      TAOS_FIELD *pField = tscFieldInfoGetField(&pQueryInfo->fieldsInfo, i);
      int16_t     offset = getColumnModelOffset(pLocalReducer->resColModel, i);
      memcpy(pRes->data + offset * pRes->numOfRows, pResPages[i]->data, (size_t)(pField->bytes * pRes->numOfRows));
    }

    pRes->numOfRowsGroup += pRes->numOfRows;
    pRes->numOfClauseTotal += pRes->numOfRows;
  }

  pBeforeFillData->num = 0;
  for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) {
    taosTFree(pResPages[i]);
  }

  taosTFree(pResPages);
}

S
slguan 已提交
994
/*
 * Copy the single row held in tmpBuffer into pLocalReducer->prevRowOfInput, column by column,
 * then empty tmpBuffer and record that a previous row is available.
 */
static void savePreviousRow(SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  SColumnModel *pColumnModel = pLocalReducer->pDesc->pColumnModel;
  assert(pColumnModel->capacity == 1 && tmpBuffer->num == 1);

  int32_t col = 0;
  while (col < pColumnModel->numOfCols) {
    int16_t  colOffset = getColumnModelOffset(pColumnModel, col);
    SSchema *pColSchema = getColumnModelSchema(pColumnModel, col);

    // source and destination share the same column layout
    memcpy(pLocalReducer->prevRowOfInput + colOffset, tmpBuffer->data + colOffset, pColSchema->bytes);
    ++col;
  }

  pLocalReducer->hasPrevRow = true;
  tmpBuffer->num = 0;
}

H
hjxilinx 已提交
1010
/*
 * Feed the current input row to every function context of the reducer.
 *
 * First pass: prepare each context (tag value for tag / tag_dummy / ts_dummy functions, the
 * first parameter for top / bottom), mark it as being in the secondary merge stage and, when
 * needInit is set, call the function's init routine.
 * Second pass: invoke distSecondaryMergeFunc for every context except tag_dummy and ts_dummy.
 */
static void doExecuteSecondaryMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, bool needInit) {
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t      numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  // the tag columns need to be set before all functions execution
  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[idx];
    int32_t         funcId = pCtx->functionId;

    bool readsTagFromInput =
        (funcId == TSDB_FUNC_TAG_DUMMY || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TS_DUMMY);
    bool isTopBottom = (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM);

    if (readsTagFromInput) {
      // the tag field of SQLFunctionCtx is rebuilt from the input buffer
      tVariantDestroy(&pCtx->tag);
      char* input = pCtx->aInputElemBuf;

      bool isVarLenType = (pCtx->inputType == TSDB_DATA_TYPE_BINARY || pCtx->inputType == TSDB_DATA_TYPE_NCHAR);
      if (!isVarLenType) {
        tVariantCreateFromBinary(&pCtx->tag, input, pCtx->inputBytes, pCtx->inputType);
      } else {
        assert(varDataLen(input) <= pCtx->inputBytes);
        tVariantCreateFromBinary(&pCtx->tag, varDataVal(input), varDataLen(input), pCtx->inputType);
      }
    } else if (isTopBottom) {
      SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, idx);
      pCtx->param[0].i64Key = pExpr->param[0].i64Key;
    }

    pCtx->currentStage = SECONDARY_STAGE_MERGE;

    if (needInit) {
      aAggs[pCtx->functionId].init(pCtx);
    }
  }

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[idx];
    int32_t         funcId = pCtx->functionId;

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TS_DUMMY) {
      aAggs[funcId].distSecondaryMergeFunc(pCtx);
    }
  }
}
H
hzcheng 已提交
1051

H
hjxilinx 已提交
1052
/*
 * If a row was left unprocessed, merge it now (with function initialisation) and keep it as
 * the previous row; otherwise do nothing.
 */
static void handleUnprocessedRow(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
  if (!pLocalReducer->hasUnprocessedRow) {
    return;
  }

  pLocalReducer->hasUnprocessedRow = false;

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
  savePreviousRow(pLocalReducer, tmpBuffer);
}

1060
/*
 * Return the largest numOfRes among the function contexts in pCtx.
 *
 * ts, tag and tagprj functions are ignored: they can not decide the output number of the
 * current query, which is decided by the main output.
 */
static int64_t getNumOfResultLocal(SQueryInfo *pQueryInfo, SQLFunctionCtx *pCtx) {
  size_t  numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);
  int64_t maxOutput = 0;

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    int32_t funcId = pCtx[idx].functionId;
    bool    decidesOutput = !(funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);

    if (decidesOutput) {
      SResultInfo* pResInfo = GET_RES_INFO(&pCtx[idx]);
      if (pResInfo->numOfRes > maxOutput) {
        maxOutput = pResInfo->numOfRes;
      }
    }
  }

  return maxOutput;
}

/*
S
slguan 已提交
1084
 * in handling the top/bottom query, which produce more than one rows result,
H
hzcheng 已提交
1085 1086
 * the tsdb_func_tags only fill the first row of results, the remain rows need to
 * filled with the same result, which is the tags, specified in group by clause
S
slguan 已提交
1087
 *
H
hzcheng 已提交
1088
 */
H
hjxilinx 已提交
1089
static void fillMultiRowsOfTagsVal(SQueryInfo *pQueryInfo, int32_t numOfRes, SLocalReducer *pLocalReducer) {
S
slguan 已提交
1090
  int32_t maxBufSize = 0;  // find the max tags column length to prepare the buffer
H
hjxilinx 已提交
1091 1092 1093
  size_t size = tscSqlExprNumOfExprs(pQueryInfo);
  
  for (int32_t k = 0; k < size; ++k) {
1094
    SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, k);
S
slguan 已提交
1095
    if (maxBufSize < pExpr->resBytes && pExpr->functionId == TSDB_FUNC_TAG) {
H
hzcheng 已提交
1096 1097 1098 1099 1100 1101
      maxBufSize = pExpr->resBytes;
    }
  }

  assert(maxBufSize >= 0);

H
hjxilinx 已提交
1102
  char *buf = malloc((size_t)maxBufSize);
H
hjxilinx 已提交
1103
  for (int32_t k = 0; k < size; ++k) {
H
Haojun Liao 已提交
1104 1105
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[k];
    if (pCtx->functionId != TSDB_FUNC_TAG) {
S
slguan 已提交
1106 1107 1108
      continue;
    }

H
hzcheng 已提交
1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121
    int32_t inc = numOfRes - 1;  // tsdb_func_tag function only produce one row of result
    memset(buf, 0, (size_t)maxBufSize);
    memcpy(buf, pCtx->aOutputBuf, (size_t)pCtx->outputBytes);

    for (int32_t i = 0; i < inc; ++i) {
      pCtx->aOutputBuf += pCtx->outputBytes;
      memcpy(pCtx->aOutputBuf, buf, (size_t)pCtx->outputBytes);
    }
  }

  free(buf);
}

H
hjxilinx 已提交
1122
/*
 * Finalize every function context of the current group and account for the produced rows.
 *
 * xFinalize is invoked for each context, the "previous row" marker is cleared, the number of
 * result rows is added to the result buffer, and tag values are replicated over all rows.
 *
 * @return number of result rows produced by the finalized group
 */
int32_t finalizeRes(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  size_t numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  int32_t idx = 0;
  while (idx < numOfExprs) {
    SQLFunctionCtx* pCtx = &pLocalReducer->pCtx[idx];
    aAggs[pCtx->functionId].xFinalize(pCtx);
    ++idx;
  }

  pLocalReducer->hasPrevRow = false;

  int32_t numOfRes = (int32_t)getNumOfResultLocal(pQueryInfo, pLocalReducer->pCtx);
  pLocalReducer->pResultBuf->num += numOfRes;

  fillMultiRowsOfTagsVal(pQueryInfo, numOfRes, pLocalReducer);

  return numOfRes;
}

/*
 * points merge:
 * points are merged according to the sort info, which is tags columns and timestamp column.
 * In case of points without either tags columns or timestamp, such as
 * results generated by simple aggregation function, we merge them all into one points
 * *Exception*: column projection query, required no merge procedure
 */
H
hjxilinx 已提交
1146
bool needToMerge(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) {
H
hzcheng 已提交
1147
  int32_t ret = 0;  // merge all result by default
1148

H
Haojun Liao 已提交
1149
  int16_t functionId = pLocalReducer->pCtx[0].functionId;
1150 1151 1152

  // todo opt performance
  if ((/*functionId == TSDB_FUNC_PRJ || */functionId == TSDB_FUNC_ARITHM) || (tscIsProjectionQueryOnSTable(pQueryInfo, 0))) {  // column projection query
H
hzcheng 已提交
1153 1154 1155
    ret = 1;                                                            // disable merge procedure
  } else {
    tOrderDescriptor *pDesc = pLocalReducer->pDesc;
H
Haojun Liao 已提交
1156
    if (pDesc->orderInfo.numOfCols > 0) {
1157
      if (pDesc->tsOrder == TSDB_ORDER_ASC) {  // asc
H
hzcheng 已提交
1158
        // todo refactor comparator
S
slguan 已提交
1159
        ret = compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data);
H
hzcheng 已提交
1160
      } else {  // desc
S
slguan 已提交
1161
        ret = compare_d(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data);
H
hzcheng 已提交
1162 1163 1164 1165 1166 1167 1168 1169
      }
    }
  }

  /* if ret == 0, means the result belongs to the same group */
  return (ret == 0);
}

H
hjxilinx 已提交
1170
/* True when slimit.limit is non-negative and pRes->numOfGroups has reached it. */
static bool reachGroupResultLimit(SQueryInfo *pQueryInfo, SSqlRes *pRes) {
  if (pQueryInfo->slimit.limit < 0) {
    return false;
  }

  return (pRes->numOfGroups >= pQueryInfo->slimit.limit);
}

/*
 * Close the bookkeeping of the group that has just been finished.
 *
 * The group counter is increased when the group produced at least one row.
 *
 * @return true when the number of output groups has reached the slimit clause
 */
static bool saveGroupResultInfo(SSqlObj *pSql) {
  SSqlRes    *pRes = &pSql->res;
  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, pSql->cmd.clauseIndex);

  if (pRes->numOfRowsGroup > 0) {
    pRes->numOfGroups += 1;
  }

  // per-group records are currently not kept:
  //    pRes->pGroupRec = realloc(pRes->pGroupRec, pRes->numOfGroups*sizeof(SResRec));
  //    pRes->pGroupRec[pRes->numOfGroups-1].numOfRows = pRes->numOfRows;
  //    pRes->pGroupRec[pRes->numOfGroups-1].numOfClauseTotal = pRes->numOfClauseTotal;

  // the output group is limited by the slimit clause
  return reachGroupResultLimit(pQueryInfo, pRes);
}

S
slguan 已提交
1196 1197 1198 1199 1200 1201 1202
/**
 *
 * @param pSql
 * @param pLocalReducer
 * @param noMoreCurrentGroupRes
 * @return if current group is skipped, return false, and do NOT record it into pRes->numOfGroups
 */
1203
bool genFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool noMoreCurrentGroupRes) {
H
hjxilinx 已提交
1204 1205 1206 1207 1208
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SQueryInfo *  pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  tFilePage *   pResBuf = pLocalReducer->pResultBuf;
H
hjxilinx 已提交
1209
  SColumnModel *pModel = pLocalReducer->resColModel;
H
hzcheng 已提交
1210

S
slguan 已提交
1211 1212 1213
  pRes->code = TSDB_CODE_SUCCESS;

  /*
1214
   * Ignore the output of the current group since this group is skipped by user
S
slguan 已提交
1215 1216
   * We set the numOfRows to be 0 and discard the possible remain results.
   */
1217
  if (pQueryInfo->slimit.offset > 0) {
S
slguan 已提交
1218
    pRes->numOfRows = 0;
1219
    pQueryInfo->slimit.offset -= 1;
S
slguan 已提交
1220
    pLocalReducer->discard = !noMoreCurrentGroupRes;
H
Haojun Liao 已提交
1221 1222 1223 1224 1225 1226

    if (pLocalReducer->discard) {
      SColumnModel *pInternModel = pLocalReducer->pDesc->pColumnModel;
      tColModelAppend(pInternModel, pLocalReducer->discardData, pLocalReducer->pTempBuffer->data, 0, 1, 1);
    }

S
slguan 已提交
1227 1228 1229
    return false;
  }

H
hjxilinx 已提交
1230
  tColModelCompact(pModel, pResBuf, pModel->capacity);
H
hzcheng 已提交
1231 1232 1233

#ifdef _DEBUG_VIEW
  printf("final result before interpo:\n");
1234
//  tColModelDisplay(pLocalReducer->resColModel, pLocalReducer->pBufForInterpo, pResBuf->num, pResBuf->num);
H
hzcheng 已提交
1235
#endif
1236 1237

  // no interval query, no fill operation
1238
  if (pQueryInfo->interval.interval == 0 || pQueryInfo->fillType == TSDB_FILL_NONE) {
1239 1240 1241 1242
    genFinalResWithoutFill(pRes, pLocalReducer, pQueryInfo);
  } else {
    SFillInfo* pFillInfo = pLocalReducer->pFillInfo;
    if (pFillInfo != NULL) {
1243 1244 1245
      TSKEY ekey = (pQueryInfo->order.order == TSDB_ORDER_ASC)? pQueryInfo->window.ekey: pQueryInfo->window.skey;

      taosFillSetStartInfo(pFillInfo, (int32_t)pResBuf->num, ekey);
1246 1247 1248 1249 1250 1251
      taosFillCopyInputDataFromOneFilePage(pFillInfo, pResBuf);
    }
    
    doFillResult(pSql, pLocalReducer, noMoreCurrentGroupRes);
  }

S
slguan 已提交
1252
  return true;
H
hzcheng 已提交
1253 1254
}

H
hjxilinx 已提交
1255
/*
 * Reset the output buffer to the beginning: the result page (header plus nResultBufSize bytes)
 * is zeroed and the output pointer of every output column is placed at the start of that
 * column's region inside the page.
 */
void resetOutputBuf(SQueryInfo *pQueryInfo, SLocalReducer *pLocalReducer) {
  memset(pLocalReducer->pResultBuf, 0, pLocalReducer->nResultBufSize + sizeof(tFilePage));

  int32_t col = 0;
  while (col < pQueryInfo->fieldsInfo.numOfOutput) {
    pLocalReducer->pCtx[col].aOutputBuf =
        pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, col) * pLocalReducer->resColModel->capacity;
    ++col;
  }
}

S
slguan 已提交
1264
/*
 * Prepare the result state for the rows of a new group: the per-group row counters are cleared,
 * the LIMIT offset is restored from the reducer, and, when a fill type is configured, the fill
 * information is reset to the truncated start of the query window.
 */
static void resetEnvForNewResultset(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer *pLocalReducer) {
  pRes->numOfRowsGroup = 0;
  pRes->numOfRows = 0;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  pQueryInfo->limit.offset = pLocalReducer->offset;

  STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0);
  STableComInfo   tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta);

  if (pQueryInfo->fillType == TSDB_FILL_NONE) {
    return;
  }

  // for group result interpolation, do not return if no data is generated
  TSKEY skey = pQueryInfo->window.ekey;
  if (pQueryInfo->order.order == TSDB_ORDER_ASC) {
    skey = pQueryInfo->window.skey;
  }

  int64_t newTime = taosTimeTruncate(skey, &pQueryInfo->interval, tinfo.precision);
  taosResetFillInfo(pLocalReducer->pFillInfo, newTime);
}

S
slguan 已提交
1284 1285 1286 1287
/* True when the number of completed data sources equals the number of buffers. */
static bool isAllSourcesCompleted(SLocalReducer *pLocalReducer) {
  return (pLocalReducer->numOfCompleted == pLocalReducer->numOfBuffer);
}

1288
/*
 * Continue producing filled rows for the current group when the fill descriptor still reports
 * remaining rows.
 *
 * @return true when remaining fill rows existed (whether or not rows were generated),
 *         false when there is no fill descriptor or nothing remains
 */
static bool doBuildFilledResultForGroup(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SQueryInfo    *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo     *pFillInfo = pLocalReducer->pFillInfo;

  if (pFillInfo == NULL || taosNumOfRemainRows(pFillInfo) <= 0) {
    return false;
  }

  assert(pQueryInfo->fillType != TSDB_FILL_NONE);

  // the first column must be the timestamp column: read the timestamp of the last row
  tFilePage *pFinalDataBuf = pLocalReducer->pResultBuf;
  int64_t    etime = *(int64_t *)(pFinalDataBuf->data + TSDB_KEYSIZE * (pFillInfo->numOfRows - 1));

  int32_t rows = (int32_t)getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
  if (rows > 0) {  // do fill gap
    doFillResult(pSql, pLocalReducer, false);
  }

  return true;
}
H
hzcheng 已提交
1313

S
slguan 已提交
1314 1315 1316 1317
/*
 * Handle the tail of a group before the merge loop continues.
 *
 * This applies when all sources are consumed and no previous row is pending, when the first
 * data source is missing, or when the previous group was completed with an unprocessed row
 * left. In those cases the remaining fill rows (if any are due) are generated and the
 * environment is switched over to the next group.
 *
 * @return true when the caller should return to the user now, false when merging goes on
 */
static bool doHandleLastRemainData(SSqlObj *pSql) {
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;

  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
  SFillInfo     *pFillInfo = pLocalReducer->pFillInfo;

  bool prevGroupCompleted = (!pLocalReducer->discard) && pLocalReducer->hasUnprocessedRow;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);

  bool atGroupTail = (isAllSourcesCompleted(pLocalReducer) && !pLocalReducer->hasPrevRow) ||
                     pLocalReducer->pLocalDataSrc[0] == NULL || prevGroupCompleted;
  if (!atGroupTail) {
    return false;
  }

  // with fillType == TSDB_FILL_NONE the fill step is skipped entirely
  if (pQueryInfo->fillType != TSDB_FILL_NONE) {
    bool belowLimit = (pQueryInfo->limit.limit > 0 && pRes->numOfRowsGroup < pQueryInfo->limit.limit);
    bool unlimited = (pQueryInfo->limit.limit < 0);

    if (belowLimit || unlimited) {
      int64_t etime = pQueryInfo->window.skey;
      if (pQueryInfo->order.order == TSDB_ORDER_ASC) {
        etime = pQueryInfo->window.ekey;
      }

      int32_t rows = (int32_t)getFilledNumOfRes(pFillInfo, etime, pLocalReducer->resColModel->capacity);
      if (rows > 0) {
        doFillResult(pSql, pLocalReducer, true);
      }
    }
  }

  /*
   * 1. numOfRows == 0, means no interpolation results are generated.
   * 2. if all local data sources are consumed, and no un-processed rows exist.
   *
   * No results will be generated and query completed.
   */
  if (pRes->numOfRows > 0) {
    return true;
  }

  if (isAllSourcesCompleted(pLocalReducer) && (!pLocalReducer->hasUnprocessedRow)) {
    return true;
  }

  // start to process result for a new group and save the result info of previous group
  if (saveGroupResultInfo(pSql)) {
    return true;
  }

  resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
  return false;
}
H
hzcheng 已提交
1358

H
hjxilinx 已提交
1359 1360 1361 1362
/*
 * Move the output position of every function context forward by numOfRes rows and start the
 * merge of the pending input row (with function initialisation) at the new position.
 */
static void doProcessResultInNextWindow(SSqlObj *pSql, int32_t numOfRes) {
  SSqlCmd       *pCmd = &pSql->cmd;
  SLocalReducer *pLocalReducer = pSql->res.pLocalReducer;

  SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
  size_t      numOfExprs = tscSqlExprNumOfExprs(pQueryInfo);

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    SQLFunctionCtx *pCtx = &pLocalReducer->pCtx[idx];

    pCtx->aOutputBuf += pCtx->outputBytes * numOfRes;

    // top/bottom keep a separate timestamp output, which has to follow the output position
    bool isTopBottom = (pCtx->functionId == TSDB_FUNC_TOP || pCtx->functionId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf = ((char *)pCtx->ptsOutputBuf + TSDB_KEYSIZE * numOfRes);
    }
  }

  doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
}

1380
int32_t tscDoLocalMerge(SSqlObj *pSql) {
S
slguan 已提交
1381 1382
  SSqlCmd *pCmd = &pSql->cmd;
  SSqlRes *pRes = &pSql->res;
H
hjxilinx 已提交
1383

H
hjxilinx 已提交
1384
  tscResetForNextRetrieve(pRes);
H
hjxilinx 已提交
1385

S
slguan 已提交
1386
  if (pSql->signature != pSql || pRes == NULL || pRes->pLocalReducer == NULL) {  // all data has been processed
H
Haojun Liao 已提交
1387 1388
    tscError("%p local merge abort due to error occurs, code:%s", pSql, tstrerror(pRes->code));
    return pRes->code;
H
hzcheng 已提交
1389
  }
H
hjxilinx 已提交
1390

S
slguan 已提交
1391
  SLocalReducer *pLocalReducer = pRes->pLocalReducer;
H
Haojun Liao 已提交
1392
  SQueryInfo    *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex);
H
hjxilinx 已提交
1393

S
slguan 已提交
1394 1395
  // set the data merge in progress
  int32_t prevStatus =
weixin_48148422's avatar
weixin_48148422 已提交
1396
      atomic_val_compare_exchange_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY, TSC_LOCALREDUCE_IN_PROGRESS);
H
hjxilinx 已提交
1397
  if (prevStatus != TSC_LOCALREDUCE_READY) {
H
hjxilinx 已提交
1398
    assert(prevStatus == TSC_LOCALREDUCE_TOBE_FREED);  // it is in tscDestroyLocalReducer function already
S
slguan 已提交
1399 1400 1401 1402 1403 1404 1405 1406 1407 1408
    return TSDB_CODE_SUCCESS;
  }

  tFilePage *tmpBuffer = pLocalReducer->pTempBuffer;

  if (doHandleLastRemainData(pSql)) {
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

1409
  if (doBuildFilledResultForGroup(pSql)) {
S
slguan 已提交
1410 1411 1412 1413
    pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
    return TSDB_CODE_SUCCESS;
  }

H
hzcheng 已提交
1414 1415 1416
  SLoserTreeInfo *pTree = pLocalReducer->pLoserTree;

  // clear buffer
S
slguan 已提交
1417
  handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hjxilinx 已提交
1418
  SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel;
H
hzcheng 已提交
1419 1420

  while (1) {
S
slguan 已提交
1421
    if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1422 1423 1424 1425 1426 1427
      break;
    }

#ifdef _DEBUG_VIEW
    printf("chosen data in pTree[0] = %d\n", pTree->pNode[0].index);
#endif
1428
    assert((pTree->pNode[0].index < pLocalReducer->numOfBuffer) && (pTree->pNode[0].index >= 0) && tmpBuffer->num == 0);
H
hzcheng 已提交
1429 1430

    // chosen from loser tree
S
slguan 已提交
1431
    SLocalDataSource *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index];
H
hzcheng 已提交
1432

S
slguan 已提交
1433
    tColModelAppend(pModel, tmpBuffer, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1,
H
hjxilinx 已提交
1434
                    pOneDataSrc->pMemBuffer->pColumnModel->capacity);
H
hzcheng 已提交
1435 1436 1437 1438

#if defined(_DEBUG_VIEW)
    printf("chosen row:\t");
    SSrcColumnInfo colInfo[256] = {0};
1439
    tscGetSrcColumnInfo(colInfo, pQueryInfo);
H
hzcheng 已提交
1440

1441
    tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->num, pModel->capacity, colInfo);
H
hzcheng 已提交
1442
#endif
S
slguan 已提交
1443

H
hzcheng 已提交
1444 1445 1446 1447
    if (pLocalReducer->discard) {
      assert(pLocalReducer->hasUnprocessedRow == false);

      /* current record belongs to the same group of previous record, need to discard it */
S
slguan 已提交
1448
      if (isSameGroup(pCmd, pLocalReducer, pLocalReducer->discardData->data, tmpBuffer)) {
1449
        tmpBuffer->num = 0;
H
hzcheng 已提交
1450 1451
        pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1452 1453 1454 1455
        adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);

        // all inputs are exhausted, abort current process
        if (isAllSourcesCompleted(pLocalReducer)) {
H
hzcheng 已提交
1456 1457 1458
          break;
        }

S
slguan 已提交
1459
        // data belongs to the same group needs to be discarded
H
hzcheng 已提交
1460 1461 1462
        continue;
      } else {
        pLocalReducer->discard = false;
1463
        pLocalReducer->discardData->num = 0;
H
hzcheng 已提交
1464

S
slguan 已提交
1465 1466 1467 1468 1469 1470
        if (saveGroupResultInfo(pSql)) {
          pLocalReducer->status = TSC_LOCALREDUCE_READY;
          return TSDB_CODE_SUCCESS;
        }

        resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1471 1472 1473 1474
      }
    }

    if (pLocalReducer->hasPrevRow) {
1475
      if (needToMerge(pQueryInfo, pLocalReducer, tmpBuffer)) {
S
slguan 已提交
1476
        // belong to the group of the previous row, continue process it
S
slguan 已提交
1477
        doExecuteSecondaryMerge(pCmd, pLocalReducer, false);
H
hzcheng 已提交
1478 1479

        // copy to buffer
S
slguan 已提交
1480 1481 1482 1483 1484 1485
        savePreviousRow(pLocalReducer, tmpBuffer);
      } else {
        /*
         * current row does not belong to the group of previous row.
         * so the processing of previous group is completed.
         */
1486
        int32_t numOfRes = finalizeRes(pQueryInfo, pLocalReducer);
H
Haojun Liao 已提交
1487
        bool   sameGroup = isSameGroup(pCmd, pLocalReducer, pLocalReducer->prevRowOfInput, tmpBuffer);
H
hzcheng 已提交
1488 1489 1490 1491

        tFilePage *pResBuf = pLocalReducer->pResultBuf;

        /*
1492
         * if the previous group does NOT generate any result (pResBuf->num == 0),
H
hzcheng 已提交
1493 1494
         * continue to process results instead of return results.
         */
1495
        if ((!sameGroup && pResBuf->num > 0) || (pResBuf->num == pLocalReducer->resColModel->capacity)) {
H
hzcheng 已提交
1496
          // does not belong to the same group
1497
          bool notSkipped = genFinalResults(pSql, pLocalReducer, !sameGroup);
H
hzcheng 已提交
1498

S
slguan 已提交
1499
          // this row needs to be discarded, since it belongs to the previous group
H
hzcheng 已提交
1500 1501
          if (pLocalReducer->discard && sameGroup) {
            pLocalReducer->hasUnprocessedRow = false;
1502
            tmpBuffer->num = 0;
H
hzcheng 已提交
1503
          } else {
S
slguan 已提交
1504
            // current row does not belong to the previous group, so it has not been handled yet.
H
hzcheng 已提交
1505 1506 1507
            pLocalReducer->hasUnprocessedRow = true;
          }

1508
          resetOutputBuf(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1509 1510
          pOneDataSrc->rowIdx += 1;

S
slguan 已提交
1511 1512
          // here we do not check the return value
          adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1513 1514 1515
          assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS);

          if (pRes->numOfRows == 0) {
S
slguan 已提交
1516
            handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1517 1518

            if (!sameGroup) {
S
slguan 已提交
1519 1520 1521 1522 1523 1524 1525 1526 1527 1528
              /*
               * previous group is done, prepare for the next group
               * If previous group is not skipped, keep it in pRes->numOfGroups
               */
              if (notSkipped && saveGroupResultInfo(pSql)) {
                pLocalReducer->status = TSC_LOCALREDUCE_READY;
                return TSDB_CODE_SUCCESS;
              }

              resetEnvForNewResultset(pRes, pCmd, pLocalReducer);
H
hzcheng 已提交
1529 1530 1531 1532 1533 1534 1535
            }
          } else {
            /*
             * if next record belongs to a new group, we do not handle this record here.
             * We start the process in a new round.
             */
            if (sameGroup) {
S
slguan 已提交
1536
              handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1537 1538 1539
            }
          }

S
slguan 已提交
1540 1541 1542 1543 1544 1545
          // current group has no result,
          if (pRes->numOfRows == 0) {
            continue;
          } else {
            pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
            return TSDB_CODE_SUCCESS;
H
hzcheng 已提交
1546
          }
S
slguan 已提交
1547
        } else {  // result buffer is not full
H
hjxilinx 已提交
1548
          doProcessResultInNextWindow(pSql, numOfRes);
S
slguan 已提交
1549
          savePreviousRow(pLocalReducer, tmpBuffer);
H
hzcheng 已提交
1550 1551
        }
      }
S
slguan 已提交
1552
    } else {
S
slguan 已提交
1553
      doExecuteSecondaryMerge(pCmd, pLocalReducer, true);
S
slguan 已提交
1554
      savePreviousRow(pLocalReducer, tmpBuffer);  // copy the processed row to buffer
H
hzcheng 已提交
1555 1556 1557
    }

    pOneDataSrc->rowIdx += 1;
S
slguan 已提交
1558
    adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree);
H
hzcheng 已提交
1559 1560 1561
  }

  if (pLocalReducer->hasPrevRow) {
1562
    finalizeRes(pQueryInfo, pLocalReducer);
H
hzcheng 已提交
1563 1564
  }

1565
  if (pLocalReducer->pResultBuf->num) {
1566
    genFinalResults(pSql, pLocalReducer, true);
H
hzcheng 已提交
1567 1568 1569
  }

  assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS && pRes->row == 0);
S
slguan 已提交
1570
  pLocalReducer->status = TSC_LOCALREDUCE_READY;  // set the flag, taos_free_result can release this result.
H
hzcheng 已提交
1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588

  return TSDB_CODE_SUCCESS;
}

/**
 * Re-initialize the result of a locally executed query and attach a fresh,
 * zero-filled result buffer to it.
 *
 * A local reducer that is already attached to the result is destroyed first.
 * A new SLocalReducer is then allocated together with a result buffer sized
 * for numOfRes rows of rowLen bytes each (plus the tFilePage header and one
 * spare byte), and pRes->data is pointed at the buffer's payload.
 *
 * @param pObj     SQL object whose result (pObj->res) is re-initialized
 * @param numOfRes number of result rows; also stored in the buffer's num field
 * @param rowLen   size of one result row in bytes
 *
 * If either allocation fails nothing is left half-built: both
 * pRes->pLocalReducer and pRes->data are NULL on return.
 */
void tscInitResObjForLocalQuery(SSqlObj *pObj, int32_t numOfRes, int32_t rowLen) {
  SSqlRes *pRes = &pObj->res;
  if (pRes->pLocalReducer != NULL) {
    tscDestroyLocalReducer(pObj);
  }

  pRes->qhandle = 1;  // hack to pass the safety check in fetch_row function
  pRes->numOfRows = 0;
  pRes->row = 0;

  pRes->rspType = 0;  // used as a flag to denote if taos_retrieved() has been called yet

  // the result owns no buffer until both allocations below have succeeded
  pRes->pLocalReducer = NULL;
  pRes->data = NULL;

  SLocalReducer *pReducer = calloc(1, sizeof(*pReducer));
  if (pReducer == NULL) {
    return;
  }

  /*
   * we need one additional byte space
   * the sprintf function needs one additional space to put '\0' at the end of string
   *
   * the product is computed in size_t so that a large row count cannot
   * overflow 32-bit signed arithmetic before the widening takes place
   */
  size_t allocSize = (size_t)numOfRes * (size_t)rowLen + sizeof(tFilePage) + 1;

  tFilePage *pResultBuf = calloc(1, allocSize);
  if (pResultBuf == NULL) {
    free(pReducer);  // do not leak the reducer when its buffer cannot be allocated
    return;
  }

  pResultBuf->num = numOfRes;
  pReducer->pResultBuf = pResultBuf;

  pRes->pLocalReducer = pReducer;
  pRes->data = pResultBuf->data;
}