未验证 提交 e62e125d 编写于 作者: D dapan1121 提交者: GitHub

Merge pull request #20569 from taosdata/feat/mode_opt

enh(query): improve mode function page buffer allocation
......@@ -32,9 +32,6 @@
#define TAIL_MAX_POINTS_NUM 100
#define TAIL_MAX_OFFSET 100
#define UNIQUE_MAX_RESULT_SIZE (1024 * 1024 * 10)
#define MODE_MAX_RESULT_SIZE UNIQUE_MAX_RESULT_SIZE
#define HLL_BUCKET_BITS 14 // The bits of the bucket
#define HLL_DATA_BITS (64 - HLL_BUCKET_BITS)
#define HLL_BUCKETS (1 << HLL_BUCKET_BITS)
......@@ -244,12 +241,11 @@ typedef struct SUniqueInfo {
typedef struct SModeItem {
int64_t count;
STuplePos dataPos;
STuplePos tuplePos;
char data[];
} SModeItem;
typedef struct SModeInfo {
int32_t numOfPoints;
uint8_t colType;
int16_t colBytes;
SHashObj* pHash;
......@@ -257,7 +253,7 @@ typedef struct SModeInfo {
STuplePos nullTuplePos;
bool nullTupleSaved;
char pItems[];
char* buf; // serialize data buffer
} SModeInfo;
typedef struct SDerivInfo {
......@@ -3113,7 +3109,7 @@ void* serializeTupleData(const SSDataBlock* pSrcBlock, int32_t rowIndex, SSubsid
return buf;
}
static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, size_t length, STupleKey key,
static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, size_t length, STupleKey* key,
STuplePos* pPos) {
STuplePos p = {0};
if (pHandle->pBuf != NULL) {
......@@ -3149,8 +3145,8 @@ static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf,
releaseBufPage(pHandle->pBuf, pPage);
} else {
// other tuple save policy
if (streamStateFuncPut(pHandle->pState, &key, pBuf, length) >= 0) {
p.streamTupleKey = key;
if (streamStateFuncPut(pHandle->pState, key, pBuf, length) >= 0) {
p.streamTupleKey = *key;
}
}
......@@ -3174,7 +3170,7 @@ int32_t saveTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock*
}
char* buf = serializeTupleData(pSrcBlock, rowIndex, &pCtx->subsidiaries, pCtx->subsidiaries.buf);
return doSaveTupleData(&pCtx->saveHandle, buf, pCtx->subsidiaries.rowLen, key, pPos);
return doSaveTupleData(&pCtx->saveHandle, buf, pCtx->subsidiaries.rowLen, &key, pPos);
}
static int32_t doUpdateTupleData(SSerializeDataHandle* pHandle, const void* pBuf, size_t length, STuplePos* pPos) {
......@@ -4949,7 +4945,7 @@ int32_t uniqueFunction(SqlFunctionCtx* pCtx) {
}
bool getModeFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) {
pEnv->calcMemSize = sizeof(SModeInfo) + MODE_MAX_RESULT_SIZE;
pEnv->calcMemSize = sizeof(SModeInfo);
return true;
}
......@@ -4959,7 +4955,6 @@ bool modeFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) {
}
SModeInfo* pInfo = GET_ROWCELL_INTERBUF(pResInfo);
pInfo->numOfPoints = 0;
pInfo->colType = pCtx->resDataInfo.type;
pInfo->colBytes = pCtx->resDataInfo.bytes;
if (pInfo->pHash != NULL) {
......@@ -4970,38 +4965,60 @@ bool modeFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) {
pInfo->nullTupleSaved = false;
pInfo->nullTuplePos.pageId = -1;
pInfo->buf = taosMemoryMalloc(pInfo->colBytes);
return true;
}
static void modeFunctionCleanup(SModeInfo * pInfo) {
taosHashCleanup(pInfo->pHash);
taosMemoryFreeClear(pInfo->buf);
}
static int32_t saveModeTupleData(SqlFunctionCtx* pCtx, char* data, SModeInfo *pInfo, STuplePos* pPos) {
if (IS_VAR_DATA_TYPE(pInfo->colType)) {
memcpy(pInfo->buf, data, varDataTLen(data));
} else {
memcpy(pInfo->buf, data, pInfo->colBytes);
}
return doSaveTupleData(&pCtx->saveHandle, pInfo->buf, pInfo->colBytes, NULL, pPos);
}
static int32_t doModeAdd(SModeInfo* pInfo, int32_t rowIndex, SqlFunctionCtx* pCtx, char* data) {
int32_t hashKeyBytes = IS_STR_DATA_TYPE(pInfo->colType) ? varDataTLen(data) : pInfo->colBytes;
SModeItem** pHashItem = taosHashGet(pInfo->pHash, data, hashKeyBytes);
int32_t code = TSDB_CODE_SUCCESS;
int32_t hashKeyBytes = IS_STR_DATA_TYPE(pInfo->colType) ? varDataTLen(data) : pInfo->colBytes;
SModeItem* pHashItem = (SModeItem *)taosHashGet(pInfo->pHash, data, hashKeyBytes);
if (pHashItem == NULL) {
int32_t size = sizeof(SModeItem) + pInfo->colBytes;
SModeItem* pItem = (SModeItem*)(pInfo->pItems + pInfo->numOfPoints * size);
memcpy(pItem->data, data, hashKeyBytes);
pItem->count += 1;
int32_t size = sizeof(SModeItem);
SModeItem item = {0};
item.count += 1;
code = saveModeTupleData(pCtx, data, pInfo, &item.dataPos);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
if (pCtx->subsidiaries.num > 0) {
int32_t code = saveTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &pItem->tuplePos);
code = saveTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &item.tuplePos);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
taosHashPut(pInfo->pHash, data, hashKeyBytes, &pItem, sizeof(SModeItem*));
pInfo->numOfPoints++;
taosHashPut(pInfo->pHash, data, hashKeyBytes, &item, sizeof(SModeItem));
} else {
(*pHashItem)->count += 1;
pHashItem->count += 1;
if (pCtx->subsidiaries.num > 0) {
int32_t code = updateTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &((*pHashItem)->tuplePos));
int32_t code = updateTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &pHashItem->tuplePos);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
}
return TSDB_CODE_SUCCESS;
return code;
}
int32_t modeFunction(SqlFunctionCtx* pCtx) {
......@@ -5024,18 +5041,15 @@ int32_t modeFunction(SqlFunctionCtx* pCtx) {
char* data = colDataGetData(pInputCol, i);
int32_t code = doModeAdd(pInfo, i, pCtx, data);
if (code != TSDB_CODE_SUCCESS) {
modeFunctionCleanup(pInfo);
return code;
}
if (sizeof(SModeInfo) + pInfo->numOfPoints * (sizeof(SModeItem) + pInfo->colBytes) >= MODE_MAX_RESULT_SIZE) {
taosHashCleanup(pInfo->pHash);
return TSDB_CODE_OUT_OF_MEMORY;
}
}
if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pInfo->nullTupleSaved) {
int32_t code = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pInfo->nullTuplePos);
if (code != TSDB_CODE_SUCCESS) {
modeFunctionCleanup(pInfo);
return code;
}
pInfo->nullTupleSaved = true;
......@@ -5054,26 +5068,37 @@ int32_t modeFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId);
int32_t currentRow = pBlock->info.rows;
int32_t resIndex = -1;
STuplePos resDataPos, resTuplePos;
int32_t maxCount = 0;
for (int32_t i = 0; i < pInfo->numOfPoints; ++i) {
SModeItem* pItem = (SModeItem*)(pInfo->pItems + i * (sizeof(SModeItem) + pInfo->colBytes));
void *pIter = taosHashIterate(pInfo->pHash, NULL);
while (pIter != NULL) {
SModeItem *pItem = (SModeItem *)pIter;
if (pItem->count >= maxCount) {
maxCount = pItem->count;
resIndex = i;
resDataPos = pItem->dataPos;
resTuplePos = pItem->tuplePos;
}
pIter = taosHashIterate(pInfo->pHash, pIter);
}
if (maxCount != 0) {
SModeItem* pResItem = (SModeItem*)(pInfo->pItems + resIndex * (sizeof(SModeItem) + pInfo->colBytes));
colDataSetVal(pCol, currentRow, pResItem->data, false);
code = setSelectivityValue(pCtx, pBlock, &pResItem->tuplePos, currentRow);
const char* pData = loadTupleData(pCtx, &resDataPos);
if (pData == NULL) {
code = TSDB_CODE_NO_AVAIL_DISK;
modeFunctionCleanup(pInfo);
return code;
}
colDataSetVal(pCol, currentRow, pData, false);
code = setSelectivityValue(pCtx, pBlock, &resTuplePos, currentRow);
} else {
colDataSetNULL(pCol, currentRow);
code = setSelectivityValue(pCtx, pBlock, &pInfo->nullTuplePos, currentRow);
}
taosHashCleanup(pInfo->pHash);
modeFunctionCleanup(pInfo);
return code;
}
......
......@@ -53,6 +53,7 @@ class TDTestCase:
( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a )
( '2022-10-28 01:01:26.000', 7, 00000, 000, 00, 0.00, 00.00, 1, "binary7", "nchar7", "1970-01-01 08:00:00.000" )
( '2022-12-01 01:01:30.000', 8, -88888, -888, -88, -8.88, -88.88, 0, "binary8", "nchar8", "1969-01-01 01:00:00.000" )
( '2022-12-31 01:01:32.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" )
( '2022-12-31 01:01:36.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" )
( '2023-02-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
'''
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册