From 699a94cced8827c40e3ccdaaa071321b521c334e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 11 Feb 2022 10:14:56 +0800 Subject: [PATCH] [td-11818] opt perf --- include/common/tep.h | 4 + source/common/src/tep.c | 218 ++++++++++++++++++-- source/common/src/ttszip.c | 4 +- source/libs/executor/test/executorTests.cpp | 74 +++++-- source/libs/parser/src/astToMsg.c | 2 +- 5 files changed, 262 insertions(+), 40 deletions(-) diff --git a/include/common/tep.h b/include/common/tep.h index 58ae6038e8..8194835223 100644 --- a/include/common/tep.h +++ b/include/common/tep.h @@ -18,6 +18,9 @@ typedef struct SBlockOrderInfo { int32_t order; int32_t colIndex; SColumnInfoData *pColData; +// int32_t type; +// int32_t bytes; +// bool hasNull; } SBlockOrderInfo; int taosGetFqdnPortFromEp(const char *ep, SEp *pEp); @@ -89,6 +92,7 @@ size_t blockDataGetSize(const SSDataBlock* pBlock); size_t blockDataGetRowSize(const SSDataBlock* pBlock); int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst); +int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst); int32_t blockDataEnsureCapacity(SSDataBlock* pDataBlock, uint32_t numOfRows); void blockDataClearup(SSDataBlock* pDataBlock, bool hasVarCol); diff --git a/source/common/src/tep.c b/source/common/src/tep.c index bc0484a636..6ead90caee 100644 --- a/source/common/src/tep.c +++ b/source/common/src/tep.c @@ -1,4 +1,5 @@ #include "tep.h" +#include #include "common.h" #include "tglobal.h" #include "tlockfree.h" @@ -86,6 +87,7 @@ int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t currentRow, con colDataSetNull_f(pColumnInfoData->nullbitmap, currentRow); } + pColumnInfoData->hasNull = true; return 0; } @@ -747,6 +749,23 @@ static void destroyTupleIndex(int32_t* index) { tfree(index); } +static __compar_fn_t getComparFn(int32_t type, int32_t order) { + switch(type) { + case TSDB_DATA_TYPE_TINYINT: return order == TSDB_ORDER_ASC? compareInt8Val:compareInt8ValDesc; + case TSDB_DATA_TYPE_SMALLINT: return order == TSDB_ORDER_ASC? compareInt16Val:compareInt16ValDesc; + case TSDB_DATA_TYPE_INT: return order == TSDB_ORDER_ASC? compareInt32Val:compareInt32ValDesc; + case TSDB_DATA_TYPE_BIGINT: return order == TSDB_ORDER_ASC? compareInt64Val:compareInt64ValDesc; + case TSDB_DATA_TYPE_FLOAT: return order == TSDB_ORDER_ASC? compareFloatVal:compareFloatValDesc; + case TSDB_DATA_TYPE_DOUBLE: return order == TSDB_ORDER_ASC? compareDoubleVal:compareDoubleValDesc; + case TSDB_DATA_TYPE_UTINYINT: return order == TSDB_ORDER_ASC? compareUint8Val:compareUint8ValDesc; + case TSDB_DATA_TYPE_USMALLINT:return order == TSDB_ORDER_ASC? compareUint16Val:compareUint16ValDesc; + case TSDB_DATA_TYPE_UINT: return order == TSDB_ORDER_ASC? compareUint32Val:compareUint32ValDesc; + case TSDB_DATA_TYPE_UBIGINT: return order == TSDB_ORDER_ASC? compareUint64Val:compareUint64ValDesc; + default: + return order == TSDB_ORDER_ASC? compareInt32Val:compareInt32ValDesc; + } +} + int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst) { ASSERT(pDataBlock != NULL && pOrderInfo != NULL); if (pDataBlock->info.rows <= 1) { @@ -755,6 +774,46 @@ int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirs // Allocate the additional buffer. uint32_t rows = pDataBlock->info.rows; + + bool sortColumnHasNull = false; + bool varTypeSort = false; + + for (int32_t i = 0; i < taosArrayGetSize(pOrderInfo); ++i) { + SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); + + SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, pInfo->colIndex); + if (pColInfoData->hasNull) { + sortColumnHasNull = true; + } + + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + varTypeSort = true; + } + } + + if (taosArrayGetSize(pOrderInfo) == 1 && (!sortColumnHasNull)) { + if (pDataBlock->info.numOfCols == 1) { + if (!varTypeSort) { + SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, 0); + SBlockOrderInfo* pOrder = taosArrayGet(pOrderInfo, 0); + + int64_t p0 = taosGetTimestampUs(); + + __compar_fn_t fn = getComparFn(pColInfoData->info.type, pOrder->order); + qsort(pColInfoData->pData, pDataBlock->info.rows, pColInfoData->info.bytes, fn); + + int64_t p1 = taosGetTimestampUs(); + printf("sort:%ld, rows:%d\n", p1 - p0, pDataBlock->info.rows); + + return TSDB_CODE_SUCCESS; + } else { // var data type + + } + } else if (pDataBlock->info.numOfCols == 2) { + + } + } + int32_t* index = createTupleIndex(rows); if (index == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -779,12 +838,6 @@ int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirs return terrno; } -#if 0 - SColumnInfoData* px = taosArrayGet(pDataBlock->pDataBlock, 0); - for(int32_t i = 0; i < pDataBlock->info.rows; ++i) { - printf("%d, %d, %d\n", index[i], ((int32_t*)px->pData)[i], ((int32_t*)px->pData)[index[i]]); - } -#endif int64_t p2 = taosGetTimestampUs(); int32_t code = blockDataAssign(pCols, pDataBlock, index); @@ -795,18 +848,6 @@ int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirs int64_t p3 = taosGetTimestampUs(); -#if 0 - for(int32_t i = 0; i < pDataBlock->info.rows; ++i) { - if (colDataIsNull(&pCols[0], rows, i, NULL)) { - printf("0\t"); - } else { - printf("%d\t", ((int32_t*)pCols[0].pData)[i]); - } - } - - printf("end\n"); -#endif - copyBackToBlock(pDataBlock, pCols); int64_t p4 = taosGetTimestampUs(); @@ -816,6 +857,147 @@ int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirs return TSDB_CODE_SUCCESS; } +typedef struct SHelper { + int32_t index; + union {char *pData; int64_t i64; double d64;}; +} SHelper; + +SHelper* createTupleIndex_rv(int32_t numOfRows, SArray* pOrderInfo, SSDataBlock* pBlock) { + int32_t sortValLengthPerRow = 0; + int32_t numOfCols = taosArrayGetSize(pOrderInfo); + + for(int32_t i = 0; i < numOfCols; ++i) { + SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, pInfo->colIndex); + pInfo->pColData = pColInfo; + sortValLengthPerRow += pColInfo->info.bytes; + } + + size_t len = sortValLengthPerRow * pBlock->info.rows; + + char* buf = calloc(1, len); + SHelper* phelper = calloc(numOfRows, sizeof(SHelper)); + for(int32_t i = 0; i < numOfRows; ++i) { + phelper[i].index = i; + phelper[i].pData = buf + sortValLengthPerRow * i; + } + + int32_t offset = 0; + for(int32_t i = 0; i < numOfCols; ++i) { + SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); + for(int32_t j = 0; j < numOfRows; ++j) { + phelper[j].i64 = *(int32_t*) pInfo->pColData->pData + pInfo->pColData->info.bytes * j; +// memcpy(phelper[j].pData + offset, pInfo->pColData->pData + pInfo->pColData->info.bytes * j, pInfo->pColData->info.bytes); + } + + offset += pInfo->pColData->info.bytes; + } + + return phelper; +} + +int32_t dataBlockCompar_rv(const void* p1, const void* p2, const void* param) { + const SSDataBlockSortHelper* pHelper = (const SSDataBlockSortHelper*) param; + +// SSDataBlock* pDataBlock = pHelper->pDataBlock; + + SHelper* left = (SHelper*) p1; + SHelper* right = (SHelper*) p2; + + SArray* pInfo = pHelper->orderInfo; + + int32_t offset = 0; +// for(int32_t i = 0; i < pInfo->size; ++i) { +// SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, 0); +// SColumnInfoData* pColInfoData = pOrder->pColData;//TARRAY_GET_ELEM(pDataBlock->pDataBlock, pOrder->colIndex); + +// if (pColInfoData->hasNull) { +// bool leftNull = colDataIsNull(pColInfoData, pDataBlock->info.rows, left, pDataBlock->pBlockAgg); +// bool rightNull = colDataIsNull(pColInfoData, pDataBlock->info.rows, right, pDataBlock->pBlockAgg); +// if (leftNull && rightNull) { +// continue; // continue to next slot +// } +// +// if (rightNull) { +// return pHelper->nullFirst? 1:-1; +// } +// +// if (leftNull) { +// return pHelper->nullFirst? -1:1; +// } +// } + +// void* left1 = colDataGet(pColInfoData, left); +// void* right1 = colDataGet(pColInfoData, right); + +// switch(pColInfoData->info.type) { +// case TSDB_DATA_TYPE_INT: { + int32_t leftx = *(int32_t*)left->pData;//*(int32_t*)(left->pData + offset); + int32_t rightx = *(int32_t*)right->pData;//*(int32_t*)(right->pData + offset); + +// offset += pColInfoData->info.bytes; + if (leftx == rightx) { +// break; + return 0; + } else { +// if (pOrder->order == TSDB_ORDER_ASC) { + return (leftx < rightx)? -1:1; +// } else { +// return (leftx < rightx)? 1:-1; +// } + } +// } +// default: +// assert(0); +// } +// } + + return 0; +} + +int32_t varColSort(SColumnInfoData* pColumnInfoData, SBlockOrderInfo* pOrder) { + +} + +int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst) { +// Allocate the additional buffer. + int64_t p0 = taosGetTimestampUs(); + + SSDataBlockSortHelper helper = {.nullFirst = nullFirst, .pDataBlock = pDataBlock, .orderInfo = pOrderInfo}; + + uint32_t rows = pDataBlock->info.rows; + SHelper* index = createTupleIndex_rv(rows, helper.orderInfo, pDataBlock); + if (index == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } + + taosqsort(index, rows, sizeof(SHelper), &helper, dataBlockCompar_rv); + + int64_t p1 = taosGetTimestampUs(); + SColumnInfoData* pCols = createHelpColInfoData(pDataBlock); + if (pCols == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } + + int64_t p2 = taosGetTimestampUs(); + + // int32_t code = blockDataAssign(pCols, pDataBlock, index); + // if (code != TSDB_CODE_SUCCESS) { + // terrno = code; + // return code; + // } + + int64_t p3 = taosGetTimestampUs(); + + copyBackToBlock(pDataBlock, pCols); + int64_t p4 = taosGetTimestampUs(); + + printf("sort:%ld, create:%ld, assign:%ld, copyback:%ld, rows:%d\n", p1 - p0, p2 - p1, p3 - p2, p4 - p3, rows); + // destroyTupleIndex(index); +} + void blockDataClearup(SSDataBlock* pDataBlock, bool hasVarCol) { pDataBlock->info.rows = 0; diff --git a/source/common/src/ttszip.c b/source/common/src/ttszip.c index 41eebc5da4..6d57992c35 100644 --- a/source/common/src/ttszip.c +++ b/source/common/src/ttszip.c @@ -344,8 +344,8 @@ STSBlock* readDataFromDisk(STSBuf* pTSBuf, int32_t order, bool decomp) { UNUSED(ret); } - fread(&pBlock->tag.nType, sizeof(pBlock->tag.nType), 1, pTSBuf->f); - fread(&pBlock->tag.nLen, sizeof(pBlock->tag.nLen), 1, pTSBuf->f); + int32_t ret = fread(&pBlock->tag.nType, sizeof(pBlock->tag.nType), 1, pTSBuf->f); + ret = fread(&pBlock->tag.nLen, sizeof(pBlock->tag.nLen), 1, pTSBuf->f); // NOTE: mix types tags are not supported size_t sz = 0; diff --git a/source/libs/executor/test/executorTests.cpp b/source/libs/executor/test/executorTests.cpp index 6e3e6a8ffc..f4b5f71ebf 100644 --- a/source/libs/executor/test/executorTests.cpp +++ b/source/libs/executor/test/executorTests.cpp @@ -65,16 +65,16 @@ SSDataBlock* getDummyBlock(void* param, bool* newgroup) { taosArrayPush(pInfo->pBlock->pDataBlock, &colInfo); - SColumnInfoData colInfo1 = {0}; - colInfo1.info.type = TSDB_DATA_TYPE_BINARY; - colInfo1.info.bytes = 40; - colInfo1.info.colId = 2; - - colInfo1.varmeta.allocLen = 0;//numOfRows * sizeof(int32_t); - colInfo1.varmeta.length = 0; - colInfo1.varmeta.offset = static_cast(calloc(1, numOfRows * sizeof(int32_t))); - - taosArrayPush(pInfo->pBlock->pDataBlock, &colInfo1); +// SColumnInfoData colInfo1 = {0}; +// colInfo1.info.type = TSDB_DATA_TYPE_BINARY; +// colInfo1.info.bytes = 40; +// colInfo1.info.colId = 2; +// +// colInfo1.varmeta.allocLen = 0;//numOfRows * sizeof(int32_t); +// colInfo1.varmeta.length = 0; +// colInfo1.varmeta.offset = static_cast(calloc(1, numOfRows * sizeof(int32_t))); +// +// taosArrayPush(pInfo->pBlock->pDataBlock, &colInfo1); } else { blockDataClearup(pInfo->pBlock, true); } @@ -86,18 +86,18 @@ SSDataBlock* getDummyBlock(void* param, bool* newgroup) { for(int32_t i = 0; i < numOfRows; ++i) { SColumnInfoData* pColInfo = static_cast(TARRAY_GET_ELEM(pBlock->pDataBlock, 0)); - int32_t v = (--pInfo->startVal); + int32_t v = rand();//(++pInfo->startVal); colDataAppend(pColInfo, i, reinterpret_cast(&v), false); - sprintf(buf, "this is %d row", i); - STR_TO_VARSTR(b1, buf); - - SColumnInfoData* pColInfo2 = static_cast(TARRAY_GET_ELEM(pBlock->pDataBlock, 1)); - colDataAppend(pColInfo2, i, b1, false); +// sprintf(buf, "this is %d row", i); +// STR_TO_VARSTR(b1, buf); +// +// SColumnInfoData* pColInfo2 = static_cast(TARRAY_GET_ELEM(pBlock->pDataBlock, 1)); +// colDataAppend(pColInfo2, i, b1, false); } pBlock->info.rows = numOfRows; - pBlock->info.numOfCols = 2; + pBlock->info.numOfCols = 1; pInfo->current += 1; return pBlock; @@ -245,7 +245,43 @@ TEST(testCase, build_executor_tree_Test) { // } //} +typedef struct su { + int32_t v; + char *c; +} su; + +int32_t cmp(const void* p1, const void* p2) { + su* v1 = (su*) p1; + su* v2 = (su*) p2; + + int32_t x1 = *(int32_t*) v1->c; + int32_t x2 = *(int32_t*) v2->c; + if (x1 == x2) { + return 0; + } else { + return x1 < x2? -1:1; + } +} + TEST(testCase, external_sort_Test) { +#if 0 + su* v = static_cast(calloc(1000000, sizeof(su))); + for(int32_t i = 0; i < 1000000; ++i) { + v[i].v = rand(); + v[i].c = static_cast(malloc(4)); + *(int32_t*) v[i].c = i; + } + + qsort(v, 1000000, sizeof(su), cmp); +// for(int32_t i = 0; i < 1000; ++i) { +// printf("%d ", v[i]); +// } +// printf("\n"); + return; +#endif + + srand(time(NULL)); + SArray* pOrderVal = taosArrayInit(4, sizeof(SOrder)); SOrder o = {0}; o.order = TSDB_ORDER_ASC; @@ -260,9 +296,9 @@ TEST(testCase, external_sort_Test) { SExprInfo *exp1 = static_cast(calloc(1, sizeof(SExprInfo))); exp1->base.resSchema = createSchema(TSDB_DATA_TYPE_BINARY, 40, 2, "res1"); - taosArrayPush(pExprInfo, &exp1); +// taosArrayPush(pExprInfo, &exp1); - SOperatorInfo* pOperator = createOrderOperatorInfo(createDummyOperator(100000), pExprInfo, pOrderVal); + SOperatorInfo* pOperator = createOrderOperatorInfo(createDummyOperator(50000), pExprInfo, pOrderVal); bool newgroup = false; SSDataBlock* pRes = NULL; diff --git a/source/libs/parser/src/astToMsg.c b/source/libs/parser/src/astToMsg.c index 697fd0c4cb..ac715045ba 100644 --- a/source/libs/parser/src/astToMsg.c +++ b/source/libs/parser/src/astToMsg.c @@ -103,7 +103,7 @@ SShowReq* buildShowMsg(SShowInfo* pShowInfo, SParseContext *pCtx, SMsgBuf* pMsgB SToken* pEpAddr = &pShowInfo->prefix; assert(pEpAddr->n > 0 && pEpAddr->type > 0); - strncpy(pShowMsg->payload, pEpAddr->z, pEpAddr->n); + tstrncpy(pShowMsg->payload, pEpAddr->z, pEpAddr->n + 1); pShowMsg->payloadLen = htons(pEpAddr->n); } -- GitLab