diff --git a/include/os/osMemory.h b/include/os/osMemory.h
index 14d53a7a066f60b5f0d3c0f37526b2533b3b05af..4681ff66741d4781c88dba5ccc950094c96f6769 100644
--- a/include/os/osMemory.h
+++ b/include/os/osMemory.h
@@ -37,6 +37,7 @@ void taosMemoryFree(void *ptr);
 int64_t taosMemorySize(void *ptr);
 void taosPrintBackTrace();
 void taosMemoryTrim(int32_t size);
+void *taosMemoryMallocAlign(uint32_t alignment, int64_t size);
 
 #define taosMemoryFreeClear(ptr) \
   do { \
diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c
index f5dba354405855cdc5a308dce7db91d6ac9c63cb..c79910978a2adadac8e52f413ab42693f9159517 100644
--- a/source/common/src/tdatablock.c
+++ b/source/common/src/tdatablock.c
@@ -19,6 +19,8 @@
 #include "tlog.h"
 #include "tname.h"
 
+#define MALLOC_ALIGN_BYTES 32
+
 int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRows) {
   ASSERT(pColumnInfoData != NULL);
   if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) {
@@ -1163,6 +1165,7 @@ static int32_t doEnsureCapacity(SColumnInfoData* pColumn, const SDataBlockInfo*
     pColumn->varmeta.offset = (int32_t*)tmp;
     memset(&pColumn->varmeta.offset[existedRows], 0, sizeof(int32_t) * (numOfRows - existedRows));
   } else {
+    // prepare for the null bitmap
     char* tmp = taosMemoryRealloc(pColumn->nullbitmap, BitmapLen(numOfRows));
     if (tmp == NULL) {
       return TSDB_CODE_OUT_OF_MEMORY;
@@ -1173,11 +1176,19 @@ static int32_t doEnsureCapacity(SColumnInfoData* pColumn, const SDataBlockInfo*
     memset(&pColumn->nullbitmap[oldLen], 0, BitmapLen(numOfRows) - oldLen);
 
     ASSERT(pColumn->info.bytes);
-    tmp = taosMemoryRealloc(pColumn->pData, numOfRows * pColumn->info.bytes);
+
+    // make sure the allocated memory is MALLOC_ALIGN_BYTES aligned
+    tmp = taosMemoryMallocAlign(MALLOC_ALIGN_BYTES, numOfRows * pColumn->info.bytes);
     if (tmp == NULL) {
       return TSDB_CODE_OUT_OF_MEMORY;
     }
 
+    // copy back the existing data
+    if (pColumn->pData != NULL) {
+      memcpy(tmp, pColumn->pData, existedRows * pColumn->info.bytes);
+      taosMemoryFreeClear(pColumn->pData);
+    }
+
     pColumn->pData = tmp;
     if (clearPayload) {
       memset(tmp + pColumn->info.bytes * existedRows, 0, pColumn->info.bytes * (numOfRows - existedRows));
diff --git a/source/libs/function/src/detail/tminmax.c b/source/libs/function/src/detail/tminmax.c
index ed297e2b66e67f045922be8f986ca5a74e7c7e0d..a98b17287327a03d5e69179494314d69259691cc 100644
--- a/source/libs/function/src/detail/tminmax.c
+++ b/source/libs/function/src/detail/tminmax.c
@@ -271,67 +271,6 @@ static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool is
   return v;
 }
 
-//static int64_t i64VectorCmpAVX2(const int64_t* pData, int32_t numOfRows, bool isMinFunc) {
-//  int64_t v = 0;
-//  const int32_t bitWidth = 256;
-//  const int64_t* p = pData;
-//
-//  int32_t width = (bitWidth>>3u) / sizeof(int64_t);
-//  int32_t remain = numOfRows % width;
-//  int32_t rounds = numOfRows / width;
-//
-//#if __AVX2__
-//  __m256i next;
-//  __m256i initialVal = _mm256_loadu_si256((__m256i*)p);
-//  p += width;
-//
-//  if (!isMinFunc) {  // max function
-//    for (int32_t i = 0; i < rounds; ++i) {
-//      next = _mm256_lddqu_si256((__m256i*)p);
-//      initialVal = _mm256_max_epi64(initialVal, next);
-//      p += width;
-//    }
-//
-//    // let sum up the final results
-//    const int64_t* q = (const int64_t*)&initialVal;
-//    v = TMAX(q[0], q[1]);
-//    for(int32_t k = 1; k < width; ++k) {
-//      v = TMAX(v, q[k]);
-//    }
-//
-//    // calculate the front and the reminder items in array list
-//    int32_t start = rounds * width;
-//    for (int32_t j = 0; j < remain; ++j) {
-//      if (v < p[j + start]) {
-//        v = p[j + start];
-//      }
-//    }
-//  } else {  // min function
-//    for (int32_t i = 0; i < rounds; ++i) {
-//      next = _mm256_lddqu_si256((__m256i*)p);
-//      initialVal = _mm256_min_epi64(initialVal, next);
-//      p += width;
-//    }
-//
-//    // let sum up the final results
-//    const int64_t* q = (const int64_t*)&initialVal;
-//    v = TMIN(q[0], q[1]);
-//    for(int32_t k = 1; k < width; ++k) {
-//      v = TMIN(v, q[k]);
-//    }
-//
-//    // calculate the front and the remainder items in array list
-//    int32_t start = rounds * width;
-//    for (int32_t j = 0; j < remain; ++j) {
-//      if (v > p[j + start]) {
-//        v = p[j + start];
-//      }
-//    }
-//  }
-//#endif
-//
-//  return v;
-//}
 
 static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
                               SMinmaxResInfo* pBuf, bool isMinFunc) {
diff --git a/source/os/src/osMemory.c b/source/os/src/osMemory.c
index 78fa362179051fafe4adc3f618a2fdd08ed5ec10..1facff1f3be1e2c61af98b9a022470dd3f231ba6 100644
--- a/source/os/src/osMemory.c
+++ b/source/os/src/osMemory.c
@@ -345,3 +345,14 @@ void taosMemoryTrim(int32_t size) {
   malloc_trim(size);
 #endif
 }
+
+// Allocate `size` bytes at an address that is a multiple of `alignment` (passed straight to memalign).
+// Returns NULL on failure. Aligned allocation is not supported when USE_TD_MEMORY is defined.
+void* taosMemoryMallocAlign(uint32_t alignment, int64_t size) {
+#ifdef USE_TD_MEMORY
+  ASSERT(0);
+  return NULL;  // every path must return a value, even if ASSERT does not abort
+#else
+  return memalign(alignment, size);
+#endif
+}