enh(query): support simd in min/max query.

0ed2c923 · Haojun Liao · d7143926 · 0ed2c923 · 0ed2c923
显示空白变更内容
内联并排

Showing 2 changed files with 163 additions and 279 deletions

source/libs/function/src/detail/tminmax.c source/libs/function/src/detail/tminmax.c +162 -279

source/libs/function/src/udfd.c source/libs/function/src/udfd.c +1 -0

未找到文件。
--- a/source/libs/function/src/detail/tminmax.c
+++ b/source/libs/function/src/detail/tminmax.c
@@ -19,14 +19,20 @@
 #include "tfunctionInt.h"
 #include "tglobal.h"
-static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc) {
+static void calculateRounds(int32_t numOfRows, int32_t bytes, int32_t* remainder, int32_t* rounds, int32_t* width) {
-  int32_t        v = 0;
  const int32_t bitWidth = 256;
-  const int32_t* p = pData;
-  int32_t width = (bitWidth>>3u) / sizeof(int32_t);
+  *width = (bitWidth>>3u) / bytes;
-  int32_t remain = numOfRows % width;
+  *remainder = numOfRows % (*width);
-  int32_t rounds = numOfRows / width;
+  *rounds = numOfRows / (*width);
+}
+static int8_t i8VectorCmpAVX2(const int8_t* pData, int32_t numOfRows, bool isMinFunc) {
+  int8_t        v = 0;
+  const int8_t* p = pData;
+  int32_t width, remain, rounds;
+  calculateRounds(numOfRows, sizeof(int8_t), &remain, &rounds, &width);
 #if __AVX2__
  __m256i next;
@@ -36,12 +42,12 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is
  if (!isMinFunc) {  // max function
    for (int32_t i = 0; i < rounds; ++i) {
      next = _mm256_lddqu_si256((__m256i*)p);
-      initialVal = _mm256_max_epi32(initialVal, next);
+      initialVal = _mm256_max_epi8(initialVal, next);
      p += width;
    }
-    // let compare  the final results
+    // let sum up the final results
-    const int32_t* q = (const int32_t*)&initialVal;
+    const int8_t* q = (const int8_t*)&initialVal;
    v = TMAX(q[0], q[1]);
    for (int32_t k = 1; k < width; ++k) {
      v = TMAX(v, q[k]);
@@ -57,14 +63,15 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is
  } else {  // min function
    for (int32_t i = 0; i < rounds; ++i) {
      next = _mm256_lddqu_si256((__m256i*)p);
-      initialVal = _mm256_min_epi32(initialVal, next);
+      initialVal = _mm256_min_epi8(initialVal, next);
      p += width;
    }
    // let sum up the final results
-    const int32_t* q = (const int32_t*)&initialVal;
+    const int8_t* q = (const int8_t*)&initialVal;
    v = TMIN(q[0], q[1]);
-    for (int32_t k = 1; k < width; ++k) {
+    for(int32_t k = 1; k < width; ++k) {
      v = TMIN(v, q[k]);
    }
@@ -81,58 +88,58 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is
  return v;
 }
-static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
+static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc) {
-  float v = 0;
+  int16_t        v = 0;
-  const int32_t bitWidth = 256;
+  const int16_t* p = pData;
-  const float* p = pData;
-  int32_t width = (bitWidth>>3u) / sizeof(float);
-  int32_t remain = numOfRows % width;
-  int32_t rounds = numOfRows / width;
-#if __AVX__
+  int32_t width, remain, rounds;
+  calculateRounds(numOfRows, sizeof(int16_t), &remain, &rounds, &width);
-  __m256 next;
+#if __AVX2__
-  __m256 initialVal = _mm256_loadu_ps(p);
+  __m256i next;
+  __m256i initialVal = _mm256_lddqu_si256((__m256i*)p);
  p += width;
  if (!isMinFunc) {  // max function
-    for (int32_t i = 1; i < rounds; ++i) {
+    for (int32_t i = 0; i < rounds; ++i) {
-      next = _mm256_loadu_ps(p);
+      next = _mm256_lddqu_si256((__m256i*)p);
-      initialVal = _mm256_max_ps(initialVal, next);
+      initialVal = _mm256_max_epi16(initialVal, next);
      p += width;
    }
    // let sum up the final results
-    const float* q = (const float*)&initialVal;
+    const int16_t* q = (const int16_t*)&initialVal;
    v = TMAX(q[0], q[1]);
-    for (int32_t k = 1; k < width; ++k) {
+    for(int32_t k = 1; k < width; ++k) {
      v = TMAX(v, q[k]);
    }
    // calculate the front and the reminder items in array list
    int32_t start = rounds * width;
    for (int32_t j = 0; j < remain; ++j) {
-      if (v < p[j + width]) {
+      if (v < p[j + start]) {
-        v = p[j + width];
+        v = p[j + start];
      }
    }
  } else {  // min function
-    for (int32_t i = 1; i < rounds; ++i) {
+    for (int32_t i = 0; i < rounds; ++i) {
-      next = _mm256_loadu_ps(p);
+      next = _mm256_lddqu_si256((__m256i*)p);
-      initialVal = _mm256_min_ps(initialVal, next);
+      initialVal = _mm256_min_epi16(initialVal, next);
      p += width;
    }
    // let sum up the final results
-    const float* q = (const float*)&initialVal;
+    const int16_t* q = (const int16_t*)&initialVal;
    v = TMIN(q[0], q[1]);
-    for (int32_t k = 1; k < width; ++k) {
+    for(int32_t k = 1; k < width; ++k) {
      v = TMIN(v, q[k]);
    }
-    // calculate the front and the reminder items in array list
+    // calculate the front and the remainder items in array list
-    int32_t start = rounds * bitWidth;
+    int32_t start = rounds * width;
    for (int32_t j = 0; j < remain; ++j) {
      if (v > p[j + start]) {
        v = p[j + start];
@@ -144,14 +151,12 @@ static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMin
  return v;
 }
-static int8_t i8VectorCmpAVX2(const int8_t* pData, int32_t numOfRows, bool isMinFunc) {
+static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc) {
-  int8_t        v = 0;
+  int32_t        v = 0;
-  const int32_t  bitWidth = 256;
+  const int32_t* p = pData;
-  const int8_t* p = pData;
-  int32_t width = (bitWidth>>3u) / sizeof(int8_t);
+  int32_t width, remain, rounds;
-  int32_t remain = numOfRows % width;
+  calculateRounds(numOfRows, sizeof(int32_t), &remain, &rounds, &width);
-  int32_t rounds = numOfRows / width;
 #if __AVX2__
  __m256i next;
@@ -161,12 +166,12 @@ static int8_t i8VectorCmpAVX2(const int8_t* pData, int32_t numOfRows, bool isMin
  if (!isMinFunc) {  // max function
    for (int32_t i = 0; i < rounds; ++i) {
      next = _mm256_lddqu_si256((__m256i*)p);
-      initialVal = _mm256_max_epi8(initialVal, next);
+      initialVal = _mm256_max_epi32(initialVal, next);
      p += width;
    }
-    // let sum up the final results
+    // let compare  the final results
-    const int8_t* q = (const int8_t*)&initialVal;
+    const int32_t* q = (const int32_t*)&initialVal;
    v = TMAX(q[0], q[1]);
    for (int32_t k = 1; k < width; ++k) {
      v = TMAX(v, q[k]);
@@ -182,15 +187,14 @@ static int8_t i8VectorCmpAVX2(const int8_t* pData, int32_t numOfRows, bool isMin
  } else {  // min function
    for (int32_t i = 0; i < rounds; ++i) {
      next = _mm256_lddqu_si256((__m256i*)p);
-      initialVal = _mm256_min_epi8(initialVal, next);
+      initialVal = _mm256_min_epi32(initialVal, next);
      p += width;
    }
    // let sum up the final results
-    const int8_t* q = (const int8_t*)&initialVal;
+    const int32_t* q = (const int32_t*)&initialVal;
    v = TMIN(q[0], q[1]);
-    for(int32_t k = 1; k < width; ++k) {
+    for (int32_t k = 1; k < width; ++k) {
      v = TMIN(v, q[k]);
    }
@@ -207,32 +211,30 @@ static int8_t i8VectorCmpAVX2(const int8_t* pData, int32_t numOfRows, bool isMin
  return v;
 }
-static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc) {
+static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
-  int16_t        v = 0;
+  float v = 0;
-  const int32_t  bitWidth = 256;
+  const float* p = pData;
-  const int16_t* p = pData;
-  int32_t width = (bitWidth>>3u) / sizeof(int16_t);
+  int32_t width, remain, rounds;
-  int32_t remain = numOfRows % width;
+  calculateRounds(numOfRows, sizeof(float), &remain, &rounds, &width);
-  int32_t rounds = numOfRows / width;
-#if __AVX2__
+#if __AVX__
-  __m256i next;
-  __m256i initialVal = _mm256_lddqu_si256((__m256i*)p);
+  __m256 next;
+  __m256 initialVal = _mm256_loadu_ps(p);
  p += width;
  if (!isMinFunc) {  // max function
-    for (int32_t i = 0; i < rounds; ++i) {
+    for (int32_t i = 1; i < rounds; ++i) {
-      next = _mm256_lddqu_si256((__m256i*)p);
+      next = _mm256_loadu_ps(p);
-      initialVal = _mm256_max_epi16(initialVal, next);
+      initialVal = _mm256_max_ps(initialVal, next);
      p += width;
    }
    // let sum up the final results
-    const int16_t* q = (const int16_t*)&initialVal;
+    const float* q = (const float*)&initialVal;
    v = TMAX(q[0], q[1]);
-    for(int32_t k = 1; k < width; ++k) {
+    for (int32_t k = 1; k < width; ++k) {
      v = TMAX(v, q[k]);
    }
@@ -244,21 +246,20 @@ static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool is
      }
    }
  } else {  // min function
-    for (int32_t i = 0; i < rounds; ++i) {
+    for (int32_t i = 1; i < rounds; ++i) {
-      next = _mm256_lddqu_si256((__m256i*)p);
+      next = _mm256_loadu_ps(p);
-      initialVal = _mm256_min_epi16(initialVal, next);
+      initialVal = _mm256_min_ps(initialVal, next);
      p += width;
    }
    // let sum up the final results
-    const int16_t* q = (const int16_t*)&initialVal;
+    const float* q = (const float*)&initialVal;
    v = TMIN(q[0], q[1]);
-    for(int32_t k = 1; k < width; ++k) {
+    for (int32_t k = 1; k < width; ++k) {
      v = TMIN(v, q[k]);
    }
-    // calculate the front and the remainder items in array list
+    // calculate the front and the reminder items in array list
    int32_t start = rounds * width;
    for (int32_t j = 0; j < remain; ++j) {
      if (v > p[j + start]) {
@@ -271,13 +272,68 @@ static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool is
  return v;
 }
-static int32_t handleInt8Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
+static double doubleVectorCmpAVX(const double* pData, int32_t numOfRows, bool isMinFunc) {
-                             SMinmaxResInfo* pBuf, bool isMinFunc) {
+  double        v = 0;
-  int8_t* pData = (int8_t*)pCol->pData;
+  const double* p = pData;
-  int8_t* val = (int8_t*)&pBuf->v;
-  int32_t numOfElems = 0;
+  int32_t width, remain, rounds;
-  if (pCol->hasNull || numOfRows <= 8 || pCtx->subsidiaries.num > 0) {
+  calculateRounds(numOfRows, sizeof(double), &remain, &rounds, &width);
+#if __AVX__
+  __m256d next;
+  __m256d initialVal = _mm256_loadu_pd(p);
+  p += width;
+  if (!isMinFunc) {  // max function
+    for (int32_t i = 1; i < rounds; ++i) {
+      next = _mm256_loadu_pd(p);
+      initialVal = _mm256_max_pd(initialVal, next);
+      p += width;
+    }
+    // let sum up the final results
+    const double* q = (const double*)&initialVal;
+    v = TMAX(q[0], q[1]);
+    for (int32_t k = 1; k < width; ++k) {
+      v = TMAX(v, q[k]);
+    }
+    // calculate the front and the reminder items in array list
+    int32_t start = rounds * width;
+    for (int32_t j = 0; j < remain; ++j) {
+      if (v < p[j + start]) {
+        v = p[j + start];
+      }
+    }
+  } else {  // min function
+    for (int32_t i = 1; i < rounds; ++i) {
+      next = _mm256_loadu_pd(p);
+      initialVal = _mm256_min_pd(initialVal, next);
+      p += width;
+    }
+    // let sum up the final results
+    const double* q = (const double*)&initialVal;
+    v = TMIN(q[0], q[1]);
+    for (int32_t k = 1; k < width; ++k) {
+      v = TMIN(v, q[k]);
+    }
+    // calculate the front and the reminder items in array list
+    int32_t start = rounds * width;
+    for (int32_t j = 0; j < remain; ++j) {
+      if (v > p[j + start]) {
+        v = p[j + start];
+      }
+    }
+  }
+#endif
+  return v;
+}
+static int32_t findFirstVal(const SColumnInfoData* pCol, int32_t start, int32_t numOfRows) {
  int32_t i = start;
  while (i < (start + numOfRows)) {
    if (!colDataIsNull_f(pCol->nullbitmap, i)) {
@@ -286,6 +342,18 @@ static int32_t handleInt8Col(SColumnInfoData* pCol, int32_t start, int32_t numOf
    i += 1;
  }
+  return i;
+}
+static int32_t handleInt8Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
+                             SMinmaxResInfo* pBuf, bool isMinFunc) {
+  int8_t* pData = (int8_t*)pCol->pData;
+  int8_t* val = (int8_t*)&pBuf->v;
+  int32_t numOfElems = 0;
+  if (pCol->hasNull || numOfRows <= 32 || pCtx->subsidiaries.num > 0) {
+    int32_t i = findFirstVal(pCol, start, numOfRows);
    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
      *val = pData[i];
      if (pCtx->subsidiaries.num > 0) {
@@ -365,13 +433,7 @@ static int32_t handleInt16Col(SColumnInfoData* pCol, int32_t start, int32_t numO
  int32_t numOfElems = 0;
  if (pCol->hasNull || numOfRows <= 8 || pCtx->subsidiaries.num > 0) {
-    int32_t i = start;
+    int32_t i = findFirstVal(pCol, start, numOfRows);
-    while (i < (start + numOfRows)) {
-      if (!colDataIsNull_f(pCol->nullbitmap, i)) {
-        break;
-      }
-      i += 1;
-    }
    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
      *val = pData[i];
@@ -452,13 +514,7 @@ static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numO
  int32_t numOfElems = 0;
  if (pCol->hasNull || numOfRows <= 8 || pCtx->subsidiaries.num > 0) {
-    int32_t i = start;
+    int32_t i = findFirstVal(pCol, start, numOfRows);
-    while (i < (start + numOfRows)) {
-      if (!colDataIsNull_f(pCol->nullbitmap, i)) {
-        break;
-      }
-      i += 1;
-    }
    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
      *val = pData[i];
@@ -539,13 +595,7 @@ static int32_t handleInt64Col(SColumnInfoData* pCol, int32_t start, int32_t numO
  int32_t numOfElems = 0;
  if (pCol->hasNull || pCtx->subsidiaries.num > 0) {
-    int32_t i = start;
+    int32_t i = findFirstVal(pCol, start, numOfRows);
-    while (i < (start + numOfRows)) {
-      if (!colDataIsNull_f(pCol->nullbitmap, i)) {
-        break;
-      }
-      i += 1;
-    }
    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
      *val = pData[i];
@@ -616,18 +666,11 @@ static int32_t handleInt64Col(SColumnInfoData* pCol, int32_t start, int32_t numO
 static int32_t handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
                              SMinmaxResInfo* pBuf, bool isMinFunc) {
  float* pData = (float*)pCol->pData;
-  double* val = (double*)&pBuf->v;
+  float* val = (float*)&pBuf->v;
  int32_t numOfElems = 0;
  if (pCol->hasNull || numOfRows < 8 || pCtx->subsidiaries.num > 0) {
-    int32_t i = start;
+    int32_t i = findFirstVal(pCol, start, numOfRows);
-    while (i < (start + numOfRows)) {
-      if (!colDataIsNull_f(pCol->nullbitmap, i)) {
-        break;
-      }
-      i += 1;
-    }
    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
      *val = pData[i];
      if (pCtx->subsidiaries.num > 0) {
@@ -701,18 +744,12 @@ static int32_t handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numO
 static int32_t handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
                              SMinmaxResInfo* pBuf, bool isMinFunc) {
-  float* pData = (float*)pCol->pData;
+  double* pData = (double*)pCol->pData;
  double* val = (double*)&pBuf->v;
  int32_t numOfElems = 0;
-  if (pCol->hasNull || numOfRows < 8 || pCtx->subsidiaries.num > 0) {
+  if (pCol->hasNull || numOfRows < 4 || pCtx->subsidiaries.num > 0) {
-    int32_t i = start;
+    int32_t i = findFirstVal(pCol, start, numOfRows);
-    while (i < (start + numOfRows)) {
-      if (!colDataIsNull_f(pCol->nullbitmap, i)) {
-        break;
-      }
-      i += 1;
-    }
    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
      *val = pData[i];
@@ -757,7 +794,7 @@ static int32_t handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t num
  } else {  // not has null value
    // AVX version to speedup the loop
    if (tsAVXEnable && tsSIMDEnable) {
-      *val = (double) floatVectorCmpAVX(pData, numOfRows, isMinFunc);
+      *val = (double) doubleVectorCmpAVX(pData, numOfRows, isMinFunc);
    } else {
      if (!pBuf->assign) {
        *val = pData[0];
@@ -813,7 +850,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
  if (IS_NULL_TYPE(type)) {
    numOfElems = 0;
-    goto _min_max_over;
+    goto _over;
  }
  // data in current data block are qualified to the query
@@ -914,117 +951,10 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
      numOfElems = handleInt8Col(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
    } else if (type == TSDB_DATA_TYPE_SMALLINT) {
      numOfElems = handleInt16Col(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
-      int16_t* pData = (int16_t*)pCol->pData;
-      int16_t* val = (int16_t*)&pBuf->v;
-      for (int32_t i = start; i < start + numOfRows; ++i) {
-        if ((pCol->hasNull) && colDataIsNull_f(pCol->nullbitmap, i)) {
-          continue;
-        }
-        if (!pBuf->assign) {
-          *val = pData[i];
-          if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          // ignore the equivalent data value
-          // NOTE: An faster version to avoid one additional comparison with FPU.
-          if (isMinFunc) {  // min
-            if (*val > pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          } else {  // max
-            if (*val < pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          }
-        }
-        numOfElems += 1;
-      }
    } else if (type == TSDB_DATA_TYPE_INT) {
      numOfElems = handleInt32Col(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
-#if 0
-      for (int32_t i = start; i < start + numOfRows; ++i) {
-        if ((pCol->hasNull) && colDataIsNull_f(pCol->nullbitmap, i)) {
-          continue;
-        }
-        if (!pBuf->assign) {
-          *val = pData[i];
-          if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          // ignore the equivalent data value
-          // NOTE: An faster version to avoid one additional comparison with FPU.
-          if (isMinFunc) {  // min
-            if (*val > pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          } else {  // max
-            if (*val < pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          }
-        }
-        numOfElems += 1;
-      }
-#endif
    } else if (type == TSDB_DATA_TYPE_BIGINT) {
-      int64_t* pData = (int64_t*)pCol->pData;
+      numOfElems = handleInt64Col(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
-      int64_t* val = (int64_t*)&pBuf->v;
-      for (int32_t i = start; i < start + numOfRows; ++i) {
-        if ((pCol->hasNull) && colDataIsNull_f(pCol->nullbitmap, i)) {
-          continue;
-        }
-        if (!pBuf->assign) {
-          *val = pData[i];
-          if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          // ignore the equivalent data value
-          // NOTE: An faster version to avoid one additional comparison with FPU.
-          if (isMinFunc) {  // min
-            if (*val > pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          } else {  // max
-            if (*val < pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          }
-        }
-        numOfElems += 1;
-      }
    }
  } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
    if (type == TSDB_DATA_TYPE_UTINYINT) {
@@ -1215,56 +1145,9 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
    }
  } else if (type == TSDB_DATA_TYPE_FLOAT) {
    numOfElems = handleFloatCol(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
-#if 0
-    for (int32_t i = start; i < start + numOfRows; ++i) {
-      if ((pCol->hasNull) && colDataIsNull_f(pCol->nullbitmap, i)) {
-        continue;
-      }
-      if (!pBuf->assign) {
-        *val = pData[i];
-        if (pCtx->subsidiaries.num > 0) {
-          pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-        }
-        pBuf->assign = true;
-      } else {
-#if 0
-        if ((*val) == pData[i]) {
-          continue;
-        }
-        if ((*val < pData[i]) ^ isMinFunc) {
-          *val = pData[i];
-          if (pCtx->subsidiaries.num > 0) {
-            updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-          }
-        }
-#endif
-        // NOTE: An faster version to avoid one additional comparison with FPU.
-        if (isMinFunc) {  // min
-          if (*val > pData[i]) {
-            *val = pData[i];
-            if (pCtx->subsidiaries.num > 0) {
-              updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-            }
-          }
-        } else {  // max
-          if (*val < pData[i]) {
-            *val = pData[i];
-            if (pCtx->subsidiaries.num > 0) {
-              updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-            }
-          }
-        }
-      }
-      numOfElems += 1;
-    }
-#endif
  }
-_min_max_over:
+_over:
  if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pBuf->nullTupleSaved) {
    pBuf->nullTuplePos = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, NULL);
    pBuf->nullTupleSaved = true;

--- a/source/libs/function/src/udfd.c
+++ b/source/libs/function/src/udfd.c
@@ -27,6 +27,7 @@
 #include "tglobal.h"
 #include "tmsg.h"
 #include "trpc.h"
+#include "tmisce.h"
 // clang-foramt on
 typedef struct SUdfdContext {