fix(query): fix avg calculation error after SIMD optimization

TD-20803

fix(query): fix avg calculation error after SIMD optimization
TD-20803
40489889 · Ganlin Zhao · 4ea7d139 · 40489889
隐藏空白更改
内联并排

Showing 1 changed file with 60 additions and 27 deletions

source/libs/function/src/detail/tavgfunction.c source/libs/function/src/detail/tavgfunction.c +60 -27

未找到文件。
--- a/source/libs/function/src/detail/tavgfunction.c
+++ b/source/libs/function/src/detail/tavgfunction.c
@@ -133,6 +133,14 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type
      sum = _mm256_add_epi64(sum, extVal);
      p += width;
    }
+
+    // let sum up the final results
+    const int64_t* q = (const int64_t*)&sum;
+    pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
+
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.isum += plist[j + rounds * width];
+    }
  } else {
    const uint8_t* p = (const uint8_t*)plist;

@@ -142,16 +150,16 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type
      sum = _mm256_add_epi64(sum, extVal);
      p += width;
    }
-  }

-  // let sum up the final results
-  const int64_t* q = (const int64_t*)&sum;
-  pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
+    // let sum up the final results
+    const uint64_t* q = (const uint64_t*)&sum;
+    pRes->sum.usum += q[0] + q[1] + q[2] + q[3];

-  int32_t startIndex = rounds * width;
-  for (int32_t j = 0; j < remainder; ++j) {
-    pRes->sum.isum += plist[j + startIndex];
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.usum += (uint8_t)plist[j + rounds * width];
+    }
  }
+
 #endif
 }

@@ -176,8 +184,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty
      sum = _mm256_add_epi64(sum, extVal);
      p += width;
    }
+
+    // let sum up the final results
+    const int64_t* q = (const int64_t*)&sum;
+    pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
+
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.isum += plist[j + rounds * width];
+    }
  } else {
-    const uint8_t* p = (const uint8_t*)plist;
+    const uint16_t* p = (const uint16_t*)plist;

    for(int32_t i = 0; i < rounds; ++i) {
      __m128i val = _mm_lddqu_si128((__m128i*)p);
@@ -185,16 +201,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty
      sum = _mm256_add_epi64(sum, extVal);
      p += width;
    }
-  }

-  // let sum up the final results
-  const int64_t* q = (const int64_t*)&sum;
-  pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
+    // let sum up the final results
+    const uint64_t* q = (const uint64_t*)&sum;
+    pRes->sum.usum += q[0] + q[1] + q[2] + q[3];

-  int32_t startIndex = rounds * width;
-  for (int32_t j = 0; j < remainder; ++j) {
-    pRes->sum.isum += plist[j + startIndex];
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.usum += (uint16_t)plist[j + rounds * width];
+    }
  }
+
 #endif
 }

@@ -219,6 +235,14 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty
      sum = _mm256_add_epi64(sum, extVal);
      p += width;
    }
+
+    // let sum up the final results
+    const int64_t* q = (const int64_t*)&sum;
+    pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
+
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.isum += plist[j + rounds * width];
+    }
  } else {
    const uint32_t* p = (const uint32_t*)plist;

@@ -228,16 +252,16 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty
      sum = _mm256_add_epi64(sum, extVal);
      p += width;
    }
-  }

-  // let sum up the final results
-  const int64_t* q = (const int64_t*)&sum;
-  pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
+    // let sum up the final results
+    const uint64_t* q = (const uint64_t*)&sum;
+    pRes->sum.usum += q[0] + q[1] + q[2] + q[3];

-  int32_t startIndex = rounds * width;
-  for (int32_t j = 0; j < remainder; ++j) {
-    pRes->sum.isum += plist[j + startIndex];
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.usum += (uint32_t)plist[j + rounds * width];
+    }
  }
+
 #endif
 }

@@ -262,13 +286,22 @@ static void i64VectorSumAVX2(const int64_t* plist, int32_t numOfRows, SAvgRes* p
  }

  // let sum up the final results
-  const int64_t* q = (const int64_t*)&sum;
-  pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
+  if (type == TSDB_DATA_TYPE_BIGINT) {
+    const int64_t* q = (const int64_t*)&sum;
+    pRes->sum.isum += q[0] + q[1] + q[2] + q[3];

-  int32_t startIndex = rounds * width;
-  for (int32_t j = 0; j < remainder; ++j) {
-    pRes->sum.isum += plist[j + startIndex];
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.isum += plist[j + rounds * width];
+    }
+  } else {
+    const uint64_t* q = (const uint64_t*)&sum;
+    pRes->sum.usum += q[0] + q[1] + q[2] + q[3];
+
+    for (int32_t j = 0; j < remainder; ++j) {
+      pRes->sum.usum += (uint64_t)plist[j + rounds * width];
+    }
  }
+
 #endif
 }