提交 40489889 编写于 作者: G Ganlin Zhao

fix(query): fix avg calculation error after SIMD optimize

TD-20803
上级 4ea7d139
...@@ -133,6 +133,14 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type ...@@ -133,6 +133,14 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type
sum = _mm256_add_epi64(sum, extVal); sum = _mm256_add_epi64(sum, extVal);
p += width; p += width;
} }
// let sum up the final results
const int64_t* q = (const int64_t*)&sum;
pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
for (int32_t j = 0; j < remainder; ++j) {
pRes->sum.isum += plist[j + rounds * width];
}
} else { } else {
const uint8_t* p = (const uint8_t*)plist; const uint8_t* p = (const uint8_t*)plist;
...@@ -142,16 +150,16 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type ...@@ -142,16 +150,16 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type
sum = _mm256_add_epi64(sum, extVal); sum = _mm256_add_epi64(sum, extVal);
p += width; p += width;
} }
}
// let sum up the final results // let sum up the final results
const int64_t* q = (const int64_t*)&sum; const uint64_t* q = (const uint64_t*)&sum;
pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; pRes->sum.usum += q[0] + q[1] + q[2] + q[3];
int32_t startIndex = rounds * width; for (int32_t j = 0; j < remainder; ++j) {
for (int32_t j = 0; j < remainder; ++j) { pRes->sum.usum += (uint8_t)plist[j + rounds * width];
pRes->sum.isum += plist[j + startIndex]; }
} }
#endif #endif
} }
...@@ -176,8 +184,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty ...@@ -176,8 +184,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty
sum = _mm256_add_epi64(sum, extVal); sum = _mm256_add_epi64(sum, extVal);
p += width; p += width;
} }
// let sum up the final results
const int64_t* q = (const int64_t*)&sum;
pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
for (int32_t j = 0; j < remainder; ++j) {
pRes->sum.isum += plist[j + rounds * width];
}
} else { } else {
const uint8_t* p = (const uint8_t*)plist; const uint16_t* p = (const uint16_t*)plist;
for(int32_t i = 0; i < rounds; ++i) { for(int32_t i = 0; i < rounds; ++i) {
__m128i val = _mm_lddqu_si128((__m128i*)p); __m128i val = _mm_lddqu_si128((__m128i*)p);
...@@ -185,16 +201,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty ...@@ -185,16 +201,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty
sum = _mm256_add_epi64(sum, extVal); sum = _mm256_add_epi64(sum, extVal);
p += width; p += width;
} }
}
// let sum up the final results // let sum up the final results
const int64_t* q = (const int64_t*)&sum; const uint64_t* q = (const uint64_t*)&sum;
pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; pRes->sum.usum += q[0] + q[1] + q[2] + q[3];
int32_t startIndex = rounds * width; for (int32_t j = 0; j < remainder; ++j) {
for (int32_t j = 0; j < remainder; ++j) { pRes->sum.usum += (uint16_t)plist[j + rounds * width];
pRes->sum.isum += plist[j + startIndex]; }
} }
#endif #endif
} }
...@@ -219,6 +235,14 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty ...@@ -219,6 +235,14 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty
sum = _mm256_add_epi64(sum, extVal); sum = _mm256_add_epi64(sum, extVal);
p += width; p += width;
} }
// let sum up the final results
const int64_t* q = (const int64_t*)&sum;
pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
for (int32_t j = 0; j < remainder; ++j) {
pRes->sum.isum += plist[j + rounds * width];
}
} else { } else {
const uint32_t* p = (const uint32_t*)plist; const uint32_t* p = (const uint32_t*)plist;
...@@ -228,16 +252,16 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty ...@@ -228,16 +252,16 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty
sum = _mm256_add_epi64(sum, extVal); sum = _mm256_add_epi64(sum, extVal);
p += width; p += width;
} }
}
// let sum up the final results // let sum up the final results
const int64_t* q = (const int64_t*)&sum; const uint64_t* q = (const uint64_t*)&sum;
pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; pRes->sum.usum += q[0] + q[1] + q[2] + q[3];
int32_t startIndex = rounds * width; for (int32_t j = 0; j < remainder; ++j) {
for (int32_t j = 0; j < remainder; ++j) { pRes->sum.usum += (uint32_t)plist[j + rounds * width];
pRes->sum.isum += plist[j + startIndex]; }
} }
#endif #endif
} }
...@@ -262,13 +286,22 @@ static void i64VectorSumAVX2(const int64_t* plist, int32_t numOfRows, SAvgRes* p ...@@ -262,13 +286,22 @@ static void i64VectorSumAVX2(const int64_t* plist, int32_t numOfRows, SAvgRes* p
} }
// let sum up the final results // let sum up the final results
const int64_t* q = (const int64_t*)&sum; if (type == TSDB_DATA_TYPE_BIGINT) {
pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; const int64_t* q = (const int64_t*)&sum;
pRes->sum.isum += q[0] + q[1] + q[2] + q[3];
int32_t startIndex = rounds * width; for (int32_t j = 0; j < remainder; ++j) {
for (int32_t j = 0; j < remainder; ++j) { pRes->sum.isum += plist[j + rounds * width];
pRes->sum.isum += plist[j + startIndex]; }
} else {
const uint64_t* q = (const uint64_t*)&sum;
pRes->sum.usum += q[0] + q[1] + q[2] + q[3];
for (int32_t j = 0; j < remainder; ++j) {
pRes->sum.usum += (uint64_t)plist[j + rounds * width];
}
} }
#endif #endif
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册