提交 642651c1 编写于 作者: H Haojun Liao

refactor: do some internal refactor.

上级 c7560202
...@@ -283,96 +283,95 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha ...@@ -283,96 +283,95 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
case TSDB_DATA_TYPE_BIGINT: { case TSDB_DATA_TYPE_BIGINT: {
int64_t* p = (int64_t*) output; int64_t* p = (int64_t*) output;
int32_t gRemainder = (nelements - _pos);
int32_t num = (gRemainder > elems)? elems:gRemainder;
int32_t batch = num >> 2;
int32_t remain = num & 0x03;
if (selector == 0 || selector == 1) { if (selector == 0 || selector == 1) {
int32_t gRemainder = nelements - _pos; if (tsAVX2Enable && tsSIMDBuiltins) {
int32_t num = gRemainder < elems? gRemainder:elems; for (int32_t i = 0; i < batch; ++i) {
__m256i prev = _mm256_set1_epi64x(prev_value);
int32_t batch = num >> 2; _mm256_storeu_si256((__m256i *)&p[_pos], prev);
int32_t remainder = num & 0x03; _pos += 4;
for (int32_t i = 0; i < batch; ++i) { }
p[_pos++] = prev_value;
p[_pos++] = prev_value;
p[_pos++] = prev_value;
p[_pos++] = prev_value;
}
for (int32_t i = 0; i < remainder; ++i) { for (int32_t i = 0; i < remain; ++i) {
p[_pos++] = prev_value; p[_pos++] = prev_value;
}
} else {
for (int32_t i = 0; i < elems && count < nelements; i++, count++) {
p[_pos++] = prev_value;
v += bit;
}
} }
count += num;
} else { } else {
int32_t gRemainder = (nelements - _pos); if (tsAVX2Enable && tsSIMDBuiltins) {
int32_t num = (gRemainder > elems)? elems:gRemainder; __m256i base = _mm256_set1_epi64x(w);
__m256i maskVal = _mm256_set1_epi64x(mask);
int32_t batch = num >> 2;
int32_t remain = num & 0x03; __m256i shiftBits = _mm256_set_epi64x(bit * 3 + 4, bit * 2 + 4, bit + 4, 4);
#if 1 __m256i inc = _mm256_set1_epi64x(bit << 2);
__m256i base = _mm256_set1_epi64x(w);
__m256i maskVal = _mm256_set1_epi64x(mask); for (int32_t i = 0; i < batch; ++i) {
__m256i after = _mm256_srlv_epi64(base, shiftBits);
__m256i shiftBits = _mm256_set_epi64x(bit * 3 + 4, bit * 2 + 4, bit + 4, 4); __m256i zigzagVal = _mm256_and_si256(after, maskVal);
__m256i inc = _mm256_set1_epi64x(bit << 2);
// ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))
for(int32_t i = 0; i < batch; ++i) { __m256i signmask = _mm256_and_si256(_mm256_set1_epi64x(1), zigzagVal);
__m256i after = _mm256_srlv_epi64(base, shiftBits); signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask);
__m256i zigzagVal= _mm256_and_si256(after, maskVal); // get the four zigzag values here
__m256i delta = _mm256_xor_si256(_mm256_srli_epi64(zigzagVal, 1), signmask);
// ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))
__m256i signmask = _mm256_and_si256(_mm256_set1_epi64x(1), zigzagVal); // calculate the cumulative sum (prefix sum) for each number
signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask); // decode[0] = prev_value + final[0]
// get the four zigzag values here // decode[1] = decode[0] + final[1] -----> prev_value + final[0] + final[1]
__m256i delta = _mm256_xor_si256(_mm256_srli_epi64(zigzagVal, 1), signmask); // decode[2] = decode[1] + final[2] -----> prev_value + final[0] + final[1] + final[2]
// decode[3] = decode[2] + final[3] -----> prev_value + final[0] + final[1] + final[2] + final[3]
// calculate the cumulative sum (prefix sum) for each number
// decode[0] = prev_value + final[0] // 1, 2, 3, 4
// decode[1] = decode[0] + final[1] -----> prev_value + final[0] + final[1] //+ 0, 1, 2, 3
// decode[2] = decode[1] + final[2] -----> prev_value + final[0] + final[1] + final[2] // 1, 3, 5, 7
// decode[3] = decode[2] + final[3] -----> prev_value + final[0] + final[1] + final[2] + final[3] // shift and add for the first round
__m128i prev = _mm_set1_epi64x(prev_value);
// 1, 2, 3, 4 delta = _mm256_add_epi64(delta, _mm256_slli_si256(delta, 8));
//+ 0, 1, 2, 3 _mm256_storeu_si256((__m256i *)&p[_pos], delta);
// 1, 3, 5, 7
// shift and add for the first round // 1, 3, 5, 7
__m128i prev = _mm_set1_epi64x(prev_value); //+ 0, 0, 1, 3
delta = _mm256_add_epi64(delta, _mm256_slli_si256(delta, 8)); // 1, 3, 6, 10
_mm256_storeu_si256((__m256i *)&p[_pos], delta); // shift and add operation for the second round
__m128i firstPart = _mm_loadu_si128((__m128i *)&p[_pos]);
// 1, 3, 5, 7 __m128i secPart = _mm_add_epi64(_mm_loadu_si128((__m128i *)&p[_pos + 2]), firstPart);
//+ 0, 0, 1, 3 firstPart = _mm_add_epi64(firstPart, prev);
// 1, 3, 6, 10 secPart = _mm_add_epi64(secPart, prev);
// shift and add operation for the second round
__m128i firstPart = _mm_loadu_si128((__m128i *)&p[_pos]); // save it in the memory
__m128i secPart = _mm_add_epi64(_mm_loadu_si128((__m128i *)&p[_pos + 2]), firstPart); _mm_storeu_si128((__m128i *)&p[_pos], firstPart);
firstPart = _mm_add_epi64(firstPart, prev); _mm_storeu_si128((__m128i *)&p[_pos + 2], secPart);
secPart = _mm_add_epi64(secPart, prev);
shiftBits = _mm256_add_epi64(shiftBits, inc);
// save it in the memory prev_value = p[_pos + 3];
_mm_storeu_si128((__m128i *)&p[_pos], firstPart); _pos += 4;
_mm_storeu_si128((__m128i *)&p[_pos + 2], secPart); }
shiftBits = _mm256_add_epi64(shiftBits, inc);
prev_value = p[_pos + 3];
_pos += 4;
}
// handle the remaining values // handle the remaining values
for (int32_t i = 0; i < remain; i++) { for (int32_t i = 0; i < remain; i++) {
zigzag_value = ((w >> (v + (batch * bit))) & mask); zigzag_value = ((w >> (v + (batch * bit))) & mask);
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value); prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
p[_pos++] = prev_value; p[_pos++] = prev_value;
v += bit; v += bit;
} }
#else } else {
for (int32_t i = 0; i < elems && count < nelements; i++, count++) { for (int32_t i = 0; i < elems && count < nelements; i++, count++) {
zigzag_value = ((w >> v) & mask); zigzag_value = ((w >> v) & mask);
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value); prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
p[_pos++] = prev_value; p[_pos++] = prev_value;
v += bit; v += bit;
}
} }
#endif
} }
} break; } break;
case TSDB_DATA_TYPE_INT: { case TSDB_DATA_TYPE_INT: {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册