提交 f90fa07e 编写于 作者: H Haojun Liao

refactor: add avx support zigzag decode.

上级 972f9b69
......@@ -47,6 +47,7 @@ int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]);
int32_t taosHexStrToByteArray(char hexstr[], char bytes[]);
int32_t tintToHex(uint64_t val, char hex[]);
int32_t tintToStr(uint64_t val, size_t radix, char str[]);
char *taosIpStr(uint32_t ipInt);
uint32_t ip2uint(const char *const ip_addr);
......
......@@ -235,6 +235,7 @@ void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF,
void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]);
void tsdbSttFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSttFile *pSttF, char fname[]);
void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]);
// SDelFile
void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]);
// tsdbFS.c ==============================================================================================
......
......@@ -93,8 +93,32 @@ static int32_t tGetSmaFile(uint8_t *p, SSmaFile *pSmaFile) {
// EXPOSED APIS ==================================================
void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]) {
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did),
TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pHeadF->commitID, ".head");
const char* p1 = tfsGetDiskPath(pTsdb->pVnode->pTfs, did);
int32_t len = strlen(p1);
char* p = memcpy(fname, p1, len);
p += len;
*(p++) = TD_DIRSEP[0];
len = strlen(pTsdb->path);
memcpy(p, pTsdb->path, len);
p += len;
*(p++) = TD_DIRSEP[0];
*(p++) = 'v';
p += tintToStr(TD_VID(pTsdb->pVnode), 10, p);
*(p++) = 'f';
p += tintToStr(fid, 10, p);
memcpy(p, "ver", 3);
p += 3;
p += tintToStr(pHeadF->commitID, 10, p);
memcpy(p, ".head", 5);
p[5] = 0;
}
void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]) {
......
......@@ -308,30 +308,79 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
int32_t batch = num >> 2;
int32_t remain = num & 0x03;
#if 1
#if 1
__m256i base = _mm256_set1_epi64x(w);
__m256i mask_ = _mm256_set1_epi64x(mask);
__m256i shiftBits = _mm256_set_epi64x(bit * 3 + 4, bit * 2 + 4, bit + 4, 4);
__m256i inc = _mm256_set1_epi64x(bit << 2);
for(int32_t i = 0; i < batch; ++i) {
__m256i after = _mm256_srlv_epi64(base, shiftBits);
__m256i zz = _mm256_and_si256(after, mask_);
printf("1\n");
//#define ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) // zigzag decode
__m256i signmask = _mm256_and_si256(_mm256_set_epi64x(1, 1, 1, 1), zz);
signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask);
// now here we get the four zigzag value
__m256i final = _mm256_xor_si256(_mm256_srli_epi64(zz, 1), signmask);
// calculate the cumulative sum (prefix sum)
// decode[0] = prev_value + final[0]
// decode[1] = decode[0] + final[1] -----> prev_value + final[0] + final[1]
// decode[2] = decode[1] + final[1] -----> prev_value + final[0] + final[1] + final[2]
// decode[3] = decode[2] + final[1] -----> prev_value + final[0] + final[1] + final[2] + final[3]
printf("2\n");
__m128i prev = _mm_set1_epi64x(prev_value);
final = _mm256_add_epi64(final, _mm256_slli_si256(final, 8));
// x = 1, 2, 3, 4
// + 0, 1, 2, 3
// = 1, 3, 5, 7
_mm256_storeu_si256((__m256i *)&p[_pos], final);
__m128i first = _mm_loadu_si128((__m128i *)&p[_pos]);
__m128i sec = _mm_add_epi64(_mm_loadu_si128((__m128i *)&p[_pos + 2]), first);
sec = _mm_add_epi64(sec, prev);
first = _mm_add_epi64(first, prev);
_mm_storeu_si128((__m128i *)&p[_pos], first);
_mm_storeu_si128((__m128i *)&p[_pos + 2], sec);
shiftBits = _mm256_add_epi64(shiftBits, inc);
prev_value = p[_pos + 3];
_pos += 4;
printf("3\n");
}
#else
// manual unrolling, to erase the hotspot
uint64_t zz[4];
for (int32_t i = 0; i < batch; ++i) {
zigzag_value = ((w >> v) & mask);
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
zz[0] = ZIGZAG_DECODE(int64_t, zigzag_value);
p[_pos++] = prev_value;
v += bit;
zigzag_value = ((w >> v) & mask);
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
zz[1] = ZIGZAG_DECODE(int64_t, zigzag_value);
p[_pos++] = prev_value;
v += bit;
zigzag_value = ((w >> v) & mask);
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
zz[2] = ZIGZAG_DECODE(int64_t, zigzag_value);
p[_pos++] = prev_value;
v += bit;
zigzag_value = ((w >> v) & mask);
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
zz[3] = ZIGZAG_DECODE(int64_t, zigzag_value);
p[_pos++] = prev_value;
p[_pos] = prev_value + zz[0];
p[_pos + 1] = p[_pos] + zz[1];
p[_pos + 2] = p[_pos + 1] + zz[2];
p[_pos + 3] = p[_pos + 2] + zz[3];
prev_value = p[_pos + 3];
v += bit;
}
......@@ -345,12 +394,12 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
}
count += num;
#endif
#else
for (int32_t i = 0; i < elems && count < nelements; i++, count++) {
zigzag_value = ((w >> (4 + v)) & mask);
int64_t diff = ZIGZAG_DECODE(int64_t, zigzag_value);
prev_value = diff + prev_value;
zigzag_value = ((w >> v) & mask);
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
p[_pos++] = prev_value;
v += bit;
......
......@@ -336,6 +336,29 @@ int32_t tintToHex(uint64_t val, char hex[]) {
return j;
}
int32_t tintToStr(uint64_t val, size_t radix, char str[]) {
if (radix < 2 || radix > 16) {
return 0;
}
const char* s = "0123456789abcdef";
char buf[65] = {0};
int32_t i = 0;
uint64_t v = val;
while(v > 0) {
buf[i++] = s[v % radix];
v /= radix;
}
// reverse order
for(int32_t j = 0; j < i; ++j) {
str[j] = buf[i - j - 1];
}
return i;
}
int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]) {
int32_t i;
char hexval[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册