提交 d5ac976e 编写于 作者: H Hongze Cheng

compression

上级 6c777907
...@@ -57,6 +57,8 @@ static const int TEST_NUMBER = 1; ...@@ -57,6 +57,8 @@ static const int TEST_NUMBER = 1;
#define SIMPLE8B_MAX_INT64 ((uint64_t)2305843009213693951L) #define SIMPLE8B_MAX_INT64 ((uint64_t)2305843009213693951L)
#define safeInt64Add(a, b) (((a >= 0) && (b <= INT64_MAX - a)) || ((a < 0) && (b >= INT64_MIN - a))) #define safeInt64Add(a, b) (((a >= 0) && (b <= INT64_MAX - a)) || ((a < 0) && (b >= INT64_MIN - a)))
/*
 * Zigzag coding maps signed integers onto unsigned ones so that values of
 * small magnitude get small codes: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * T is a signed fixed-width type name for which u##T is also a type name
 * (e.g. int64_t -> uint64_t).
 *
 * ZIGZAG_ENCODE(T, v): v is expected to have type T; the result has type u##T.
 *   The sign is spread over all bits with a right shift of a signed value,
 *   which relies on the compiler implementing that as an arithmetic shift.
 * ZIGZAG_DECODE(T, v): v is the unsigned code; the result has type T.
 *
 * Both expansions are wrapped in outer parentheses because '^' has lower
 * precedence than arithmetic and comparison operators; without them an
 * expression such as ZIGZAG_ENCODE(T, v) + 1 would parse as a ^ (b + 1).
 * Note that v is evaluated twice, so it must not have side effects.
 */
#define ZIGZAG_ENCODE(T, v) (((u##T)((v) >> (sizeof(T) * 8 - 1))) ^ (((u##T)(v)) << 1))  // zigzag encode
#define ZIGZAG_DECODE(T, v) ((T)(((v) >> 1) ^ -((T)((v) & 1))))                          // zigzag decode
/* /*
* Compress Integer (Simple8B). * Compress Integer (Simple8B).
...@@ -87,7 +89,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o ...@@ -87,7 +89,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
break; break;
default: default:
perror("Wrong integer types.\n"); perror("Wrong integer types.\n");
exit(1); return -1;
} }
int byte_limit = nelements * word_length + 1; int byte_limit = nelements * word_length + 1;
...@@ -122,7 +124,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o ...@@ -122,7 +124,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
int64_t diff = curr_value - prev_value_tmp; int64_t diff = curr_value - prev_value_tmp;
// Zigzag encode the value. // Zigzag encode the value.
uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1); uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff);
if (zigzag_value >= SIMPLE8B_MAX_INT64) goto _copy_and_exit; if (zigzag_value >= SIMPLE8B_MAX_INT64) goto _copy_and_exit;
...@@ -168,7 +170,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o ...@@ -168,7 +170,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
break; break;
} }
int64_t diff = curr_value - prev_value; int64_t diff = curr_value - prev_value;
uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1); uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff);
buffer |= ((zigzag_value & INT64MASK(bit)) << (bit * k + 4)); buffer |= ((zigzag_value & INT64MASK(bit)) << (bit * k + 4));
i++; i++;
prev_value = curr_value; prev_value = curr_value;
...@@ -208,7 +210,7 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const ...@@ -208,7 +210,7 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const
break; break;
default: default:
perror("Wrong integer types.\n"); perror("Wrong integer types.\n");
exit(1); return -1;
} }
// If not compressed. // If not compressed.
...@@ -245,30 +247,30 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const ...@@ -245,30 +247,30 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const
} else { } else {
zigzag_value = ((w >> (4 + bit * i)) & INT64MASK(bit)); zigzag_value = ((w >> (4 + bit * i)) & INT64MASK(bit));
} }
int64_t diff = (zigzag_value >> 1) ^ -(zigzag_value & 1); int64_t diff = ZIGZAG_DECODE(int64_t, zigzag_value);
int64_t curr_value = diff + prev_value; int64_t curr_value = diff + prev_value;
prev_value = curr_value; prev_value = curr_value;
switch (type) { switch (type) {
case TSDB_DATA_TYPE_BIGINT: case TSDB_DATA_TYPE_BIGINT:
*((int64_t *)output + _pos) = curr_value; *((int64_t *)output + _pos) = (int64_t)curr_value;
_pos++; _pos++;
break; break;
case TSDB_DATA_TYPE_INT: case TSDB_DATA_TYPE_INT:
*((int32_t *)output + _pos) = curr_value; *((int32_t *)output + _pos) = (int32_t)curr_value;
_pos++; _pos++;
break; break;
case TSDB_DATA_TYPE_SMALLINT: case TSDB_DATA_TYPE_SMALLINT:
*((int16_t *)output + _pos) = curr_value; *((int16_t *)output + _pos) = (int16_t)curr_value;
_pos++; _pos++;
break; break;
case TSDB_DATA_TYPE_TINYINT: case TSDB_DATA_TYPE_TINYINT:
*((int8_t *)output + _pos) = curr_value; *((int8_t *)output + _pos) = (int8_t)curr_value;
_pos++; _pos++;
break; break;
default: default:
perror("Wrong integer types.\n"); perror("Wrong integer types.\n");
exit(1); return -1;
} }
count++; count++;
if (count == nelements) break; if (count == nelements) break;
...@@ -306,7 +308,7 @@ int tsCompressBoolImp(const char *const input, const int nelements, char *const ...@@ -306,7 +308,7 @@ int tsCompressBoolImp(const char *const input, const int nelements, char *const
output[pos] |= t; output[pos] |= t;
} else { } else {
perror("Wrong bool value.\n"); perror("Wrong bool value.\n");
exit(1); return -1;
} }
} }
...@@ -362,7 +364,7 @@ int tsCompressBoolRLEImp(const char *const input, const int nelements, char *con ...@@ -362,7 +364,7 @@ int tsCompressBoolRLEImp(const char *const input, const int nelements, char *con
output[_pos++] = (counter << 1) | INT8MASK(0); output[_pos++] = (counter << 1) | INT8MASK(0);
} else { } else {
perror("Wrong bool value!\n"); perror("Wrong bool value!\n");
exit(1); return -1;
} }
} }
...@@ -414,7 +416,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con ...@@ -414,7 +416,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con
char msg[128] = {0}; char msg[128] = {0};
sprintf(msg, "decomp_size:%d, Error decompress in LZ4 algorithm!\n", decompressed_size); sprintf(msg, "decomp_size:%d, Error decompress in LZ4 algorithm!\n", decompressed_size);
perror(msg); perror(msg);
exit(EXIT_FAILURE); return -1;
} }
return decompressed_size; return decompressed_size;
...@@ -424,7 +426,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con ...@@ -424,7 +426,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con
return compressedSize - 1; return compressedSize - 1;
} else { } else {
perror("Wrong compressed string indicator!\n"); perror("Wrong compressed string indicator!\n");
exit(EXIT_FAILURE); return -1;
} }
} }
...@@ -451,21 +453,21 @@ int tsCompressTimestampImp(const char *const input, const int nelements, char *c ...@@ -451,21 +453,21 @@ int tsCompressTimestampImp(const char *const input, const int nelements, char *c
if (!safeInt64Add(curr_delta, -prev_delta)) goto _exit_over; if (!safeInt64Add(curr_delta, -prev_delta)) goto _exit_over;
int64_t delta_of_delta = curr_delta - prev_delta; int64_t delta_of_delta = curr_delta - prev_delta;
// zigzag encode the value. // zigzag encode the value.
uint64_t zigzag_value = (delta_of_delta >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (delta_of_delta << 1); uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, delta_of_delta);
if (i % 2 == 0) { if (i % 2 == 0) {
flags = 0; flags = 0;
dd1 = zigzag_value; dd1 = zigzag_value;
if (dd1 == 0) { if (dd1 == 0) {
flag1 = 0; flag1 = 0;
} else { } else {
flag1 = LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE; flag1 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE);
} }
} else { } else {
dd2 = zigzag_value; dd2 = zigzag_value;
if (dd2 == 0) { if (dd2 == 0) {
flag2 = 0; flag2 = 0;
} else { } else {
flag2 = LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE; flag2 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE);
} }
flags = flag1 | (flag2 << 4); flags = flag1 | (flag2 << 4);
// Encode the flag. // Encode the flag.
...@@ -552,7 +554,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char ...@@ -552,7 +554,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char
} else { } else {
memcpy(&dd1, input + ipos, nbytes); memcpy(&dd1, input + ipos, nbytes);
} }
delta_of_delta = (dd1 >> 1) ^ -(dd1 & 1); delta_of_delta = ZIGZAG_DECODE(int64_t, dd1);
} }
ipos += nbytes; ipos += nbytes;
if (opos == 0) { if (opos == 0) {
...@@ -578,7 +580,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char ...@@ -578,7 +580,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char
memcpy(&dd2, input + ipos, nbytes); memcpy(&dd2, input + ipos, nbytes);
} }
// zigzag_decoding // zigzag_decoding
delta_of_delta = (dd2 >> 1) ^ -(dd2 & 1); delta_of_delta = ZIGZAG_DECODE(int64_t, dd2);
} }
ipos += nbytes; ipos += nbytes;
prev_delta = delta_of_delta + prev_delta; prev_delta = delta_of_delta + prev_delta;
...@@ -640,12 +642,12 @@ int tsCompressDoubleImp(const char *const input, const int nelements, char *cons ...@@ -640,12 +642,12 @@ int tsCompressDoubleImp(const char *const input, const int nelements, char *cons
uint8_t flag; uint8_t flag;
if (trailing_zeros > leading_zeros) { if (trailing_zeros > leading_zeros) {
nbytes = LONG_BYTES - trailing_zeros / BITS_PER_BYTE; nbytes = (uint8_t)(LONG_BYTES - trailing_zeros / BITS_PER_BYTE);
if (nbytes > 0) nbytes--; if (nbytes > 0) nbytes--;
flag = ((uint8_t)1 << 3) | nbytes; flag = ((uint8_t)1 << 3) | nbytes;
} else { } else {
nbytes = LONG_BYTES - leading_zeros / BITS_PER_BYTE; nbytes = (uint8_t)(LONG_BYTES - leading_zeros / BITS_PER_BYTE);
if (nbytes > 0) nbytes--; if (nbytes > 0) nbytes--;
flag = nbytes; flag = nbytes;
} }
...@@ -787,12 +789,12 @@ int tsCompressFloatImp(const char *const input, const int nelements, char *const ...@@ -787,12 +789,12 @@ int tsCompressFloatImp(const char *const input, const int nelements, char *const
uint8_t flag; uint8_t flag;
if (trailing_zeros > leading_zeros) { if (trailing_zeros > leading_zeros) {
nbytes = FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE; nbytes = (uint8_t)(FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE);
if (nbytes > 0) nbytes--; if (nbytes > 0) nbytes--;
flag = ((uint8_t)1 << 3) | nbytes; flag = ((uint8_t)1 << 3) | nbytes;
} else { } else {
nbytes = FLOAT_BYTES - leading_zeros / BITS_PER_BYTE; nbytes = (uint8_t)(FLOAT_BYTES - leading_zeros / BITS_PER_BYTE);
if (nbytes > 0) nbytes--; if (nbytes > 0) nbytes--;
flag = nbytes; flag = nbytes;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册