From d5ac976e95af532dc880419085e11caa11cacb18 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 4 Aug 2020 13:37:53 +0800 Subject: [PATCH] compression --- src/util/src/tcompression.c | 48 +++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/src/util/src/tcompression.c b/src/util/src/tcompression.c index f26a6d75d7..c20ba59ac9 100644 --- a/src/util/src/tcompression.c +++ b/src/util/src/tcompression.c @@ -57,6 +57,8 @@ static const int TEST_NUMBER = 1; #define SIMPLE8B_MAX_INT64 ((uint64_t)2305843009213693951L) #define safeInt64Add(a, b) (((a >= 0) && (b <= INT64_MAX - a)) || ((a < 0) && (b >= INT64_MIN - a))) +#define ZIGZAG_ENCODE(T, v) ((u##T)((v) >> (sizeof(T) * 8 - 1))) ^ (((u##T)(v)) << 1) // zigzag encode +#define ZIGZAG_DECODE(T, v) ((v) >> 1) ^ -((T)((v)&1)) // zigzag decode /* * Compress Integer (Simple8B). @@ -87,7 +89,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o break; default: perror("Wrong integer types.\n"); - exit(1); + return -1; } int byte_limit = nelements * word_length + 1; @@ -122,7 +124,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o int64_t diff = curr_value - prev_value_tmp; // Zigzag encode the value. - uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1); + uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff); if (zigzag_value >= SIMPLE8B_MAX_INT64) goto _copy_and_exit; @@ -168,7 +170,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o break; } int64_t diff = curr_value - prev_value; - uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1); + uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff); buffer |= ((zigzag_value & INT64MASK(bit)) << (bit * k + 4)); i++; prev_value = curr_value; @@ -208,7 +210,7 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const break; default: perror("Wrong integer types.\n"); - exit(1); + return -1; } // If not compressed. @@ -245,30 +247,30 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const } else { zigzag_value = ((w >> (4 + bit * i)) & INT64MASK(bit)); } - int64_t diff = (zigzag_value >> 1) ^ -(zigzag_value & 1); + int64_t diff = ZIGZAG_DECODE(int64_t, zigzag_value); int64_t curr_value = diff + prev_value; prev_value = curr_value; switch (type) { case TSDB_DATA_TYPE_BIGINT: - *((int64_t *)output + _pos) = curr_value; + *((int64_t *)output + _pos) = (int64_t)curr_value; _pos++; break; case TSDB_DATA_TYPE_INT: - *((int32_t *)output + _pos) = curr_value; + *((int32_t *)output + _pos) = (int32_t)curr_value; _pos++; break; case TSDB_DATA_TYPE_SMALLINT: - *((int16_t *)output + _pos) = curr_value; + *((int16_t *)output + _pos) = (int16_t)curr_value; _pos++; break; case TSDB_DATA_TYPE_TINYINT: - *((int8_t *)output + _pos) = curr_value; + *((int8_t *)output + _pos) = (int8_t)curr_value; _pos++; break; default: perror("Wrong integer types.\n"); - exit(1); + return -1; } count++; if (count == nelements) break; @@ -306,7 +308,7 @@ int tsCompressBoolImp(const char *const input, const int nelements, char *const output[pos] |= t; } else { perror("Wrong bool value.\n"); - exit(1); + return -1; } } @@ -362,7 +364,7 @@ int tsCompressBoolRLEImp(const char *const input, const int nelements, char *con output[_pos++] = (counter << 1) | INT8MASK(0); } else { perror("Wrong bool value!\n"); - exit(1); + return -1; } } @@ -414,7 +416,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con char msg[128] = {0}; sprintf(msg, "decomp_size:%d, Error decompress in LZ4 algorithm!\n", decompressed_size); perror(msg); - exit(EXIT_FAILURE); + return -1; } return decompressed_size; @@ -424,7 +426,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con return compressedSize - 1; } else { perror("Wrong compressed string indicator!\n"); - exit(EXIT_FAILURE); + return -1; } } @@ -451,21 +453,21 @@ int tsCompressTimestampImp(const char *const input, const int nelements, char *c if (!safeInt64Add(curr_delta, -prev_delta)) goto _exit_over; int64_t delta_of_delta = curr_delta - prev_delta; // zigzag encode the value. - uint64_t zigzag_value = (delta_of_delta >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (delta_of_delta << 1); + uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, delta_of_delta); if (i % 2 == 0) { flags = 0; dd1 = zigzag_value; if (dd1 == 0) { flag1 = 0; } else { - flag1 = LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE; + flag1 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE); } } else { dd2 = zigzag_value; if (dd2 == 0) { flag2 = 0; } else { - flag2 = LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE; + flag2 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE); } flags = flag1 | (flag2 << 4); // Encode the flag. @@ -552,7 +554,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char } else { memcpy(&dd1, input + ipos, nbytes); } - delta_of_delta = (dd1 >> 1) ^ -(dd1 & 1); + delta_of_delta = ZIGZAG_DECODE(int64_t, dd1); } ipos += nbytes; if (opos == 0) { @@ -578,7 +580,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char memcpy(&dd2, input + ipos, nbytes); } // zigzag_decoding - delta_of_delta = (dd2 >> 1) ^ -(dd2 & 1); + delta_of_delta = ZIGZAG_DECODE(int64_t, dd2); } ipos += nbytes; prev_delta = delta_of_delta + prev_delta; @@ -640,12 +642,12 @@ int tsCompressDoubleImp(const char *const input, const int nelements, char *cons uint8_t flag; if (trailing_zeros > leading_zeros) { - nbytes = LONG_BYTES - trailing_zeros / BITS_PER_BYTE; + nbytes = (uint8_t)(LONG_BYTES - trailing_zeros / BITS_PER_BYTE); if (nbytes > 0) nbytes--; flag = ((uint8_t)1 << 3) | nbytes; } else { - nbytes = LONG_BYTES - leading_zeros / BITS_PER_BYTE; + nbytes = (uint8_t)(LONG_BYTES - leading_zeros / BITS_PER_BYTE); if (nbytes > 0) nbytes--; flag = nbytes; } @@ -787,12 +789,12 @@ int tsCompressFloatImp(const char *const input, const int nelements, char *const uint8_t flag; if (trailing_zeros > leading_zeros) { - nbytes = FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE; + nbytes = (uint8_t)(FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE); if (nbytes > 0) nbytes--; flag = ((uint8_t)1 << 3) | nbytes; } else { - nbytes = FLOAT_BYTES - leading_zeros / BITS_PER_BYTE; + nbytes = (uint8_t)(FLOAT_BYTES - leading_zeros / BITS_PER_BYTE); if (nbytes > 0) nbytes--; flag = nbytes; } -- GitLab