From d5ac976e95af532dc880419085e11caa11cacb18 Mon Sep 17 00:00:00 2001
From: Hongze Cheng <hzcheng@taosdata.com>
Date: Tue, 4 Aug 2020 13:37:53 +0800
Subject: [PATCH] compression

---
 src/util/src/tcompression.c | 48 +++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/util/src/tcompression.c b/src/util/src/tcompression.c
index f26a6d75d7..c20ba59ac9 100644
--- a/src/util/src/tcompression.c
+++ b/src/util/src/tcompression.c
@@ -57,6 +57,8 @@ static const int TEST_NUMBER = 1;
 #define SIMPLE8B_MAX_INT64 ((uint64_t)2305843009213693951L)
 
 #define safeInt64Add(a, b) (((a >= 0) && (b <= INT64_MAX - a)) || ((a < 0) && (b >= INT64_MIN - a)))
+#define ZIGZAG_ENCODE(T, v) ((u##T)((v) >> (sizeof(T) * 8 - 1))) ^ (((u##T)(v)) << 1)  // zigzag encode
+#define ZIGZAG_DECODE(T, v) ((v) >> 1) ^ -((T)((v)&1))                                 // zigzag decode
 
 /*
  * Compress Integer (Simple8B).
@@ -87,7 +89,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
       break;
     default:
       perror("Wrong integer types.\n");
-      exit(1);
+      return -1;
   }
 
   int     byte_limit = nelements * word_length + 1;
@@ -122,7 +124,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
 
       int64_t diff = curr_value - prev_value_tmp;
       // Zigzag encode the value.
-      uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1);
+      uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff);
 
       if (zigzag_value >= SIMPLE8B_MAX_INT64) goto _copy_and_exit;
 
@@ -168,7 +170,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
           break;
       }
       int64_t  diff = curr_value - prev_value;
-      uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1);
+      uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff);
       buffer |= ((zigzag_value & INT64MASK(bit)) << (bit * k + 4));
       i++;
       prev_value = curr_value;
@@ -208,7 +210,7 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const
       break;
     default:
       perror("Wrong integer types.\n");
-      exit(1);
+      return -1;
   }
 
   // If not compressed.
@@ -245,30 +247,30 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const
       } else {
         zigzag_value = ((w >> (4 + bit * i)) & INT64MASK(bit));
       }
-      int64_t diff = (zigzag_value >> 1) ^ -(zigzag_value & 1);
+      int64_t diff = ZIGZAG_DECODE(int64_t, zigzag_value);
       int64_t curr_value = diff + prev_value;
       prev_value = curr_value;
 
       switch (type) {
         case TSDB_DATA_TYPE_BIGINT:
-          *((int64_t *)output + _pos) = curr_value;
+          *((int64_t *)output + _pos) = (int64_t)curr_value;
           _pos++;
           break;
         case TSDB_DATA_TYPE_INT:
-          *((int32_t *)output + _pos) = curr_value;
+          *((int32_t *)output + _pos) = (int32_t)curr_value;
           _pos++;
           break;
         case TSDB_DATA_TYPE_SMALLINT:
-          *((int16_t *)output + _pos) = curr_value;
+          *((int16_t *)output + _pos) = (int16_t)curr_value;
           _pos++;
           break;
         case TSDB_DATA_TYPE_TINYINT:
-          *((int8_t *)output + _pos) = curr_value;
+          *((int8_t *)output + _pos) = (int8_t)curr_value;
           _pos++;
           break;
         default:
           perror("Wrong integer types.\n");
-          exit(1);
+          return -1;
       }
       count++;
       if (count == nelements) break;
@@ -306,7 +308,7 @@ int tsCompressBoolImp(const char *const input, const int nelements, char *const
       output[pos] |= t;
     } else {
       perror("Wrong bool value.\n");
-      exit(1);
+      return -1;
     }
   }
 
@@ -362,7 +364,7 @@ int tsCompressBoolRLEImp(const char *const input, const int nelements, char *con
       output[_pos++] = (counter << 1) | INT8MASK(0);
     } else {
       perror("Wrong bool value!\n");
-      exit(1);
+      return -1;
     }
   }
 
@@ -414,7 +416,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con
       char msg[128] = {0};
       sprintf(msg, "decomp_size:%d, Error decompress in LZ4 algorithm!\n", decompressed_size);
       perror(msg);
-      exit(EXIT_FAILURE);
+      return -1;
     }
 
     return decompressed_size;
@@ -424,7 +426,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con
     return compressedSize - 1;
   } else {
     perror("Wrong compressed string indicator!\n");
-    exit(EXIT_FAILURE);
+    return -1;
   }
 }
 
@@ -451,21 +453,21 @@ int tsCompressTimestampImp(const char *const input, const int nelements, char *c
     if (!safeInt64Add(curr_delta, -prev_delta)) goto _exit_over;
     int64_t delta_of_delta = curr_delta - prev_delta;
     // zigzag encode the value.
-    uint64_t zigzag_value = (delta_of_delta >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (delta_of_delta << 1);
+    uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, delta_of_delta);
     if (i % 2 == 0) {
       flags = 0;
       dd1 = zigzag_value;
       if (dd1 == 0) {
         flag1 = 0;
       } else {
-        flag1 = LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE;
+        flag1 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE);
       }
     } else {
       dd2 = zigzag_value;
       if (dd2 == 0) {
         flag2 = 0;
       } else {
-        flag2 = LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE;
+        flag2 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE);
       }
       flags = flag1 | (flag2 << 4);
       // Encode the flag.
@@ -552,7 +554,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char
         } else {
           memcpy(&dd1, input + ipos, nbytes);
         }
-        delta_of_delta = (dd1 >> 1) ^ -(dd1 & 1);
+        delta_of_delta = ZIGZAG_DECODE(int64_t, dd1);
       }
       ipos += nbytes;
       if (opos == 0) {
@@ -578,7 +580,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char
           memcpy(&dd2, input + ipos, nbytes);
         }
         // zigzag_decoding
-        delta_of_delta = (dd2 >> 1) ^ -(dd2 & 1);
+        delta_of_delta = ZIGZAG_DECODE(int64_t, dd2);
       }
       ipos += nbytes;
       prev_delta = delta_of_delta + prev_delta;
@@ -640,12 +642,12 @@ int tsCompressDoubleImp(const char *const input, const int nelements, char *cons
     uint8_t flag;
 
     if (trailing_zeros > leading_zeros) {
-      nbytes = LONG_BYTES - trailing_zeros / BITS_PER_BYTE;
+      nbytes = (uint8_t)(LONG_BYTES - trailing_zeros / BITS_PER_BYTE);
 
       if (nbytes > 0) nbytes--;
       flag = ((uint8_t)1 << 3) | nbytes;
     } else {
-      nbytes = LONG_BYTES - leading_zeros / BITS_PER_BYTE;
+      nbytes = (uint8_t)(LONG_BYTES - leading_zeros / BITS_PER_BYTE);
       if (nbytes > 0) nbytes--;
       flag = nbytes;
     }
@@ -787,12 +789,12 @@ int tsCompressFloatImp(const char *const input, const int nelements, char *const
     uint8_t flag;
 
     if (trailing_zeros > leading_zeros) {
-      nbytes = FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE;
+      nbytes = (uint8_t)(FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE);
 
       if (nbytes > 0) nbytes--;
       flag = ((uint8_t)1 << 3) | nbytes;
     } else {
-      nbytes = FLOAT_BYTES - leading_zeros / BITS_PER_BYTE;
+      nbytes = (uint8_t)(FLOAT_BYTES - leading_zeros / BITS_PER_BYTE);
       if (nbytes > 0) nbytes--;
       flag = nbytes;
     }
-- 
GitLab