tscompression.h 14.5 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TDENGINE_TSCOMPRESSION_H
#define TDENGINE_TSCOMPRESSION_H

#ifdef __cplusplus
extern "C" {
#endif

S
slguan 已提交
23
#include "taosdef.h"
H
hzcheng 已提交
24
#include "tutil.h"
H
hzcheng 已提交
25

T
tickduan 已提交
26

H
TD-166  
hzcheng 已提交
27
#define COMP_OVERFLOW_BYTES 2
H
hzcheng 已提交
28 29
#define BITS_PER_BYTE 8
// Masks
H
Hongze Cheng 已提交
30
// Each mask macro expands to a value whose lowest (_x) bits are set.
// The argument is wrapped in parentheses so that compound arguments (for example `a & b` or
// `cond ? m : n`) are evaluated as a whole before the shift is applied.
// Note: shifting by an amount greater than or equal to the width of the shifted type is undefined
// behavior in C, so INT64MASK(64) and INT32MASK(32) must not be used.
#define INT64MASK(_x) ((((uint64_t)1) << (_x)) - 1)
#define INT32MASK(_x) (((uint32_t)1 << (_x)) - 1)
#define INT8MASK(_x) (((uint8_t)1 << (_x)) - 1)
// Compression algorithm
// How the wrappers in this header that dispatch on `algorithm` treat these values:
//   ONE_STAGE_COMP - only the type-specific encoder/decoder runs.
//   TWO_STAGE_COMP - the type-specific encoder runs first and its result is then passed through
//                    tsCompressStringImp (decompression applies the two steps in reverse order).
//   NO_COMPRESSION - not handled by those wrappers; it ends up in their assert(0) / return -1 branch.
#define NO_COMPRESSION 0
#define ONE_STAGE_COMP 1
#define TWO_STAGE_COMP 2

T
tickduan 已提交
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
//
// Format of the first byte of compressed data
//   ------ 7 bit ---- | ---- 1 bit ----
//        algorithm           mode
//

// The compression mode is stored in the lowest bit of the first byte
#define MODE_NOCOMPRESS  0  // original data
#define MODE_COMPRESS    1  // compatible old compress

// The compression algorithm is stored in the upper 7 bits of the first byte
#define ALGO_SZ_LOSSY     1 // SZ compress 

// Split a header byte into its two fields: HEAD_MODE yields the lowest bit, HEAD_ALGO the rest.
// Both the argument and the whole expansion are parenthesized so the macros behave correctly
// when given a compound expression or when used inside a larger expression.
#define HEAD_MODE(x)  ((x) % 2)
#define HEAD_ALGO(x)  ((x) / 2)


// When true, tsCompressFloat and tsCompressDouble in this header use the lossy implementations
// and do not look at their `algorithm` argument. The variable is only declared here.
extern bool gOpenLossy;

H
hzcheng 已提交
57 58 59 60 61 62 63 64 65 66 67 68
// Type-specific codec entry points. They are only declared here; the implementations are not
// part of this header.
// How the inline wrappers below rely on them:
//   - the int returned by a tsCompress*Imp call is passed on as the input size of
//     tsCompressStringImp in two-stage mode;
//   - a negative return from tsDecompressStringImp is treated as failure;
//   - the `type` argument of the INT variants receives a TSDB_DATA_TYPE_* constant
//     (TINYINT, SMALLINT, INT or BIGINT).
extern int tsCompressINTImp(const char *const input, const int nelements, char *const output, const char type);
extern int tsDecompressINTImp(const char *const input, const int nelements, char *const output, const char type);
extern int tsCompressBoolImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressBoolImp(const char *const input, const int nelements, char *const output);
extern int tsCompressStringImp(const char *const input, int inputSize, char *const output, int outputSize);
extern int tsDecompressStringImp(const char *const input, int compressedSize, char *const output, int outputSize);
extern int tsCompressTimestampImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressTimestampImp(const char *const input, const int nelements, char *const output);
extern int tsCompressDoubleImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressDoubleImp(const char *const input, const int nelements, char *const output);
extern int tsCompressFloatImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressFloatImp(const char *const input, const int nelements, char *const output);
T
tickduan 已提交
69
// lossy
T
tickduan 已提交
70 71 72 73
// Lossy float/double codec entry points (declarations only).
// The decompress variants take the compressed size in addition to the element count.
int tsCompressFloatLossyImp(const char * input, const int nelements, char *const output);
int tsDecompressFloatLossyImp(const char * input, int compressedSize, const int nelements, char *const output);
int tsCompressDoubleLossyImp(const char * input, const int nelements, char *const output);
int tsDecompressDoubleLossyImp(const char * input, int compressedSize, const int nelements, char *const output);
H
hzcheng 已提交
74

T
tickduan 已提交
75
// init
76 77 78
// Declared with an explicit (void) parameter list: in C an empty `()` leaves the parameters
// unspecified, so a call that wrongly passes arguments would not be diagnosed.
// NOTE(review): what the initialization does and what the bool result means is defined by the
// implementation, which is not visible in this header - confirm there.
bool tsLossyInit(void);


T
tickduan 已提交
79

T
tickduan 已提交
80

H
hzcheng 已提交
81 82 83 84 85 86 87 88 89
// Compress TINYINT data.
//   ONE_STAGE_COMP: tsCompressINTImp encodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp encodes into `buffer`; that intermediate result is then run
//                   through tsCompressStringImp into `output` (which also receives `outputSize`).
// Any other `algorithm` hits assert(0) and returns -1.
// `inputSize` and `bufferSize` are accepted but not used here.
// Returns the value produced by the last stage that ran.
static FORCE_INLINE int tsCompressTinyint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                      char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);

    case TWO_STAGE_COMP: {
      const int stageOneLen = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_TINYINT);
      return tsCompressStringImp(buffer, stageOneLen, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress TINYINT data.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` into `buffer` (a negative result
//                   makes this function return -1), then tsDecompressINTImp decodes `buffer`
//                   into `output`.
// Any other `algorithm` hits assert(0) and returns -1. `outputSize` is not used here.
static FORCE_INLINE int tsDecompressTinyint(const char *const input, int compressedSize, const int nelements, char *const output,
                        int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);

    case TWO_STAGE_COMP: {
      const int stringStage = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stringStage < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_TINYINT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress SMALLINT data.
//   ONE_STAGE_COMP: tsCompressINTImp encodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp encodes into `buffer`; that intermediate result is then run
//                   through tsCompressStringImp into `output` (which also receives `outputSize`).
// Any other `algorithm` hits assert(0) and returns -1.
// `inputSize` and `bufferSize` are accepted but not used here.
static FORCE_INLINE int tsCompressSmallint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                       char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);

    case TWO_STAGE_COMP: {
      const int stageOneLen = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_SMALLINT);
      return tsCompressStringImp(buffer, stageOneLen, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress SMALLINT data.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` into `buffer` (a negative result
//                   makes this function return -1), then tsDecompressINTImp decodes `buffer`
//                   into `output`.
// Any other `algorithm` hits assert(0) and returns -1. `outputSize` is not used here.
static FORCE_INLINE int tsDecompressSmallint(const char *const input, int compressedSize, const int nelements, char *const output,
                         int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);

    case TWO_STAGE_COMP: {
      const int stringStage = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stringStage < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_SMALLINT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress INT data.
//   ONE_STAGE_COMP: tsCompressINTImp encodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp encodes into `buffer`; that intermediate result is then run
//                   through tsCompressStringImp into `output` (which also receives `outputSize`).
// Any other `algorithm` hits assert(0) and returns -1.
// `inputSize` and `bufferSize` are accepted but not used here.
static FORCE_INLINE int tsCompressInt(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                  char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);

    case TWO_STAGE_COMP: {
      const int stageOneLen = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_INT);
      return tsCompressStringImp(buffer, stageOneLen, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress INT data.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` into `buffer` (a negative result
//                   makes this function return -1), then tsDecompressINTImp decodes `buffer`
//                   into `output`.
// Any other `algorithm` hits assert(0) and returns -1. `outputSize` is not used here.
static FORCE_INLINE int tsDecompressInt(const char *const input, int compressedSize, const int nelements, char *const output,
                    int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);

    case TWO_STAGE_COMP: {
      const int stringStage = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stringStage < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_INT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress BIGINT data.
//   ONE_STAGE_COMP: tsCompressINTImp encodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp encodes into `buffer`; that intermediate result is then run
//                   through tsCompressStringImp into `output` (which also receives `outputSize`).
// Any other `algorithm` hits assert(0) and returns -1.
// `inputSize` and `bufferSize` are accepted but not used here.
static FORCE_INLINE int tsCompressBigint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);

    case TWO_STAGE_COMP: {
      const int stageOneLen = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_BIGINT);
      return tsCompressStringImp(buffer, stageOneLen, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress BIGINT data.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` into `buffer` (a negative result
//                   makes this function return -1), then tsDecompressINTImp decodes `buffer`
//                   into `output`.
// Any other `algorithm` hits assert(0) and returns -1. `outputSize` is not used here.
static FORCE_INLINE int tsDecompressBigint(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);

    case TWO_STAGE_COMP: {
      const int stringStage = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stringStage < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_BIGINT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress BOOL data.
//   ONE_STAGE_COMP: tsCompressBoolImp encodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsCompressBoolImp encodes into `buffer`; that intermediate result is then run
//                   through tsCompressStringImp into `output` (which also receives `outputSize`).
// Any other `algorithm` hits assert(0) and returns -1.
// `inputSize` and `bufferSize` are accepted but not used here.
static FORCE_INLINE int tsCompressBool(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                   char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressBoolImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      const int stageOneLen = tsCompressBoolImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stageOneLen, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress BOOL data.
//   ONE_STAGE_COMP: tsDecompressBoolImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` into `buffer` (a negative result
//                   makes this function return -1), then tsDecompressBoolImp decodes `buffer`
//                   into `output`.
// Any other `algorithm` hits assert(0) and returns -1. `outputSize` is not used here.
static FORCE_INLINE int tsDecompressBool(const char *const input, int compressedSize, const int nelements, char *const output,
                     int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressBoolImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      const int stringStage = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stringStage < 0) {
        return -1;
      }
      return tsDecompressBoolImp(buffer, nelements, output);
    }

    default:
      assert(0);
      return -1;
  }
}

// String compression wrapper: hands `input`/`inputSize` and `output`/`outputSize` to
// tsCompressStringImp and returns its result unchanged.
// `nelements`, `algorithm`, `buffer` and `bufferSize` are not used here.
static FORCE_INLINE int tsCompressString(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  const int result = tsCompressStringImp(input, inputSize, output, outputSize);
  return result;
}

// String decompression wrapper: hands `input`/`compressedSize` and `output`/`outputSize` to
// tsDecompressStringImp and returns its result unchanged.
// `nelements`, `algorithm`, `buffer` and `bufferSize` are not used here.
static FORCE_INLINE int tsDecompressString(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  const int result = tsDecompressStringImp(input, compressedSize, output, outputSize);
  return result;
}

static FORCE_INLINE int tsCompressFloat(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                    char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
223 224 225 226
  // lossy mode
  if(gOpenLossy) {
    return tsCompressFloatLossyImp(input, nelements, output);
  // lossless mode  
H
hzcheng 已提交
227
  } else {
T
tickduan 已提交
228 229 230 231 232 233 234 235 236
    if (algorithm == ONE_STAGE_COMP) {
      return tsCompressFloatImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      int len = tsCompressFloatImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, len, output, outputSize);
    } else {
      assert(0);
      return -1;
    }    
H
hzcheng 已提交
237 238 239 240 241
  }
}

static FORCE_INLINE int tsDecompressFloat(const char *const input, int compressedSize, const int nelements, char *const output,
                      int outputSize, char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
242 243 244 245
  
  if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
    // decompress lossy
    return tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
H
hzcheng 已提交
246
  } else {
T
tickduan 已提交
247 248 249 250 251 252 253 254 255 256
    // decompress lossless
    if (algorithm == ONE_STAGE_COMP) {
      return tsDecompressFloatImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
      return tsDecompressFloatImp(buffer, nelements, output);
    } else {
      assert(0);
      return -1;
    }
H
hzcheng 已提交
257 258 259 260 261
  }
}

static FORCE_INLINE int tsCompressDouble(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
262 263 264
  if(gOpenLossy){
    // lossy mode
    return tsCompressDoubleLossyImp(input, nelements, output);
H
hzcheng 已提交
265
  } else {
T
tickduan 已提交
266 267 268 269 270 271 272 273 274 275
    // lossless mode
    if (algorithm == ONE_STAGE_COMP) {
      return tsCompressDoubleImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      int len = tsCompressDoubleImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, len, output, outputSize);
    } else {
      assert(0);
      return -1;
    }
H
hzcheng 已提交
276 277 278 279 280
  }
}

static FORCE_INLINE int tsDecompressDouble(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
281 282 283
  if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
    // decompress lossy
    return tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
H
hzcheng 已提交
284
  } else {
T
tickduan 已提交
285 286 287 288 289 290 291 292 293 294
    // decompress lossless
    if (algorithm == ONE_STAGE_COMP) {
      return tsDecompressDoubleImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
      return tsDecompressDoubleImp(buffer, nelements, output);
    } else {
      assert(0);
      return -1;
    }
H
hzcheng 已提交
295 296 297
  }
}

T
tickduan 已提交
298 299 300
//
//  lossy float double
//
T
tickduan 已提交
301 302
// Lossy FLOAT compression wrapper: forwards `input`, `nelements` and `output` to
// tsCompressFloatLossyImp and returns its result unchanged.
// `inputSize`, `outputSize`, `algorithm`, `buffer` and `bufferSize` are not used here.
static FORCE_INLINE int tsCompressFloatLossy(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                    char algorithm, char *const buffer, int bufferSize) {
  const int result = tsCompressFloatLossyImp(input, nelements, output);
  return result;
}
T
tickduan 已提交
305 306 307

// Lossy FLOAT decompression wrapper: forwards `input`, `compressedSize`, `nelements` and `output`
// to tsDecompressFloatLossyImp and returns its result unchanged.
// `outputSize`, `algorithm`, `buffer` and `bufferSize` are not used here.
static FORCE_INLINE int tsDecompressFloatLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                      int outputSize, char algorithm, char *const buffer, int bufferSize) {
  const int result = tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
  return result;
}
T
tickduan 已提交
310 311 312

// Lossy DOUBLE compression wrapper: forwards `input`, `nelements` and `output` to
// tsCompressDoubleLossyImp and returns its result unchanged.
// `inputSize`, `outputSize`, `algorithm`, `buffer` and `bufferSize` are not used here.
static FORCE_INLINE int tsCompressDoubleLossy(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  const int result = tsCompressDoubleLossyImp(input, nelements, output);
  return result;
}
T
tickduan 已提交
315 316 317

// Lossy DOUBLE decompression wrapper: forwards `input`, `compressedSize`, `nelements` and
// `output` to tsDecompressDoubleLossyImp and returns its result unchanged.
// `outputSize`, `algorithm`, `buffer` and `bufferSize` are not used here.
static FORCE_INLINE int tsDecompressDoubleLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  const int result = tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
  return result;
}
T
tickduan 已提交
320 321


H
hzcheng 已提交
322 323 324 325 326 327 328 329 330
// Compress TIMESTAMP data.
//   ONE_STAGE_COMP: tsCompressTimestampImp encodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsCompressTimestampImp encodes into `buffer`; that intermediate result is
//                   then run through tsCompressStringImp into `output`.
// Any other `algorithm` hits assert(0) and returns -1.
// `inputSize` and `bufferSize` are accepted but not used here.
static FORCE_INLINE int tsCompressTimestamp(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                        char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressTimestampImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      const int stageOneLen = tsCompressTimestampImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stageOneLen, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress TIMESTAMP data.
//   ONE_STAGE_COMP: tsDecompressTimestampImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` into `buffer` (a negative result
//                   makes this function return -1), then tsDecompressTimestampImp decodes
//                   `buffer` into `output`.
// Any other `algorithm` hits assert(0) and returns -1. `outputSize` is not used here.
static FORCE_INLINE int tsDecompressTimestamp(const char *const input, int compressedSize, const int nelements, char *const output,
                          int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressTimestampImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      const int stringStage = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stringStage < 0) {
        return -1;
      }
      return tsDecompressTimestampImp(buffer, nelements, output);
    }

    default:
      assert(0);
      return -1;
  }
}
H
hzcheng 已提交
347 348 349 350 351 352

#ifdef __cplusplus
}
#endif

#endif  // TDENGINE_TSCOMPRESSION_H