tscompression.h 15.1 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TDENGINE_TSCOMPRESSION_H
#define TDENGINE_TSCOMPRESSION_H

#ifdef __cplusplus
extern "C" {
#endif

S
slguan 已提交
23
#include "taosdef.h"
H
hzcheng 已提交
24
#include "tutil.h"
H
hzcheng 已提交
25

H
TD-166  
hzcheng 已提交
26
#define COMP_OVERFLOW_BYTES 2
H
hzcheng 已提交
27 28
#define BITS_PER_BYTE 8
// Masks
H
Hongze Cheng 已提交
29
// Each macro yields a value with the low `_x` bits set.
// The argument is parenthesized so that expression arguments (e.g. `a | b`)
// are evaluated as a whole before the shift is applied.
#define INT64MASK(_x) ((((uint64_t)1) << (_x)) - 1)
#define INT32MASK(_x) (((uint32_t)1 << (_x)) - 1)
#define INT8MASK(_x) (((uint8_t)1 << (_x)) - 1)
// Compression algorithm
// Values compared against the `algorithm` argument of the tsCompress*/tsDecompress*
// wrappers in this header that dispatch on it.
// NO_COMPRESSION is not handled by those dispatching wrappers: any value other than
// ONE_STAGE_COMP/TWO_STAGE_COMP reaches assert(0) and returns -1.
#define NO_COMPRESSION 0
// ONE_STAGE_COMP: only the type-specific *Imp codec is run.
#define ONE_STAGE_COMP 1
// TWO_STAGE_COMP: the type-specific *Imp codec is chained with
// tsCompressStringImp()/tsDecompressStringImp() through the caller-supplied buffer.
#define TWO_STAGE_COMP 2

T
tickduan 已提交
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
//
// compressed data first byte format
//   ------ 7 bit ---- | ---- 1 bit ----
//        algorithm           mode
//

// The compression mode is stored in the lowest bit of the first byte.
#define MODE_NOCOMPRESS  0  // original data
#define MODE_COMPRESS    1  // compatible old compress

// The compression algorithm is stored in the upper 7 bits of the first byte.
#define ALGO_SZ_LOSSY     1 // SZ compress

// Extract the mode (low bit) / algorithm (upper 7 bits) from a header byte.
// The argument is parenthesized so expression arguments expand correctly, and is
// converted to uint8_t first because plain `char` may be signed: a byte with the
// top bit set would otherwise produce negative results from % and /.
#define HEAD_MODE(x)  ((uint8_t)(x) % 2)
#define HEAD_ALGO(x)  ((uint8_t)(x) / 2)

H
hzcheng 已提交
53 54 55 56 57 58 59 60 61 62 63 64
// Type-specific codec entry points, defined outside this header.
// Every function returns an int. The wrappers in this header pass a compressor's
// return value on as the input length of tsCompressStringImp(), and treat a negative
// return from tsDecompressStringImp() as failure.
// `type` of the INT codec receives one of the TSDB_DATA_TYPE_* integer type codes.
int tsCompressINTImp(const char *input, int nelements, char *output, char type);
int tsDecompressINTImp(const char *input, int nelements, char *output, char type);

int tsCompressBoolImp(const char *input, int nelements, char *output);
int tsDecompressBoolImp(const char *input, int nelements, char *output);

int tsCompressStringImp(const char *input, int inputSize, char *output, int outputSize);
int tsDecompressStringImp(const char *input, int compressedSize, char *output, int outputSize);

int tsCompressTimestampImp(const char *input, int nelements, char *output);
int tsDecompressTimestampImp(const char *input, int nelements, char *output);

int tsCompressDoubleImp(const char *input, int nelements, char *output);
int tsDecompressDoubleImp(const char *input, int nelements, char *output);

int tsCompressFloatImp(const char *input, int nelements, char *output);
int tsDecompressFloatImp(const char *input, int nelements, char *output);
T
tickduan 已提交
65
// lossy
T
tickduan 已提交
66 67 68 69
// Lossy float/double codec entry points, defined outside this header.
// The decompressors additionally take the size of the compressed input.
int tsCompressFloatLossyImp(const char *input, int nelements, char *output);
int tsDecompressFloatLossyImp(const char *input, int compressedSize, int nelements, char *output);
int tsCompressDoubleLossyImp(const char *input, int nelements, char *output);
int tsDecompressDoubleLossyImp(const char *input, int compressedSize, int nelements, char *output);
H
hzcheng 已提交
70

71
#ifdef TSZ_IMPL
T
tickduan 已提交
72 73 74 75 76 77
// Flags consulted by tsCompressFloat()/tsCompressDouble(): when set, the lossy
// codec is used instead of the lossless one.
extern bool lossyFloat;
extern bool lossyDouble;
// init call
// Declared with (void): an empty parameter list in C (before C23) declares no
// prototype, which disables argument checking at call sites.
int tsCompressInit(void);
// exit call
void tsCompressExit(void);
78

T
tickduan 已提交
79 80 81
// NOTE(review): these look like timing / compression-ratio diagnostics
// (see the names and the `tag` argument) -- confirm against their definitions.
// cost_start is declared with (void) so that it has a real prototype in C.
void cost_start(void);
double cost_end(const char *tag);
void show_rate(int in_len, int out_len);
82
#endif
T
tickduan 已提交
83

H
hzcheng 已提交
84 85 86 87 88 89 90 91 92
// Compress TINYINT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp() writes directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp() writes into `buffer`; that intermediate result
//                   is then run through tsCompressStringImp() into `output`.
// Any other value trips assert(0) and returns -1.
// Returns the value of the last stage executed. `inputSize` and `bufferSize` are not used.
static FORCE_INLINE int tsCompressTinyint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                      char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);

    case TWO_STAGE_COMP: {
      int stage1Len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_TINYINT);
      return tsCompressStringImp(buffer, stage1Len, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress TINYINT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp() decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp() first expands `input` into `buffer`
//                   (`bufferSize` is passed as its output size); a negative result
//                   yields -1, otherwise tsDecompressINTImp() decodes `buffer` into `output`.
// Any other value trips assert(0) and returns -1. `outputSize` is not used.
static FORCE_INLINE int tsDecompressTinyint(const char *const input, int compressedSize, const int nelements, char *const output,
                        int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);

    case TWO_STAGE_COMP: {
      int stage2Ret = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stage2Ret < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_TINYINT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress SMALLINT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp() writes directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp() writes into `buffer`; that intermediate result
//                   is then run through tsCompressStringImp() into `output`.
// Any other value trips assert(0) and returns -1.
// Returns the value of the last stage executed. `inputSize` and `bufferSize` are not used.
static FORCE_INLINE int tsCompressSmallint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                       char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);

    case TWO_STAGE_COMP: {
      int stage1Len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_SMALLINT);
      return tsCompressStringImp(buffer, stage1Len, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress SMALLINT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp() decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp() first expands `input` into `buffer`
//                   (`bufferSize` is passed as its output size); a negative result
//                   yields -1, otherwise tsDecompressINTImp() decodes `buffer` into `output`.
// Any other value trips assert(0) and returns -1. `outputSize` is not used.
static FORCE_INLINE int tsDecompressSmallint(const char *const input, int compressedSize, const int nelements, char *const output,
                         int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);

    case TWO_STAGE_COMP: {
      int stage2Ret = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stage2Ret < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_SMALLINT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress INT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp() writes directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp() writes into `buffer`; that intermediate result
//                   is then run through tsCompressStringImp() into `output`.
// Any other value trips assert(0) and returns -1.
// Returns the value of the last stage executed. `inputSize` and `bufferSize` are not used.
static FORCE_INLINE int tsCompressInt(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                  char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);

    case TWO_STAGE_COMP: {
      int stage1Len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_INT);
      return tsCompressStringImp(buffer, stage1Len, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress INT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp() decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp() first expands `input` into `buffer`
//                   (`bufferSize` is passed as its output size); a negative result
//                   yields -1, otherwise tsDecompressINTImp() decodes `buffer` into `output`.
// Any other value trips assert(0) and returns -1. `outputSize` is not used.
static FORCE_INLINE int tsDecompressInt(const char *const input, int compressedSize, const int nelements, char *const output,
                    int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);

    case TWO_STAGE_COMP: {
      int stage2Ret = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stage2Ret < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_INT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress BIGINT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp() writes directly into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp() writes into `buffer`; that intermediate result
//                   is then run through tsCompressStringImp() into `output`.
// Any other value trips assert(0) and returns -1.
// Returns the value of the last stage executed. `inputSize` and `bufferSize` are not used.
static FORCE_INLINE int tsCompressBigint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);

    case TWO_STAGE_COMP: {
      int stage1Len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_BIGINT);
      return tsCompressStringImp(buffer, stage1Len, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress BIGINT data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp() decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp() first expands `input` into `buffer`
//                   (`bufferSize` is passed as its output size); a negative result
//                   yields -1, otherwise tsDecompressINTImp() decodes `buffer` into `output`.
// Any other value trips assert(0) and returns -1. `outputSize` is not used.
static FORCE_INLINE int tsDecompressBigint(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);

    case TWO_STAGE_COMP: {
      int stage2Ret = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stage2Ret < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_BIGINT);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress BOOL data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsCompressBoolImp() writes directly into `output`.
//   TWO_STAGE_COMP: tsCompressBoolImp() writes into `buffer`; that intermediate result
//                   is then run through tsCompressStringImp() into `output`.
// Any other value trips assert(0) and returns -1.
// Returns the value of the last stage executed. `inputSize` and `bufferSize` are not used.
static FORCE_INLINE int tsCompressBool(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                   char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressBoolImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      int stage1Len = tsCompressBoolImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stage1Len, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress BOOL data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsDecompressBoolImp() decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp() first expands `input` into `buffer`
//                   (`bufferSize` is passed as its output size); a negative result
//                   yields -1, otherwise tsDecompressBoolImp() decodes `buffer` into `output`.
// Any other value trips assert(0) and returns -1. `outputSize` is not used.
static FORCE_INLINE int tsDecompressBool(const char *const input, int compressedSize, const int nelements, char *const output,
                     int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressBoolImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      int stage2Ret = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stage2Ret < 0) {
        return -1;
      }
      return tsDecompressBoolImp(buffer, nelements, output);
    }

    default:
      assert(0);
      return -1;
  }
}

// Compress `inputSize` bytes from `input` into `output` by forwarding to
// tsCompressStringImp(). `nelements`, `algorithm`, `buffer` and `bufferSize` are
// accepted for signature parity with the other wrappers but are not used.
static FORCE_INLINE int tsCompressString(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  int compressedLen = tsCompressStringImp(input, inputSize, output, outputSize);
  return compressedLen;
}

// Decompress `compressedSize` bytes from `input` into `output` by forwarding to
// tsDecompressStringImp(). `nelements`, `algorithm`, `buffer` and `bufferSize` are
// accepted for signature parity with the other wrappers but are not used.
static FORCE_INLINE int tsDecompressString(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  int decodedRet = tsDecompressStringImp(input, compressedSize, output, outputSize);
  return decodedRet;
}

static FORCE_INLINE int tsCompressFloat(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                    char algorithm, char *const buffer, int bufferSize) {
226
#ifdef TSZ_IMPL
T
tickduan 已提交
227
  // lossy mode
T
tickduan 已提交
228
  if(lossyFloat) {
T
tickduan 已提交
229 230
    return tsCompressFloatLossyImp(input, nelements, output);
  // lossless mode  
H
hzcheng 已提交
231
  } else {
232
#endif    
T
tickduan 已提交
233 234 235
    if (algorithm == ONE_STAGE_COMP) {
      return tsCompressFloatImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
T
tickduan 已提交
236
      //cost_start();
T
tickduan 已提交
237
      int len = tsCompressFloatImp(input, nelements, buffer);
T
tickduan 已提交
238 239 240 241 242 243 244
      //cost_end(" td_first_compress");
      //show_rate(inputSize, len);
      //cost_start();
      int ret = tsCompressStringImp(buffer, len, output, outputSize);
      //cost_end(" td_second_compress");
      //show_rate(inputSize, ret);
      return ret;
T
tickduan 已提交
245 246 247 248
    } else {
      assert(0);
      return -1;
    }    
249
#ifdef TSZ_IMPL  
H
hzcheng 已提交
250
  }
251
#endif
H
hzcheng 已提交
252 253 254 255
}

static FORCE_INLINE int tsDecompressFloat(const char *const input, int compressedSize, const int nelements, char *const output,
                      int outputSize, char algorithm, char *const buffer, int bufferSize) {
256
#ifdef TSZ_IMPL
T
tickduan 已提交
257 258 259
  if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
    // decompress lossy
    return tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
H
hzcheng 已提交
260
  } else {
261
#endif    
T
tickduan 已提交
262 263 264 265 266 267 268 269 270 271
    // decompress lossless
    if (algorithm == ONE_STAGE_COMP) {
      return tsDecompressFloatImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
      return tsDecompressFloatImp(buffer, nelements, output);
    } else {
      assert(0);
      return -1;
    }
272
#ifdef TSZ_IMPL  
H
hzcheng 已提交
273
  }
274
#endif
H
hzcheng 已提交
275 276
}

277

H
hzcheng 已提交
278 279
static FORCE_INLINE int tsCompressDouble(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
280
#ifdef TSZ_IMPL  
T
tickduan 已提交
281
  if(lossyDouble){
T
tickduan 已提交
282 283
    // lossy mode
    return tsCompressDoubleLossyImp(input, nelements, output);
H
hzcheng 已提交
284
  } else {
285
#endif    
T
tickduan 已提交
286 287 288 289 290 291 292 293 294 295
    // lossless mode
    if (algorithm == ONE_STAGE_COMP) {
      return tsCompressDoubleImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      int len = tsCompressDoubleImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, len, output, outputSize);
    } else {
      assert(0);
      return -1;
    }
296
#ifdef TSZ_IMPL      
H
hzcheng 已提交
297
  }
298
#endif  
H
hzcheng 已提交
299 300 301 302
}

static FORCE_INLINE int tsDecompressDouble(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
303
  #ifdef TSZ_IMPL  
T
tickduan 已提交
304 305 306
  if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
    // decompress lossy
    return tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
H
hzcheng 已提交
307
  } else {
308
  #endif  
T
tickduan 已提交
309 310 311 312 313 314 315 316 317 318
    // decompress lossless
    if (algorithm == ONE_STAGE_COMP) {
      return tsDecompressDoubleImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
      return tsDecompressDoubleImp(buffer, nelements, output);
    } else {
      assert(0);
      return -1;
    }
319
#ifdef TSZ_IMPL      
H
hzcheng 已提交
320
  }
321
#endif  
H
hzcheng 已提交
322 323
}

324
#ifdef TSZ_IMPL  
T
tickduan 已提交
325 326 327
//
//  lossy float double
//
T
tickduan 已提交
328 329
// Unconditionally compress FLOAT data with the lossy codec by forwarding to
// tsCompressFloatLossyImp(). Only `input`, `nelements` and `output` are used; the
// remaining parameters are ignored.
static FORCE_INLINE int tsCompressFloatLossy(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                    char algorithm, char *const buffer, int bufferSize) {
  int lossyRet = tsCompressFloatLossyImp(input, nelements, output);
  return lossyRet;
}
T
tickduan 已提交
332 333 334

// Unconditionally decompress FLOAT data with the lossy codec by forwarding to
// tsDecompressFloatLossyImp(). Only `input`, `compressedSize`, `nelements` and
// `output` are used; the remaining parameters are ignored.
static FORCE_INLINE int tsDecompressFloatLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                      int outputSize, char algorithm, char *const buffer, int bufferSize) {
  int lossyRet = tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
  return lossyRet;
}
T
tickduan 已提交
337 338 339

// Unconditionally compress DOUBLE data with the lossy codec by forwarding to
// tsCompressDoubleLossyImp(). Only `input`, `nelements` and `output` are used; the
// remaining parameters are ignored.
static FORCE_INLINE int tsCompressDoubleLossy(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  int lossyRet = tsCompressDoubleLossyImp(input, nelements, output);
  return lossyRet;
}
T
tickduan 已提交
342 343 344

// Unconditionally decompress DOUBLE data with the lossy codec by forwarding to
// tsDecompressDoubleLossyImp(). Only `input`, `compressedSize`, `nelements` and
// `output` are used; the remaining parameters are ignored.
static FORCE_INLINE int tsDecompressDoubleLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  int lossyRet = tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
  return lossyRet;
}
T
tickduan 已提交
347

348
#endif
T
tickduan 已提交
349

H
hzcheng 已提交
350 351 352 353 354 355 356 357 358
// Compress TIMESTAMP data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsCompressTimestampImp() writes directly into `output`.
//   TWO_STAGE_COMP: tsCompressTimestampImp() writes into `buffer`; that intermediate
//                   result is then run through tsCompressStringImp() into `output`.
// Any other value trips assert(0) and returns -1.
// Returns the value of the last stage executed. `inputSize` and `bufferSize` are not used.
static FORCE_INLINE int tsCompressTimestamp(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                        char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressTimestampImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      int stage1Len = tsCompressTimestampImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stage1Len, output, outputSize);
    }

    default:
      assert(0);
      return -1;
  }
}

// Decompress TIMESTAMP data with the scheme selected by `algorithm`.
//   ONE_STAGE_COMP: tsDecompressTimestampImp() decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp() first expands `input` into `buffer`
//                   (`bufferSize` is passed as its output size); a negative result
//                   yields -1, otherwise tsDecompressTimestampImp() decodes `buffer`
//                   into `output`.
// Any other value trips assert(0) and returns -1. `outputSize` is not used.
static FORCE_INLINE int tsDecompressTimestamp(const char *const input, int compressedSize, const int nelements, char *const output,
                          int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressTimestampImp(input, nelements, output);

    case TWO_STAGE_COMP: {
      int stage2Ret = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stage2Ret < 0) {
        return -1;
      }
      return tsDecompressTimestampImp(buffer, nelements, output);
    }

    default:
      assert(0);
      return -1;
  }
}
H
hzcheng 已提交
375 376 377 378 379 380

#ifdef __cplusplus
}
#endif

#endif  // TDENGINE_TSCOMPRESSION_H