tscompression.h 15.3 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TDENGINE_TSCOMPRESSION_H
#define TDENGINE_TSCOMPRESSION_H

#ifdef __cplusplus
extern "C" {
#endif

S
slguan 已提交
23
#include "taosdef.h"
H
hzcheng 已提交
24
#include "tutil.h"
H
hzcheng 已提交
25

H
TD-166  
hzcheng 已提交
26
#define COMP_OVERFLOW_BYTES 2
H
hzcheng 已提交
27 28
#define BITS_PER_BYTE 8
// Masks
H
Hongze Cheng 已提交
29
// Each mask has the low _x bits set. The argument is parenthesized so that
// expressions with lower precedence than << (e.g. `a & b`, `c ? a : b`) are
// shifted as a whole. _x must be smaller than the width of the shifted type.
#define INT64MASK(_x) ((((uint64_t)1) << (_x)) - 1)
#define INT32MASK(_x) (((uint32_t)1 << (_x)) - 1)
// NOTE: (uint8_t)1 is promoted to int before the shift, so the result has type int.
#define INT8MASK(_x) (((uint8_t)1 << (_x)) - 1)
// Compression algorithm
#define NO_COMPRESSION 0
#define ONE_STAGE_COMP 1
#define TWO_STAGE_COMP 2

T
tickduan 已提交
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
//
// format of the first byte of compressed data
//   ------ 7 bit ---- | ---- 1 bit ----
//        algorithm           mode
//

// the compression mode is stored in the lowest bit of the first byte
#define MODE_NOCOMPRESS  0  // original data
#define MODE_COMPRESS    1  // compatible old compress

// the compression algorithm is stored in the upper 7 bits of the first byte
#define ALGO_SZ_LOSSY     1 // SZ compress 

// Split the first byte of a compressed stream into its two fields.
// The argument and the whole expansion are parenthesized so the macros can be
// used inside larger expressions, and the byte is read as unsigned so that a
// plain (possibly signed) char with the top bit set does not yield a negative
// mode or algorithm id.
#define HEAD_MODE(x)  ((uint8_t)(x) % 2)
#define HEAD_ALGO(x)  ((uint8_t)(x) / 2)

H
hzcheng 已提交
53 54 55 56 57 58 59 60 61 62 63 64
// Low-level lossless codec entry points, defined outside this header.
// The inline wrappers in this header use the int returned by a compress call
// as the byte length handed to tsCompressStringImp, and treat a negative
// return from tsDecompressStringImp as failure.
// NOTE(review): the exact meaning of the other return values is not visible
// here - confirm against the implementation.
extern int tsCompressINTImp(const char *const input, const int nelements, char *const output, const char type);
extern int tsDecompressINTImp(const char *const input, const int nelements, char *const output, const char type);
extern int tsCompressBoolImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressBoolImp(const char *const input, const int nelements, char *const output);
extern int tsCompressStringImp(const char *const input, int inputSize, char *const output, int outputSize);
extern int tsDecompressStringImp(const char *const input, int compressedSize, char *const output, int outputSize);
extern int tsCompressTimestampImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressTimestampImp(const char *const input, const int nelements, char *const output);
extern int tsCompressDoubleImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressDoubleImp(const char *const input, const int nelements, char *const output);
extern int tsCompressFloatImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressFloatImp(const char *const input, const int nelements, char *const output);
T
tickduan 已提交
65
// lossy
T
tickduan 已提交
66 67 68 69
// Lossy float/double codec entry points, defined outside this header.
// Unlike the lossless decompressors, the lossy decompressors also take the
// size of the compressed input.
extern int tsCompressFloatLossyImp(const char * input, const int nelements, char *const output);
extern int tsDecompressFloatLossyImp(const char * input, int compressedSize, const int nelements, char *const output);
extern int tsCompressDoubleLossyImp(const char * input, const int nelements, char *const output);
extern int tsDecompressDoubleLossyImp(const char * input, int compressedSize, const int nelements, char *const output);
H
hzcheng 已提交
70

T
tickduan 已提交
71 72 73 74 75 76
extern bool lossyFloat;
extern bool lossyDouble;
// Initialization hook of the compression module.
// Declared with (void): in C an empty parameter list is not a prototype and
// would let callers pass arbitrary arguments unchecked.
int tsCompressInit(void);
// Shutdown hook of the compression module.
void tsCompressExit(void);
77

T
tickduan 已提交
78 79 80
// Timing / ratio helpers; their only visible uses in this header are inside
// commented-out profiling code around the compress calls.
// NOTE(review): exact semantics live in the implementation - confirm there.
// cost_start is declared with (void) so that it is a real prototype in C.
void cost_start(void);
double cost_end(const char* tag);
void show_rate( int in_len, int out_len);
T
tickduan 已提交
81

T
tickduan 已提交
82

H
hzcheng 已提交
83 84 85 86 87 88 89 90 91
// Compress nelements TINYINT values from input into output.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into output.
//   TWO_STAGE_COMP: tsCompressINTImp writes into buffer, then
//                   tsCompressStringImp compresses that result into output
//                   (outputSize is forwarded to it).
// inputSize and bufferSize are not used by this wrapper.
// Returns the value of the last stage that ran, or -1 when the algorithm is
// not supported or the first stage reports a negative length.
static FORCE_INLINE int tsCompressTinyint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                      char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_TINYINT);
    // A negative length must never reach the second stage as a byte count.
    if (len < 0) return -1;
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}

// Decompress nelements TINYINT values from input into output.
// In two-stage mode the string layer is undone into buffer first (bounded by
// bufferSize) and the integer decoder then reads from buffer.
// outputSize is not used by this wrapper.
// Returns the integer decoder's result, or -1 if the string stage fails or
// the algorithm is not supported.
static FORCE_INLINE int tsDecompressTinyint(const char *const input, int compressedSize, const int nelements, char *const output,
                        int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);
    case TWO_STAGE_COMP: {
      const int stageOne = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stageOne < 0) return -1;
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_TINYINT);
    }
    default:
      assert(0);
      return -1;
  }
}

// Compress nelements SMALLINT values from input into output.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into output.
//   TWO_STAGE_COMP: tsCompressINTImp writes into buffer, then
//                   tsCompressStringImp compresses that result into output
//                   (outputSize is forwarded to it).
// inputSize and bufferSize are not used by this wrapper.
// Returns the value of the last stage that ran, or -1 when the algorithm is
// not supported or the first stage reports a negative length.
static FORCE_INLINE int tsCompressSmallint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                       char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_SMALLINT);
    // A negative length must never reach the second stage as a byte count.
    if (len < 0) return -1;
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}

// Decompress nelements SMALLINT values from input into output.
// In two-stage mode the string layer is undone into buffer first (bounded by
// bufferSize) and the integer decoder then reads from buffer.
// outputSize is not used by this wrapper.
// Returns the integer decoder's result, or -1 if the string stage fails or
// the algorithm is not supported.
static FORCE_INLINE int tsDecompressSmallint(const char *const input, int compressedSize, const int nelements, char *const output,
                         int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);
    case TWO_STAGE_COMP: {
      const int stageOne = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stageOne < 0) return -1;
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_SMALLINT);
    }
    default:
      assert(0);
      return -1;
  }
}

// Compress nelements INT values from input into output.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into output.
//   TWO_STAGE_COMP: tsCompressINTImp writes into buffer, then
//                   tsCompressStringImp compresses that result into output
//                   (outputSize is forwarded to it).
// inputSize and bufferSize are not used by this wrapper.
// Returns the value of the last stage that ran, or -1 when the algorithm is
// not supported or the first stage reports a negative length.
static FORCE_INLINE int tsCompressInt(const char *const input, int inputSize, const int nelements, char *const output, int outputSize, char algorithm,
                  char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_INT);
    // A negative length must never reach the second stage as a byte count.
    if (len < 0) return -1;
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}

// Decompress nelements INT values from input into output.
// In two-stage mode the string layer is undone into buffer first (bounded by
// bufferSize) and the integer decoder then reads from buffer.
// outputSize is not used by this wrapper.
// Returns the integer decoder's result, or -1 if the string stage fails or
// the algorithm is not supported.
static FORCE_INLINE int tsDecompressInt(const char *const input, int compressedSize, const int nelements, char *const output,
                    int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);
    case TWO_STAGE_COMP: {
      const int stageOne = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stageOne < 0) return -1;
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_INT);
    }
    default:
      assert(0);
      return -1;
  }
}

// Compress nelements BIGINT values from input into output.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into output.
//   TWO_STAGE_COMP: tsCompressINTImp writes into buffer, then
//                   tsCompressStringImp compresses that result into output
//                   (outputSize is forwarded to it).
// inputSize and bufferSize are not used by this wrapper.
// Returns the value of the last stage that ran, or -1 when the algorithm is
// not supported or the first stage reports a negative length.
static FORCE_INLINE int tsCompressBigint(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_BIGINT);
    // A negative length must never reach the second stage as a byte count.
    if (len < 0) return -1;
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}

// Decompress nelements BIGINT values from input into output.
// In two-stage mode the string layer is undone into buffer first (bounded by
// bufferSize) and the integer decoder then reads from buffer.
// outputSize is not used by this wrapper.
// Returns the integer decoder's result, or -1 if the string stage fails or
// the algorithm is not supported.
static FORCE_INLINE int tsDecompressBigint(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);
    case TWO_STAGE_COMP: {
      const int stageOne = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stageOne < 0) return -1;
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_BIGINT);
    }
    default:
      assert(0);
      return -1;
  }
}

// Compress nelements BOOL values from input into output.
//   ONE_STAGE_COMP: tsCompressBoolImp writes straight into output.
//   TWO_STAGE_COMP: tsCompressBoolImp writes into buffer, then
//                   tsCompressStringImp compresses that result into output
//                   (outputSize is forwarded to it).
// inputSize and bufferSize are not used by this wrapper.
// Returns the value of the last stage that ran, or -1 when the algorithm is
// not supported or the first stage reports a negative length.
static FORCE_INLINE int tsCompressBool(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                   char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressBoolImp(input, nelements, output);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressBoolImp(input, nelements, buffer);
    // A negative length must never reach the second stage as a byte count.
    if (len < 0) return -1;
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}

// Decompress nelements BOOL values from input into output.
// In two-stage mode the string layer is undone into buffer first (bounded by
// bufferSize) and the bool decoder then reads from buffer.
// outputSize is not used by this wrapper.
// Returns the bool decoder's result, or -1 if the string stage fails or the
// algorithm is not supported.
static FORCE_INLINE int tsDecompressBool(const char *const input, int compressedSize, const int nelements, char *const output,
                     int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressBoolImp(input, nelements, output);
    case TWO_STAGE_COMP: {
      const int stageOne = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stageOne < 0) return -1;
      return tsDecompressBoolImp(buffer, nelements, output);
    }
    default:
      assert(0);
      return -1;
  }
}

// Compress inputSize bytes of input into output (capacity outputSize) with
// the string codec. The signature mirrors the typed wrappers, but only the
// string stage exists here: nelements, algorithm, buffer and bufferSize are
// ignored.
static FORCE_INLINE int tsCompressString(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
  (void)nelements;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  const int written = tsCompressStringImp(input, inputSize, output, outputSize);
  return written;
}

// Decompress compressedSize bytes of input into output (capacity outputSize)
// with the string codec. nelements, algorithm, buffer and bufferSize are
// ignored; they exist only so the signature matches the typed wrappers.
static FORCE_INLINE int tsDecompressString(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  (void)nelements;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  const int produced = tsDecompressStringImp(input, compressedSize, output, outputSize);
  return produced;
}

static FORCE_INLINE int tsCompressFloat(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                    char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
225
  // lossy mode
T
tickduan 已提交
226
  if(lossyFloat) {
T
tickduan 已提交
227 228
    return tsCompressFloatLossyImp(input, nelements, output);
  // lossless mode  
H
hzcheng 已提交
229
  } else {
T
tickduan 已提交
230
    
T
tickduan 已提交
231 232 233
    if (algorithm == ONE_STAGE_COMP) {
      return tsCompressFloatImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
T
tickduan 已提交
234
      //cost_start();
T
tickduan 已提交
235
      int len = tsCompressFloatImp(input, nelements, buffer);
T
tickduan 已提交
236 237 238 239 240 241 242
      //cost_end(" td_first_compress");
      //show_rate(inputSize, len);
      //cost_start();
      int ret = tsCompressStringImp(buffer, len, output, outputSize);
      //cost_end(" td_second_compress");
      //show_rate(inputSize, ret);
      return ret;
T
tickduan 已提交
243 244 245 246
    } else {
      assert(0);
      return -1;
    }    
H
hzcheng 已提交
247 248 249 250 251
  }
}

static FORCE_INLINE int tsDecompressFloat(const char *const input, int compressedSize, const int nelements, char *const output,
                      int outputSize, char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
252 253 254 255
  
  if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
    // decompress lossy
    return tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
H
hzcheng 已提交
256
  } else {
T
tickduan 已提交
257 258 259 260 261 262 263 264 265 266
    // decompress lossless
    if (algorithm == ONE_STAGE_COMP) {
      return tsDecompressFloatImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
      return tsDecompressFloatImp(buffer, nelements, output);
    } else {
      assert(0);
      return -1;
    }
H
hzcheng 已提交
267 268 269 270 271
  }
}

static FORCE_INLINE int tsCompressDouble(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
272
  if(lossyDouble){
T
tickduan 已提交
273 274
    // lossy mode
    return tsCompressDoubleLossyImp(input, nelements, output);
H
hzcheng 已提交
275
  } else {
T
tickduan 已提交
276 277 278 279 280 281 282 283 284 285
    // lossless mode
    if (algorithm == ONE_STAGE_COMP) {
      return tsCompressDoubleImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      int len = tsCompressDoubleImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, len, output, outputSize);
    } else {
      assert(0);
      return -1;
    }
H
hzcheng 已提交
286 287 288 289 290
  }
}

static FORCE_INLINE int tsDecompressDouble(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
T
tickduan 已提交
291 292 293
  if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
    // decompress lossy
    return tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
H
hzcheng 已提交
294
  } else {
T
tickduan 已提交
295 296 297 298 299 300 301 302 303 304
    // decompress lossless
    if (algorithm == ONE_STAGE_COMP) {
      return tsDecompressDoubleImp(input, nelements, output);
    } else if (algorithm == TWO_STAGE_COMP) {
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
      return tsDecompressDoubleImp(buffer, nelements, output);
    } else {
      assert(0);
      return -1;
    }
H
hzcheng 已提交
305 306 307
  }
}

T
tickduan 已提交
308 309 310
//
//  lossy float double
//
T
tickduan 已提交
311 312
// Compress nelements FLOAT values with the lossy codec, unconditionally
// (the lossyFloat flag is not consulted here).
// Only input, nelements and output are used; the remaining parameters exist
// so the signature matches the other typed wrappers.
static FORCE_INLINE int tsCompressFloatLossy(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                    char algorithm, char *const buffer, int bufferSize) {
  (void)inputSize;
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  const int written = tsCompressFloatLossyImp(input, nelements, output);
  return written;
}
T
tickduan 已提交
327 328 329

// Decompress nelements FLOAT values with the lossy codec, unconditionally
// (the first input byte is not inspected here).
// outputSize, algorithm, buffer and bufferSize are ignored.
static FORCE_INLINE int tsDecompressFloatLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                      int outputSize, char algorithm, char *const buffer, int bufferSize){
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  const int produced = tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
  return produced;
}
T
tickduan 已提交
335 336 337

// Compress nelements DOUBLE values with the lossy codec, unconditionally
// (the lossyDouble flag is not consulted here).
// Only input, nelements and output are used; the remaining parameters exist
// so the signature matches the other typed wrappers.
static FORCE_INLINE int tsCompressDoubleLossy(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                     char algorithm, char *const buffer, int bufferSize){
  (void)inputSize;
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  const int written = tsCompressDoubleLossyImp(input, nelements, output);
  return written;
}
T
tickduan 已提交
340 341 342

// Decompress nelements DOUBLE values with the lossy codec, unconditionally
// (the first input byte is not inspected here).
// outputSize, algorithm, buffer and bufferSize are ignored.
static FORCE_INLINE int tsDecompressDoubleLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                       int outputSize, char algorithm, char *const buffer, int bufferSize){
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  const int produced = tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
  return produced;
}
T
tickduan 已提交
345 346


H
hzcheng 已提交
347 348 349 350 351 352 353 354 355
// Compress nelements TIMESTAMP values from input into output.
//   ONE_STAGE_COMP: tsCompressTimestampImp writes straight into output.
//   TWO_STAGE_COMP: tsCompressTimestampImp writes into buffer, then
//                   tsCompressStringImp compresses that result into output
//                   (outputSize is forwarded to it).
// inputSize and bufferSize are not used by this wrapper.
// Returns the value of the last stage that ran, or -1 when the algorithm is
// not supported or the first stage reports a negative length.
static FORCE_INLINE int tsCompressTimestamp(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
                        char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressTimestampImp(input, nelements, output);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressTimestampImp(input, nelements, buffer);
    // A negative length must never reach the second stage as a byte count.
    if (len < 0) return -1;
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}

// Decompress nelements TIMESTAMP values from input into output.
// In two-stage mode the string layer is undone into buffer first (bounded by
// bufferSize) and the timestamp decoder then reads from buffer.
// outputSize is not used by this wrapper.
// Returns the timestamp decoder's result, or -1 if the string stage fails or
// the algorithm is not supported.
static FORCE_INLINE int tsDecompressTimestamp(const char *const input, int compressedSize, const int nelements, char *const output,
                          int outputSize, char algorithm, char *const buffer, int bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressTimestampImp(input, nelements, output);
    case TWO_STAGE_COMP: {
      const int stageOne = tsDecompressStringImp(input, compressedSize, buffer, bufferSize);
      if (stageOne < 0) return -1;
      return tsDecompressTimestampImp(buffer, nelements, output);
    }
    default:
      assert(0);
      return -1;
  }
}
H
hzcheng 已提交
372 373 374 375 376 377

#ifdef __cplusplus
}
#endif

#endif  // TDENGINE_TSCOMPRESSION_H