未验证 提交 e28a0339 编写于 作者: W wade zhang 提交者: GitHub

Merge pull request #12236 from taosdata/feature/TD-15340-V24

refactor: V2.4 move TSZ to main repo 
......@@ -7,10 +7,6 @@
[submodule "deps/jemalloc"]
path = deps/jemalloc
url = https://github.com/jemalloc/jemalloc
[submodule "deps/TSZ"]
path = deps/TSZ
url = https://github.com/taosdata/TSZ.git
branch = master
[submodule "src/kit/taos-tools"]
path = src/kit/taos-tools
url = https://github.com/taosdata/taos-tools
......
Subproject commit 11c1060d4f917dd799ae628b131db5d6a5ef6954
CMAKE_MINIMUM_REQUIRED(VERSION 3.0)
PROJECT(TDengine)

# Build script for the TSZ static library: the SZ-derived lossy compressor
# sources under sz/ plus the bundled zstd sources under zstd/.

# source
# NOTE: AUX_SOURCE_DIRECTORY collects files at configure time only; re-run
# CMake after adding or removing a source file in any of these directories.
AUX_SOURCE_DIRECTORY(sz/src SRC1)
AUX_SOURCE_DIRECTORY(zstd/dictBuilder SRC2)
AUX_SOURCE_DIRECTORY(zstd/common SRC3)
AUX_SOURCE_DIRECTORY(zstd/compress SRC4)
AUX_SOURCE_DIRECTORY(zstd/decompress SRC5)
AUX_SOURCE_DIRECTORY(zstd/deprecated SRC6)
AUX_SOURCE_DIRECTORY(zstd/legacy SRC7)

# archive
ADD_LIBRARY(TSZ STATIC ${SRC1} ${SRC2} ${SRC3} ${SRC4} ${SRC5} ${SRC6} ${SRC7})

# include
# Include paths are attached to the target instead of the directory scope:
#   PUBLIC  - sz/include holds the headers that consumers of TSZ compile against.
#   PRIVATE - the zstd directories are only needed to compile TSZ itself.
TARGET_INCLUDE_DIRECTORIES(TSZ
  PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/sz/include
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/zstd
    ${CMAKE_CURRENT_SOURCE_DIR}/zstd/common
)

# windows ignore warning
IF (TD_WINDOWS)
  # Suppress all compiler warnings for this third-party derived code.
  TARGET_COMPILE_OPTIONS(TSZ PRIVATE -w)
ENDIF ()
BSD 3-Clause License
Copyright (c) 2021, taosdata
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# TSZ
Error-bounded Lossy Data Compressor For Float Double
The TSZ algorithm is derived from the SZ algorithm (GitHub: https://github.com/szcompressor).
The improvements over SZ are listed below:
1) Better speed and size
The SZ header is about 24 bytes; TSZ reduces it to 2 bytes.
Unused code was deleted and unnecessary functions were dropped.
2) Supports multi-threading; the interface is thread-safe.
3) Removed the 2D, 3D, 4D and 5D functions; only the 1D functions remain.
4) Removed int8, int16, int32 and the other data types; only float and double remain.
5) Optimize code speed
6) Other optimizations...
After these modifications, TSZ is faster, smaller and independent, which makes it more suitable for compressing small blocks of data.
/**
* @file ByteToolkit.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the ByteToolkit.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _ByteToolkit_H
#define _ByteToolkit_H

/*
 * Standard headers are included before the extern "C" block so that a C++
 * translation unit never pulls a standard header in from inside a linkage
 * specification. <stddef.h> provides size_t explicitly; <stdio.h> is kept
 * for files that relied on it being included here.
 */
#include <stddef.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Prototypes for the byte-level helpers implemented in ByteToolkit.c.
 * The notes below are derived from the function names only; see the
 * implementation for the exact contracts.
 */

/* Integer <-> byte-array conversions (named for big-endian byte order). */
int bytesToInt_bigEndian(unsigned char* bytes);
void intToBytes_bigEndian(unsigned char *b, unsigned int num);
long bytesToLong_bigEndian(unsigned char* b);
void longToBytes_bigEndian(unsigned char *b, long num);

/* Exponent / required-precision-length queries for float and double. */
short getExponent_float(float value);
short getPrecisionReqLength_float(float precision);
short getExponent_double(double value);
short getPrecisionReqLength_double(double precision);

/* Floating-point <-> byte-array conversions. */
float bytesToFloat(unsigned char* bytes);
void floatToBytes(unsigned char *b, float num);
double bytesToDouble(unsigned char* bytes);
void doubleToBytes(unsigned char *b, double num);

/* Bit mask / shift helpers. */
int getMaskRightCode(int m);
int getLeftMovingCode(int kMod8);
int getRightMovingSteps(int kMod8, int resiBitLength);
int getRightMovingCode(int kMod8, int resiBitLength);

/* size_t <-> byte-array conversions; size_type presumably selects the
 * encoded width - TODO confirm against ByteToolkit.c. */
size_t bytesToSize(unsigned char* bytes, int size_type);
void sizeToBytes(unsigned char* outBytes, size_t size, int size_type);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _ByteToolkit_H ----- */
/**
* @file CompressElement.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Compress Elements such as DoubleCompressELement.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _CompressElement_H
#define _CompressElement_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Per-value working state for a double being compressed.
 * NOTE(review): curValue is a long while curBytes holds 8 bytes; long is
 * 4 bytes on LLP64 platforms such as 64-bit Windows - confirm this is intended.
 */
typedef struct DoubleValueCompressElement
{
	double data;
	long curValue;
	unsigned char curBytes[8]; /* stored big-endian */
	int reqBytesLength;
	int resiBitsLength;
} DoubleValueCompressElement;

/* Per-value working state for a float being compressed. */
typedef struct FloatValueCompressElement
{
	float data;                /* diffValue + medianValue */
	int curValue;              /* integer form of the diff value */
	unsigned char curBytes[4]; /* bytes of the diff value, stored big-endian */
	int reqBytesLength;
	int resiBitsLength;
} FloatValueCompressElement;

/* One losslessly stored element, split into lead count, mid bytes and residual bits. */
typedef struct LossyCompressionElement
{
	int leadingZeroBytes;             /* one of 0, 1, 2 or 3 */
	unsigned char integerMidBytes[8];
	int integerMidBytes_Length;       /* these are really the mid bits */
	int resMidBitsLength;
	int residualMidBits;
} LossyCompressionElement;

/* Group-number computation. */
short computeGroupNum_float(float value);
short computeGroupNum_double(double value);

/* Helpers operating on a three-entry history of previous values. */
void listAdd_double(double last3CmprsData[3], double value);
void listAdd_float(float last3CmprsData[3], float value);
void listAdd_int(int64_t last3CmprsData[3], int64_t value);
void listAdd_int32(int32_t last3CmprsData[3], int32_t value);

/* Group-based variants of the list helpers. */
void listAdd_float_group(float *groups, int *flags, char groupNum, float oriValue, float decValue, char* curGroupID);
void listAdd_double_group(double *groups, int *flags, char groupNum, double oriValue, double decValue, char* curGroupID);

/* Prediction validity checks. */
int validPrediction_double(double minErr, double precision);
int validPrediction_float(float minErr, float precision);

/* Group error-bound helpers. */
double* generateGroupErrBounds(int errorBoundMode, double realPrecision, double pwrErrBound);
int generateGroupMaxIntervalCount(double* groupErrBounds);

/* Construction / update of a LossyCompressionElement. */
void new_LossyCompressionElement(LossyCompressionElement *lce, int leadingNum, unsigned char* intMidBytes,
		int intMidBytes_Length, int resiMidBitsLength, int resiBits);
void updateLossyCompElement_Double(unsigned char* curBytes, unsigned char* preBytes,
		int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce);
void updateLossyCompElement_Float(unsigned char* curBytes, unsigned char* preBytes,
		int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _CompressElement_H ----- */
/**
* @file DynamicByteArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Byte Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicByteArray_H
#define _DynamicByteArray_H

/*
 * Standard headers are included outside the extern "C" block so that C++
 * consumers do not include them from within a linkage specification.
 * <stddef.h> supplies size_t; <stdio.h> is kept for existing includers.
 */
#include <stddef.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Byte buffer described by a data pointer, a size and a capacity. */
typedef struct DynamicByteArray
{
	unsigned char* array; /* buffer storage */
	size_t size;          /* presumably the number of bytes in use - TODO confirm */
	size_t capacity;      /* presumably the number of bytes allocated - TODO confirm */
} DynamicByteArray;

/* Lifecycle and conversion. */
void new_DBA(DynamicByteArray **dba, size_t cap);
void convertDBAtoBytes(DynamicByteArray *dba, unsigned char** bytes);
void free_DBA(DynamicByteArray *dba);

/* Element access and append operations. */
unsigned char getDBA_Data(DynamicByteArray *dba, size_t pos);
void addDBA_Data(DynamicByteArray *dba, unsigned char value);
void memcpyDBA_Data(DynamicByteArray *dba, unsigned char* data, size_t length);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicByteArray_H ----- */
/**
* @file DynamicIntArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Int Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicIntArray_H
#define _DynamicIntArray_H

/*
 * Standard headers are included outside the extern "C" block so that C++
 * consumers do not include them from within a linkage specification.
 * <stddef.h> supplies size_t; <stdio.h> is kept for existing includers.
 */
#include <stddef.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

/* "Int" array whose elements are stored one byte each. */
typedef struct DynamicIntArray
{
	unsigned char* array; /* one byte per element is enough; it does not have to be int* */
	size_t size;          /* presumably the number of elements in use - TODO confirm */
	size_t capacity;      /* presumably the number of elements allocated - TODO confirm */
} DynamicIntArray;

/* Lifecycle and conversion. */
void new_DIA(DynamicIntArray **dia, size_t cap);
void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data);
void free_DIA(DynamicIntArray *dia);

/* Element access and append. Values travel as int through the API although
 * the backing storage is unsigned char. */
int getDIA_Data(DynamicIntArray *dia, size_t pos);
void addDIA_Data(DynamicIntArray *dia, int value);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicIntArray_H ----- */
/**
* @file Huffman.h
* @author Sheng Di
* @date Aug., 2016
* @brief Header file for the Huffman tree encoder/decoder.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _Huffman_H
#define _Huffman_H

/* size_t is used throughout this header; include its definition explicitly so
 * the header does not depend on what the includer happened to include first. */
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

//Note: when changing the following settings, intvCapacity in sz.h should be changed as well.
//#define allNodes 131072
//#define stateNum 65536

/* One node of the Huffman tree. Note that `node` is a POINTER typedef. */
typedef struct node_t {
	struct node_t *left, *right;
	size_t freq;
	char t;         //in_node:0; otherwise:1
	unsigned int c;
} *node;

/* State of one Huffman coder instance. */
typedef struct HuffmanTree {
	unsigned int stateNum;
	unsigned int allNodes;
	struct node_t* pool;
	node *qqq, *qq;       //the root node of the HuffmanTree is qq[1]
	int n_nodes;          //n_nodes is for compression
	int qend;
	unsigned long **code;
	unsigned char *cout;
	int n_inode;          //n_inode is for decompression
	int maxBitCount;
} HuffmanTree;

/* Tree construction. */
HuffmanTree* createHuffmanTree(int stateNum);
/* Declared with (void) so that C compilers see a real prototype and reject
 * calls that pass arguments; an empty () declares an unprototyped function. */
HuffmanTree* createDefaultHuffmanTree(void);

/* Node allocation and queue primitives. */
node new_node(HuffmanTree *huffmanTree, size_t freq, unsigned int c, node a, node b);
node new_node2(HuffmanTree *huffmanTree, unsigned int c, unsigned char t);
void qinsert(HuffmanTree *huffmanTree, node n);
node qremove(HuffmanTree *huffmanTree);
void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, unsigned long out2);

/* Initialisation from an input sequence, and raw encode / decode. */
void init(HuffmanTree *huffmanTree, int *s, size_t length);
void init_static(HuffmanTree *huffmanTree, int *s, size_t length);
void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out, size_t *outSize);
void decode(unsigned char *s, size_t targetLength, node t, int *out);

/* Tree -> byte-array conversion, with index arrays of 8, 16 or 32 bits. */
void pad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
void pad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
void pad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int nodeCount, unsigned char** out);

/* Byte-array -> tree reconstruction, with index arrays of 8, 16 or 32 bits. */
void unpad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R, unsigned int* C, unsigned char *t, unsigned int i, node root);
void unpad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
void unpad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigned char* bytes, int nodeCount);

/* Encode / decode variants that take the tree as a parameter. */
void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize);
void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out);

/* Releases a HuffmanTree. */
void SZ_ReleaseHuffman(HuffmanTree* huffmanTree);

#ifdef __cplusplus
}
#endif
#endif
/**
* @file TightDataPointStorageD.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for the tight data point storage (TDPS).
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _TightDataPointStorageD_H
#define _TightDataPointStorageD_H

/* <stddef.h> is included explicitly for size_t, so this header does not rely
 * on "pub.h" happening to provide it. */
#include <stddef.h>
#include <stdbool.h>
#include "pub.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Tight data point storage (TDPS) for double data. */
typedef struct TightDataPointStorageD
{
	size_t dataSeriesLength;
	int allSameData;
	double realPrecision;
	double medianValue;
	char reqLength;
	char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression"

	double minLogValue;

	int stateNum;
	int allNodes;

	size_t exactDataNum;
	double reservedValue;

	unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1)
	size_t typeArray_size;

	unsigned char* leadNumArray; //its size is exactDataNum/4 (or exactDataNum/4+1)
	size_t leadNumArray_size;

	unsigned char* exactMidBytes;
	size_t exactMidBytes_size;

	unsigned char* residualMidBits;
	size_t residualMidBits_size;

	unsigned int intervals;

	unsigned char isLossless; //a mark to denote whether it's lossless compression (1 is yes, 0 is no)

	size_t segment_size;

	unsigned char* raBytes;
	size_t raBytes_size;

	unsigned char plus_bits;
	unsigned char max_bits;

} TightDataPointStorageD;

/* Constructors: empty, from a flat byte stream, and from explicit components. */
void new_TightDataPointStorageD_Empty(TightDataPointStorageD **self);
int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **self, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params);

void new_TightDataPointStorageD(TightDataPointStorageD **self,
		size_t dataSeriesLength, size_t exactDataNum,
		int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
		unsigned char* leadNumIntArray,  //leadNumIntArray contains readable numbers....
		unsigned char* resiMidBits, size_t resiMidBits_size,
		unsigned char resiBitLength,
		double realPrecision, double medianValue, char reqLength,
		unsigned int intervals, unsigned char radExpo);

/* Serialisation into a caller-supplied byte buffer. */
void convertTDPStoBytes_double(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte);
bool convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char* bytes, size_t *size);

/* Release functions. The difference between the two variants is not visible
 * from this header - see the implementation. */
void free_TightDataPointStorageD(TightDataPointStorageD *tdps);
void free_TightDataPointStorageD2(TightDataPointStorageD *tdps);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _TightDataPointStorageD_H ----- */
/**
* @file TightDataPointStorageF.h
* @author Sheng Di and Dingwen Tao
* @date Aug, 2016
* @brief Header file for the tight data point storage (TDPS).
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _TightDataPointStorageF_H
#define _TightDataPointStorageF_H

/*
 * Standard headers are included outside the extern "C" block so that C++
 * consumers do not include them from within a linkage specification.
 * <stddef.h> supplies size_t explicitly.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/* The project header is left at its original position inside the linkage
 * block so that the linkage of whatever it declares is unchanged. */
#include "pub.h"

/* Tight data point storage (TDPS) for float data. */
typedef struct TightDataPointStorageF
{
	size_t dataSeriesLength;
	int allSameData;
	double realPrecision; //it's used as the pwrErrBoundRatio when errBoundMode==PW_REL
	float medianValue;
	char reqLength;
	char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression"

	int stateNum;
	int allNodes;

	size_t exactDataNum;
	float reservedValue;
	float minLogValue;

	unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1)
	size_t typeArray_size;

	unsigned char* leadNumArray; //its size is exactDataNum/4 (or exactDataNum/4+1)
	size_t leadNumArray_size;

	unsigned char* exactMidBytes;
	size_t exactMidBytes_size;

	unsigned char* residualMidBits;
	size_t residualMidBits_size;

	unsigned int intervals; //quantization_intervals

	unsigned char isLossless; //a mark to denote whether it's lossless compression (1 is yes, 0 is no)

	size_t segment_size;

	unsigned char* raBytes;
	size_t raBytes_size;

	unsigned char plus_bits;
	unsigned char max_bits;

} TightDataPointStorageF;

/* Constructors: empty, from a flat byte stream, and from explicit components. */
void new_TightDataPointStorageF_Empty(TightDataPointStorageF **self);
int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **self, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params);

void new_TightDataPointStorageF(TightDataPointStorageF **self,
		size_t dataSeriesLength, size_t exactDataNum,
		int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
		unsigned char* leadNumIntArray,  //leadNumIntArray contains readable numbers....
		unsigned char* resiMidBits, size_t resiMidBits_size,
		unsigned char resiBitLength,
		double realPrecision, float medianValue, char reqLength, unsigned int intervals,
		unsigned char radExpo);

/* Serialisation into a caller-supplied byte buffer. */
void convertTDPStoBytes_float(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte);
bool convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size);

/* Release functions. The difference between the two variants is not visible
 * from this header - see the implementation. */
void free_TightDataPointStorageF(TightDataPointStorageF *tdps);
void free_TightDataPointStorageF2(TightDataPointStorageF *tdps);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _TightDataPointStorageF_H ----- */
/**
* @file TypeManager.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the TypeManager.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _TypeManager_H
#define _TypeManager_H

/*
 * Standard headers are included outside the extern "C" block so that C++
 * consumers do not include them from within a linkage specification.
 * <stddef.h> supplies size_t explicitly.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

//TypeManager.c

/* Conversions between byte arrays and the "2b" int-array form.
 * Output arrays are returned through the unsigned char** parameters; who
 * allocates and frees them is not visible from this header - check TypeManager.c. */
void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);

/* Shift computation and the variable-bit-length ("dynamic") conversion. */
int getLeftMovingSteps(size_t k, unsigned char resiBitLength);
size_t convertIntArray2ByteArray_fast_dynamic(unsigned char* timeStepType, unsigned char resiBitLength, size_t nbEle, unsigned char **bytes);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _TypeManager_H ----- */
/**
* @file conf.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the conf.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _Conf_H
#define _Conf_H

/*
 * Standard headers are included outside the extern "C" block so that C++
 * consumers do not include them from within a linkage specification.
 * <stddef.h> supplies size_t explicitly.
 */
#include <stddef.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

//
// set default value
//
/* NOTE(review): sz_exedata and sz_params are not declared by this header, so
 * the includer must have their definitions in scope first. Other headers of
 * this library include "pub.h" before using them - presumably that is where
 * they live; TODO confirm and consider including it here. */
void setDefaulParams(sz_exedata* exedata, sz_params* params);

//conf.c
/* Quantization settings. */
void updateQuantizationInfo(int quant_intervals);

/* Configuration-file handling; both take the path of the SZ config file. */
int SZ_ReadConf(const char* sz_cfgFile);
int SZ_LoadConf(const char* sz_cfgFile);

/* Numeric helpers. */
unsigned int roundUpToPowerOf2(unsigned int base);
double computeABSErrBoundFromPSNR(double psnr, double threshold, double value_range);
double computeABSErrBoundFromNORM_ERR(double normErr, size_t nbEle);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _Conf_H ----- */
/**
* @file dataCompression.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the dataCompression.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DataCompression_H
#define _DataCompression_H

/*
 * Standard headers are included outside the extern "C" block so that C++
 * consumers do not include them from within a linkage specification.
 * <stddef.h> supplies size_t explicitly.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/* The project header is left at its original position inside the linkage
 * block so that the linkage of whatever it declares is unchanged. */
#include "sz.h"

/*
 * Scans data[1 .. size-1] and updates the running minimum and maximum.
 *
 * This is a statement macro that relies on the expansion site providing the
 * variables `i`, `size`, `data_`, `min` and `max`; `min` and `max` must already
 * be initialised (element 0 is not visited by the loop).
 *
 * The definition deliberately ends WITHOUT a trailing backslash: a dangling
 * line continuation would splice whatever line follows into the macro.
 */
#define computeMinMax(data) \
        for(i=1;i<size;i++)\
        {\
                data_ = (data)[i];\
                if(min>data_)\
                        min = data_;\
                else if(max<data_)\
                        max = data_;\
        }

//dataCompression.c
/* Value-range computation; valueRangeSize and medianValue are output parameters. */
double computeRangeSize_double(double* oriData, size_t size, double* valueRangeSize, double* medianValue);
float computeRangeSize_float(float* oriData, size_t size, float* valueRangeSize, float* medianValue);

/* Scalar min / max helpers. */
double min_d(double a, double b);
double max_d(double a, double b);
float min_f(float a, float b);
float max_f(float a, float b);

/* Real-precision computation for an error-bound mode; a status code is
 * written through `status`. All three variants return double. */
double getRealPrecision_double(double valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
double getRealPrecision_float(float valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
double getRealPrecision_int(long valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);

/* In-place transforms of fixed-size byte arrays. */
void symTransform_8bytes(unsigned char data[8]);
void symTransform_2bytes(unsigned char data[2]);
void symTransform_4bytes(unsigned char data[4]);

/* Single-value compression; the result is written to *vce. */
void compressSingleFloatValue(FloatValueCompressElement *vce, float tgtValue, float precision, float medianValue,
		int reqLength, int reqBytesLength, int resiBitsLength);
void compressSingleDoubleValue(DoubleValueCompressElement *vce, double tgtValue, double precision, double medianValue,
		int reqLength, int reqBytesLength, int resiBitsLength);

/* Leading-byte comparison between the previous and the current value. */
int compIdenticalLeadingBytesCount_double(unsigned char* preBytes, unsigned char* curBytes);
int compIdenticalLeadingBytesCount_float(unsigned char* preBytes, unsigned char* curBytes);

/* Appends one LossyCompressionElement to the three output arrays. */
void addExactData(DynamicByteArray *exactMidByteArray, DynamicIntArray *exactLeadNumArray,
		DynamicIntArray *resiBitArray, LossyCompressionElement *lce);

/* Prediction / block-size helpers. */
int getPredictionCoefficients(int layers, int dimension, int **coeff_array, int *status);
int computeBlockEdgeSize_3D(int segmentSize);
int computeBlockEdgeSize_2D(int segmentSize);

/* Exact-data array compression and decompression, float variants. */
int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData);
int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
		int reqLength, int reqBytesLength, int resiBitsLength, float medianValue);
void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData);

/* Exact-data array compression and decompression, double variants. */
int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData);
int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
		int reqLength, int reqBytesLength, int resiBitsLength, double medianValue);
void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData);

#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DataCompression_H ----- */
/**
* @file defines.h
* @author Sheng Di
* @date July, 2019
* @brief Shared constant definitions (modes, data types, status codes) for SZ.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _SZ_DEFINES_H
#define _SZ_DEFINES_H

// define data format version
// NOTE: the macro name is misspelled ("FROMAT"); it is kept as is because
// renaming it would break existing users.
#define DATA_FROMAT_VER1   1  // current version

// compressor identifiers
#define PASTRI 103
#define HZ 102 //deprecated
#define SZ 101
#define SZ_Transpose 104

//prediction mode of temporal dimension based compression
#define SZ_PREVIOUS_VALUE_ESTIMATE 0

// minimum element count. NOTE(review): the original comment said compression
// is skipped "if the # elements <= 20", which does not match the value 10 -
// confirm the intended threshold against the code that uses this macro.
#define MIN_NUM_OF_ELEMENTS 10

// error-bound modes
#define SZ_ABS 0
#define REL 1
#define VR_REL 1  //alternative name to REL
#define ABS_AND_REL 2
#define ABS_OR_REL 3
#define PSNR 4
#define NORM 5

#define PW_REL 10
#define ABS_AND_PW_REL 11
#define ABS_OR_PW_REL 12
#define REL_AND_PW_REL 13
#define REL_OR_PW_REL 14

// data type identifiers
#define SZ_FLOAT 0
#define SZ_DOUBLE 1
#define SZ_UINT8 2
#define SZ_INT8 3
#define SZ_UINT16 4
#define SZ_INT16 5
#define SZ_UINT32 6
#define SZ_INT32 7
#define SZ_UINT64 8
#define SZ_INT64 9

#define LITTLE_ENDIAN_DATA 0 //refers to the endian type of the data read from the disk
#define BIG_ENDIAN_DATA 1 //big_endian (ppc, max, etc.) ; little_endian (x86, x64, etc.)

#define LITTLE_ENDIAN_SYSTEM 0 //refers to the endian type of the system
#define BIG_ENDIAN_SYSTEM 1

#define DynArrayInitLen 1024

#define MIN_ZLIB_DEC_ALLOMEM_BYTES 1000000

//#define maxRangeRadius 32768
//#define maxRangeRadius 1048576//131072

// compression modes
#define SZ_BEST_SPEED 0
#define SZ_BEST_COMPRESSION 1
#define SZ_DEFAULT_COMPRESSION 2
#define SZ_TEMPORAL_COMPRESSION 3

#define SZ_NO_REGRESSION 0
#define SZ_WITH_LINEAR_REGRESSION 1

#define SZ_PWR_MIN_TYPE 0
#define SZ_PWR_AVG_TYPE 1
#define SZ_PWR_MAX_TYPE 2

#define SZ_FORCE_SNAPSHOT_COMPRESSION 0
#define SZ_FORCE_TEMPORAL_COMPRESSION 1
#define SZ_PERIO_TEMPORAL_COMPRESSION 2

//SUCCESS returning status
// Negative values are parenthesised so that each macro expands to a single
// primary expression wherever it is used.
#define SZ_SUCCESS 0  //successful
#define SZ_FAILED (-1) //Not successful
#define SZ_FERR (-2) //Failed to open input file
#define SZ_TERR (-3) //wrong data type (should be only float or double)
#define SZ_DERR (-4) //dimension error
#define SZ_MERR (-5) //sz_mode error
#define SZ_BERR (-6) //bound-mode error (should be only SZ_ABS, REL, ABS_AND_REL, ABS_OR_REL, or PW_REL)
#define SZ_LITTER_ELEMENT (-7)
#define SZ_ALGORITHM_ERR  (-8)
#define SZ_FORMAT_ERR     (-9)

#define SZ_MAINTAIN_VAR_DATA 0
#define SZ_DESTROY_WHOLE_VARSET 1

#define GROUP_COUNT 16 //2^{16}=65536

// metaData remove some by tickduan
#define MetaDataByteLength 2         // original is 28 bytes
#define MetaDataByteLength_double 2  // original is 36 bytes

#define numOfBufferedSteps 1 //the number of time steps in the buffer

// lossless back-end compressor selection
#define GZIP_COMPRESSOR 0 //i.e., ZLIB_COMPRSSOR
#define ZSTD_COMPRESSOR 1

#endif /* _SZ_DEFINES_H */
/*-------------------------------------------------------------------------*/
/**
@file dictionary.h
@author N. Devillard
@brief Implements a dictionary for string variables.
This module implements a simple dictionary object, i.e. a list
of string/string associations. This object is useful to store e.g.
informations retrieved from a configuration file (ini files).
*/
/*--------------------------------------------------------------------------*/
#ifndef _DICTIONARY_H_
#define _DICTIONARY_H_
/*---------------------------------------------------------------------------
Includes
---------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*---------------------------------------------------------------------------
New types
---------------------------------------------------------------------------*/
#ifdef __cplusplus
extern "C" {
#endif
/*-------------------------------------------------------------------------*/
/**
@brief Dictionary object
This object contains a list of string/string associations. Each
association is identified by a unique string key. Looking up values
in the dictionary is sped up by the use of a (hopefully collision-free)
hash function.
*/
/*-------------------------------------------------------------------------*/
typedef struct _dictionary_ {
int n ; /** Number of entries in dictionary */
int size ; /** Storage size */
char ** val ; /** List of string values */
char ** key ; /** List of string keys */
unsigned * hash ; /** List of hash values for keys */
} dictionary ;
/*---------------------------------------------------------------------------
Function prototypes
---------------------------------------------------------------------------*/
/*-------------------------------------------------------------------------*/
/**
@brief Compute the hash key for a string.
@param key Character string to use for key.
@return Unsigned int on at least 32 bits.
This hash function has been taken from an article in Dr Dobbs Journal.
This is normally a collision-free function, distributing keys evenly.
The key is stored anyway in the struct so that collisions can be resolved
by comparing the key itself as a last resort.
*/
/*--------------------------------------------------------------------------*/
unsigned dictionary_hash(const char * key);
/*-------------------------------------------------------------------------*/
/**
@brief Create a new dictionary object.
@param size Optional initial size of the dictionary.
@return Newly allocated dictionary object.
This function allocates a new dictionary object of given size and returns
it. If you do not know in advance (roughly) the number of entries in the
dictionary, give size=0.
*/
/*--------------------------------------------------------------------------*/
dictionary * dictionary_new(int size);
/*-------------------------------------------------------------------------*/
/**
@brief Delete a dictionary object
@param vd dictionary object to deallocate.
@return void
Deallocate a dictionary object and all memory associated to it.
*/
/*--------------------------------------------------------------------------*/
void dictionary_del(dictionary * vd);
/*-------------------------------------------------------------------------*/
/**
@brief Get a value from a dictionary.
@param d dictionary object to search.
@param key Key to look for in the dictionary.
@param def Default value to return if key not found.
@return Pointer to an internally allocated character string.
This function locates a key in a dictionary and returns a pointer to its
value, or the passed 'def' pointer if no such key can be found in
dictionary. The returned character pointer points to data internal to the
dictionary object, you should not try to free it or modify it.
*/
/*--------------------------------------------------------------------------*/
char * dictionary_get(dictionary * d, const char * key, char * def);
/*-------------------------------------------------------------------------*/
/**
@brief Set a value in a dictionary.
@param vd dictionary object to modify.
@param key Key to modify or add.
@param val Value to add.
@return int 0 if Ok, anything else otherwise
If the given key is found in the dictionary, the associated value is
replaced by the provided one. If the key cannot be found in the
dictionary, it is added to it.
It is Ok to provide a NULL value for val, but NULL values for the dictionary
or the key are considered as errors: the function will return immediately
in such a case.
Notice that if you dictionary_set a variable to NULL, a call to
dictionary_get will return a NULL value: the variable will be found, and
its value (NULL) is returned. In other words, setting the variable
content to NULL is equivalent to deleting the variable from the
dictionary. It is not possible (in this implementation) to have a key in
the dictionary without value.
This function returns non-zero in case of failure.
*/
/*--------------------------------------------------------------------------*/
int dictionary_set(dictionary * vd, const char * key, const char * val);
/*-------------------------------------------------------------------------*/
/**
@brief Delete a key in a dictionary
@param d dictionary object to modify.
@param key Key to remove.
@return void
This function deletes a key in a dictionary. Nothing is done if the
key cannot be found.
*/
/*--------------------------------------------------------------------------*/
void dictionary_unset(dictionary * d, const char * key);
/*-------------------------------------------------------------------------*/
/**
@brief Dump a dictionary to an opened file pointer.
@param d Dictionary to dump
@param out Opened file pointer.
@return void
Dumps a dictionary onto an opened file pointer. Key pairs are printed out
as @c [Key]=[Value], one per line. It is Ok to provide stdout or stderr as
output file pointers.
*/
/*--------------------------------------------------------------------------*/
void dictionary_dump(dictionary * d, FILE * out);
#ifdef __cplusplus
}
#endif
#endif
/*-------------------------------------------------------------------------*/
/**
@file iniparser.h
@author N. Devillard
@brief Parser for ini files.
*/
/*--------------------------------------------------------------------------*/
#ifndef _INIPARSER_H_
#define _INIPARSER_H_
/*---------------------------------------------------------------------------
Includes
---------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* The following #include is necessary on many Unixes but not Linux.
* It is not needed for Windows platforms.
* Uncomment it if needed.
*/
/* #include <unistd.h> */
#include "dictionary.h"
/*-------------------------------------------------------------------------*/
/**
@brief Get number of sections in a dictionary
@param d Dictionary to examine
@return int Number of sections found in dictionary
This function returns the number of sections found in a dictionary.
The test to recognize sections is done on the string stored in the
dictionary: a section name is given as "section" whereas a key is
stored as "section:key", thus the test looks for entries that do not
contain a colon.
This clearly fails in the case a section name contains a colon, but
this should simply be avoided.
This function returns -1 in case of error.
*/
/*--------------------------------------------------------------------------*/
int iniparser_getnsec(dictionary * d);
/*-------------------------------------------------------------------------*/
/**
@brief Get name for section n in a dictionary.
@param d Dictionary to examine
@param n Section number (from 0 to nsec-1).
@return Pointer to char string
This function locates the n-th section in a dictionary and returns
its name as a pointer to a string statically allocated inside the
dictionary. Do not free or modify the returned string!
This function returns NULL in case of error.
*/
/*--------------------------------------------------------------------------*/
char * iniparser_getsecname(dictionary * d, int n);
/*-------------------------------------------------------------------------*/
/**
@brief Save a dictionary to a loadable ini file
@param d Dictionary to dump
@param f Opened file pointer to dump to
@return void
This function dumps a given dictionary into a loadable ini file.
It is Ok to specify @c stderr or @c stdout as output files.
*/
/*--------------------------------------------------------------------------*/
void iniparser_dump_ini(dictionary * d, FILE * f);
/*-------------------------------------------------------------------------*/
/**
@brief Save a dictionary section to a loadable ini file
@param d Dictionary to dump
@param s Section name of dictionary to dump
@param f Opened file pointer to dump to
@return void
This function dumps a given section of a given dictionary into a loadable ini
file. It is Ok to specify @c stderr or @c stdout as output files.
*/
/*--------------------------------------------------------------------------*/
void iniparser_dumpsection_ini(dictionary * d, char * s, FILE * f);
/*-------------------------------------------------------------------------*/
/**
@brief Dump a dictionary to an opened file pointer.
@param d Dictionary to dump.
@param f Opened file pointer to dump to.
@return void
This function prints out the contents of a dictionary, one element by
line, onto the provided file pointer. It is OK to specify @c stderr
or @c stdout as output files. This function is meant for debugging
purposes mostly.
*/
/*--------------------------------------------------------------------------*/
void iniparser_dump(dictionary * d, FILE * f);
/*-------------------------------------------------------------------------*/
/**
@brief Get the number of keys in a section of a dictionary.
@param d Dictionary to examine
@param s Section name of dictionary to examine
@return Number of keys in section
*/
/*--------------------------------------------------------------------------*/
int iniparser_getsecnkeys(dictionary * d, char * s);
/*-------------------------------------------------------------------------*/
/**
@brief Get the keys in a section of a dictionary.
@param d Dictionary to examine
@param s Section name of dictionary to examine
@return pointer to statically allocated character strings
This function queries a dictionary and finds all keys in a given section.
Each pointer in the returned char pointer-to-pointer is pointing to
a string allocated in the dictionary; do not free or modify them.
This function returns NULL in case of error.
*/
/*--------------------------------------------------------------------------*/
char ** iniparser_getseckeys(dictionary * d, char * s);
/*-------------------------------------------------------------------------*/
/**
@brief Get the string associated to a key
@param d Dictionary to search
@param key Key string to look for
@param def Default value to return if key not found.
@return pointer to statically allocated character string
This function queries a dictionary for a key. A key as read from an
ini file is given as "section:key". If the key cannot be found,
the pointer passed as 'def' is returned.
The returned char pointer is pointing to a string allocated in
the dictionary, do not free or modify it.
*/
/*--------------------------------------------------------------------------*/
char * iniparser_getstring(dictionary * d, const char * key, char * def);
/*-------------------------------------------------------------------------*/
/**
@brief Get the string associated to a key, convert to an int
@param d Dictionary to search
@param key Key string to look for
@param notfound Value to return in case of error
@return integer
This function queries a dictionary for a key. A key as read from an
ini file is given as "section:key". If the key cannot be found,
the notfound value is returned.
Supported values for integers include the usual C notation
so decimal, octal (starting with 0) and hexadecimal (starting with 0x)
are supported. Examples:
- "42" -> 42
- "042" -> 34 (octal -> decimal)
- "0x42" -> 66 (hexa -> decimal)
Warning: the conversion may overflow in various ways. Conversion is
totally outsourced to strtol(), see the associated man page for overflow
handling.
Credits: Thanks to A. Becker for suggesting strtol()
*/
/*--------------------------------------------------------------------------*/
int iniparser_getint(dictionary * d, const char * key, int notfound);
/*-------------------------------------------------------------------------*/
/**
@brief Get the string associated to a key, convert to a long
@param d Dictionary to search
@param key Key string to look for
@param notfound Value to return in case of error
@return long
Credits: This function is based entirely on iniparser_getint() and was
slightly modified to return long instead of int.
*/
/*--------------------------------------------------------------------------*/
long iniparser_getlint(dictionary * d, const char * key, int notfound);
/*-------------------------------------------------------------------------*/
/**
@brief Get the string associated to a key, convert to a double
@param d Dictionary to search
@param key Key string to look for
@param notfound Value to return in case of error
@return double
This function queries a dictionary for a key. A key as read from an
ini file is given as "section:key". If the key cannot be found,
the notfound value is returned.
*/
/*--------------------------------------------------------------------------*/
double iniparser_getdouble(dictionary * d, const char * key, double notfound);
/*-------------------------------------------------------------------------*/
/**
@brief Get the string associated to a key, convert to a boolean
@param d Dictionary to search
@param key Key string to look for
@param notfound Value to return in case of error
@return integer
This function queries a dictionary for a key. A key as read from an
ini file is given as "section:key". If the key cannot be found,
the notfound value is returned.
A true boolean is found if one of the following is matched:
- A string starting with 'y'
- A string starting with 'Y'
- A string starting with 't'
- A string starting with 'T'
- A string starting with '1'
A false boolean is found if one of the following is matched:
- A string starting with 'n'
- A string starting with 'N'
- A string starting with 'f'
- A string starting with 'F'
- A string starting with '0'
The notfound value returned if no boolean is identified, does not
necessarily have to be 0 or 1.
*/
/*--------------------------------------------------------------------------*/
int iniparser_getboolean(dictionary * d, const char * key, int notfound);
/*-------------------------------------------------------------------------*/
/**
@brief Set an entry in a dictionary.
@param ini Dictionary to modify.
@param entry Entry to modify (entry name)
@param val New value to associate to the entry.
@return int 0 if Ok, -1 otherwise.
If the given entry can be found in the dictionary, it is modified to
contain the provided value. If it cannot be found, -1 is returned.
It is Ok to set val to NULL.
*/
/*--------------------------------------------------------------------------*/
int iniparser_set(dictionary * ini, const char * entry, const char * val);
/*-------------------------------------------------------------------------*/
/**
@brief Delete an entry in a dictionary
@param ini Dictionary to modify
@param entry Entry to delete (entry name)
@return void
If the given entry can be found, it is deleted from the dictionary.
*/
/*--------------------------------------------------------------------------*/
void iniparser_unset(dictionary * ini, const char * entry);
/*-------------------------------------------------------------------------*/
/**
@brief Finds out if a given entry exists in a dictionary
@param ini Dictionary to search
@param entry Name of the entry to look for
@return integer 1 if entry exists, 0 otherwise
Finds out if a given entry exists in the dictionary. Since sections
are stored as keys with NULL associated values, this is the only way
of querying for the presence of sections in a dictionary.
*/
/*--------------------------------------------------------------------------*/
int iniparser_find_entry(dictionary * ini, const char * entry) ;
/*-------------------------------------------------------------------------*/
/**
@brief Parse an ini file and return an allocated dictionary object
@param ininame Name of the ini file to read.
@return Pointer to newly allocated dictionary
This is the parser for ini files. This function is called, providing
the name of the file to be read. It returns a dictionary object that
should not be accessed directly, but through accessor functions
instead.
The returned dictionary must be freed using iniparser_freedict().
*/
/*--------------------------------------------------------------------------*/
dictionary * iniparser_load(const char * ininame);
/*-------------------------------------------------------------------------*/
/**
@brief Free all memory associated to an ini dictionary
@param d Dictionary to free
@return void
Free all memory associated to an ini dictionary.
It is mandatory to call this function before the dictionary object
gets out of the current context.
*/
/*--------------------------------------------------------------------------*/
void iniparser_freedict(dictionary * d);
#endif
/**
* @file pub.h
* @author Sheng Di
* @date April, 2015
* @brief Header file for the whole compressor.
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _PUB_H
#define _PUB_H
#include <stdio.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* array meta data and compression parameters for SZ_Init_Params() */
typedef struct sz_params
{
int dataType;
unsigned int max_quant_intervals; //max number of quantization intervals for quantization
unsigned int quantization_intervals;
unsigned int maxRangeRadius;
int sol_ID;// it's SZ or SZ_Transpose, unless the setting is PASTRI compression mode (./configure --enable-pastri)
int losslessCompressor;
int sampleDistance; //2 bytes
float predThreshold; // 2 bytes
int szMode; //* 0 (best speed) or 1 (better compression with Zstd/Gzip) or 3 temporal-dimension based compression
int errorBoundMode; //4bits (0.5byte), //SZ_ABS, REL, ABS_AND_REL, or ABS_OR_REL, PSNR, or PW_REL, PSNR
double absErrBound; //absolute error bound for float
double absErrBoundDouble; // for double
double relBoundRatio; //value range based relative error bound ratio
double psnr; //PSNR
double normErr;
double pw_relBoundRatio; //point-wise relative error bound
int segment_size; //only used for 2D/3D data compression with pw_relBoundRatio (deprecated)
int pwr_type; //only used for 2D/3D data compression with pw_relBoundRatio
int protectValueRange; //0 or 1
float fmin, fmax;
double dmin, dmax;
int snapshotCmprStep; //perform single-snapshot-based compression if time_step == snapshotCmprStep
int predictionMode;
int accelerate_pw_rel_compression;
int plus_bits;
int randomAccess;
int withRegression;
} sz_params;
/* Runtime (execution-time) state that accompanies a sz_params configuration. */
typedef struct sz_exedata
{
char optQuantMode; // quantization mode: 0 = fixed, 1 = optimized
int intvCapacity; // number of intervals for the linear-scaling quantization
int intvRadius; // radius of the quantization range, i.e. intvCapacity/2
unsigned int SZ_SIZE_TYPE; // sizeof(size_t) on the running system: 4 or 8
} sz_exedata;
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _PUB_H ----- */
/**
* @file sz.h
* @author Sheng Di
* @date April, 2015
* @brief Header file for the whole compressor.
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _SZ_H
#define _SZ_H
#include <stdio.h>
#include <stdint.h>
#include <time.h> /* For time(), in seconds */
#include "pub.h"
#include "CompressElement.h"
#include "DynamicByteArray.h"
#include "DynamicIntArray.h"
#include "TightDataPointStorageD.h"
#include "TightDataPointStorageF.h"
#include "conf.h"
#include "dataCompression.h"
#include "ByteToolkit.h"
#include "TypeManager.h"
#include "sz_float.h"
#include "sz_double.h"
#include "szd_float.h"
#include "szd_double.h"
#include "utility.h"
#ifdef _WIN32
#define PATH_SEPARATOR ';'
#define INLINE
#else
#define PATH_SEPARATOR ':'
#define INLINE inline
#endif
#ifdef __cplusplus
extern "C" {
#endif
void cost_start();
double cost_end(const char* tag);
void show_rate( int in_len, int out_len);
//typedef char int8_t;
//typedef unsigned char uint8_t;
//typedef short int16_t;
//typedef unsigned short uint16_t;
//typedef int int32_t;
//typedef unsigned int uint32_t;
//typedef long int64_t;
//typedef unsigned long uint64_t;
#include "defines.h"
//Note: the following setting should be consistent with stateNum in Huffman.h
//#define intvCapacity 65536
//#define intvRadius 32768
//#define intvCapacity 131072
//#define intvRadius 65536
/*
 * Block-partitioning helpers.
 *
 * SZ_COMPUTE_{1D,2D,3D}_NUMBER_OF_BLOCKS store into NUM_BLOCKS the number of
 * whole blocks of BLOCK_SIZE elements contained in COUNT, with a minimum of 1.
 *
 * SZ_COMPUTE_BLOCKCOUNT splits COUNT elements over NUM_BLOCKS blocks:
 * SPLIT_INDEX receives COUNT % NUM_BLOCKS, LATE_BLOCK_COUNT receives
 * COUNT / NUM_BLOCKS, and EARLY_BLOCK_COUNT is one larger than
 * LATE_BLOCK_COUNT whenever the remainder is non-zero.
 *
 * Each macro expands to plain statements (not a do/while wrapper), exactly as
 * before, so existing call sites with or without a trailing ';' keep working.
 * The last line of a macro must NOT end in a backslash: a trailing
 * continuation splices whatever line follows into the macro body.
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define SZ_COMPUTE_1D_NUMBER_OF_BLOCKS( COUNT, NUM_BLOCKS, BLOCK_SIZE ) \
    if ((COUNT) <= (BLOCK_SIZE)){ \
        (NUM_BLOCKS) = 1; \
    } \
    else{ \
        (NUM_BLOCKS) = (COUNT) / (BLOCK_SIZE); \
    }

#define SZ_COMPUTE_2D_NUMBER_OF_BLOCKS( COUNT, NUM_BLOCKS, BLOCK_SIZE ) \
    if ((COUNT) <= (BLOCK_SIZE)){ \
        (NUM_BLOCKS) = 1; \
    } \
    else{ \
        (NUM_BLOCKS) = (COUNT) / (BLOCK_SIZE); \
    }

#define SZ_COMPUTE_3D_NUMBER_OF_BLOCKS( COUNT, NUM_BLOCKS, BLOCK_SIZE ) \
    if ((COUNT) <= (BLOCK_SIZE)){ \
        (NUM_BLOCKS) = 1; \
    } \
    else{ \
        (NUM_BLOCKS) = (COUNT) / (BLOCK_SIZE); \
    }

#define SZ_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \
                               EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \
    (EARLY_BLOCK_COUNT) = (LATE_BLOCK_COUNT) = (COUNT) / (NUM_BLOCKS); \
    (SPLIT_INDEX) = (COUNT) % (NUM_BLOCKS); \
    if (0 != (SPLIT_INDEX)) { \
        (EARLY_BLOCK_COUNT) = (EARLY_BLOCK_COUNT) + 1; \
    }

//typedef unsigned long unsigned long;
//typedef unsigned int uint;
/* A short overlaid with its raw bytes; provides signed, unsigned and
 * byte-wise views of the same storage (assumes short is 2 bytes). */
typedef union lint16
{
unsigned short usvalue;
short svalue;
unsigned char byte[2];
} lint16;
/* An int overlaid with its raw bytes; provides signed, unsigned and
 * byte-wise views of the same storage (assumes int is 4 bytes). */
typedef union lint32
{
int ivalue;
unsigned int uivalue;
unsigned char byte[4];
} lint32;
/*
 * A 64-bit integer overlaid with its 8 raw bytes (signed, unsigned and
 * byte-wise views of the same storage).
 *
 * Fixed-width types are used because `long` is only 4 bytes on LLP64
 * (64-bit Windows) and on 32-bit targets, where it would cover just half of
 * byte[8].
 */
typedef union lint64
{
    int64_t lvalue;
    uint64_t ulvalue;
    unsigned char byte[8];
} lint64;
/*
 * A double overlaid with a 64-bit unsigned integer and its 8 raw bytes, used
 * to inspect the bit pattern of a floating-point value.
 *
 * The integer view is uint64_t rather than `unsigned long`: on LLP64
 * (64-bit Windows) and 32-bit targets `unsigned long` is 4 bytes and would
 * expose only half of the double.
 */
typedef union ldouble
{
    double value;
    uint64_t lvalue;
    unsigned char byte[8];
} ldouble;
/* A float overlaid with an unsigned int and its 4 raw bytes, used to inspect
 * the bit pattern of a floating-point value (assumes unsigned int is 4 bytes). */
typedef union lfloat
{
float value;
unsigned int ivalue;
unsigned char byte[4];
} lfloat;
/* Metadata describing a compressed stream; per the field notes below, most
 * fields are only meaningful when obtained through SZ_GetMetaData(). */
typedef struct sz_metadata
{
unsigned char ver; // version, only used for version checks via SZ_GetMetaData()
int isConstant; // whether the data are constant values, only used via SZ_GetMetaData()
int isLossless; // whether the compression was lossless, only used via SZ_GetMetaData()
int sizeType; // whether the size type used in the compression is "int" or "long", only used via SZ_GetMetaData()
size_t dataSeriesLength; // number of data points in the dataset
int defactoNBBins; // real number of quantization bins
struct sz_params* conf_params; // configuration parameters
} sz_metadata;
/*
 * Time-step meta information for time-step based compression.
 * NOTE(review): the original comment said a linked list is used to maintain
 * this information, but the struct has no link pointer - confirm whether the
 * list lives elsewhere or the remark is stale.
 * Note that the struct tag (sz_tsc_metainfo) and the typedef name
 * (sz_tsc_metadata) differ.
 */
typedef struct sz_tsc_metainfo
{
int totalNumOfSteps;
int currentStep;
char metadata_filename[256];
FILE *metadata_file;
unsigned char* bit_array; // added by sihuan
size_t intersect_size; // added by sihuan
int64_t* hist_index; // added by sihuan: index of the previous step
} sz_tsc_metadata;
extern unsigned char versionNumber;
//-------------------key global variables--------------
extern int dataEndianType; //*endian type of the data read from disk
extern int sysEndianType; //*sysEndianType is actually set automatically.
extern sz_params *confparams_cpr;
extern sz_exedata *exe_params;
void SZ_Finalize();
int SZ_Init(const char *configFilePath);
int SZ_Init_Params(sz_params *params);
//
// compress output data to outData and return outSize
//
size_t SZ_compress_args(int dataType, void *data, size_t r1, unsigned char* outData, sz_params* params);
//
// decompress output data to outData and return outSize
//
size_t SZ_decompress(int dataType, unsigned char *bytes, size_t byteLength, size_t r1, unsigned char* outData);
void convertSZParamsToBytes(sz_params* params, unsigned char* result, char optQuantMode);
void convertBytesToSZParams(unsigned char* bytes, sz_params* params, sz_exedata* pde_exe);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _SZ_H ----- */
/**
* @file sz_double.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the sz_double.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _SZ_Double_H
#define _SZ_Double_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdbool.h>
unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize);
void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue);
unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision);
unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision);
TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData,
size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d);
void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, unsigned char* newByteData, size_t *outSize);
bool SZ_compress_args_double_NoCkRngeNoGzip_1D( unsigned char* newByteData, double *oriData, size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d);
void SZ_compress_args_double_withinRange(unsigned char* newByteData, double *oriData, size_t dataLength, size_t *outSize);
int SZ_compress_args_double(double *oriData, size_t r1, unsigned char* newByteData, size_t *outSize, sz_params* params);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _SZ_Double_H ----- */
/**
* @file sz_float.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the sz_float.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _SZ_Float_H
#define _SZ_Float_H
/*
 * Prototypes for the single-precision compression routines (sz_float.c).
 *
 * <stdbool.h> and <stddef.h> are included here because the prototypes below
 * use `bool` and `size_t`. Without them the header only compiles when some
 * earlier include happens to provide those names, and sz.h includes this
 * header before sz_double.h (the only sibling header seen to include
 * <stdbool.h>).
 * NOTE(review): TightDataPointStorageF and sz_params must still be declared
 * by the includer (sz.h does so) - confirm before including this header alone.
 */
#include <stdbool.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue);
unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision);
unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision);
TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData,
    size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_f);
bool SZ_compress_args_float_NoCkRngeNoGzip_1D( unsigned char* newByteData, float *oriData,
    size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f);
void SZ_compress_args_float_withinRange(unsigned char* newByteData, float *oriData, size_t dataLength, size_t *outSize);
int SZ_compress_args_float(float *oriData, size_t r1, unsigned char* newByteData, size_t *outSize, sz_params* params);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _SZ_Float_H ----- */
/**
* @file szd_double.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the szd_double.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _SZD_Double_H
#define _SZD_Double_H
#ifdef __cplusplus
extern "C" {
#endif
#include "TightDataPointStorageD.h"
void decompressDataSeries_double_1D(double* data, size_t dataSeriesLength, double* hist_data, TightDataPointStorageD* tdps);
void getSnapshotData_double_1D(double* data, size_t dataSeriesLength, TightDataPointStorageD* tdps, int errBoundMode, int compressionType, double* hist_data, sz_params* pde_params);
int SZ_decompress_args_double(double* newData, size_t r1, unsigned char* cmpBytes, size_t cmpSize, int compressionType, double* hist_data, sz_exedata* pde_exe, sz_params* pde_params);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _SZD_Double_H ----- */
/**
* @file szd_float.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the szd_float.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _SZD_Float_H
#define _SZD_Float_H
#ifdef __cplusplus
extern "C" {
#endif
#include "TightDataPointStorageF.h"
void decompressDataSeries_float_1D(float* data, size_t dataSeriesLength, float* hist_data, TightDataPointStorageF* tdps);
void getSnapshotData_float_1D(float* data, size_t dataSeriesLength, TightDataPointStorageF* tdps, int errBoundMode, int compressionType, float* hist_data, sz_params* pde_params);
int SZ_decompress_args_float(float* newData, size_t r1, unsigned char* cmpBytes, size_t cmpSize, int compressionType, float* hist_data, sz_exedata* pde_exe, sz_params* pde_params);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _SZD_Float_H ----- */
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_SZ_H
#define _TD_SZ_H
#include "defines.h"
#ifdef __cplusplus
extern "C" {
#endif
void cost_start();
double cost_end(const char* tag);
//
// Initialize the compressor (declared void: no status is returned to the caller)
//
void tdszInit(double fPrecision, double dPrecision, unsigned int maxIntervals, unsigned int intervals, const char* compressor);
//
// Compress interface for TDengine; the return value is the size of the output in bytes
//
int tdszCompress(int type, const char * input, const int nelements, const char * output);
//
// Decompress interface for TDengine; the return value is the size of the output in bytes
//
int tdszDecompress(int type, const char * input, int compressedSize, const int nelements, const char * output);
//
// Exit call
//
void tdszExit();
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _TD_SZ_H ----- */
/**
* @file utility.h
* @author Sheng Di, Sihuan Li
* @date July, 2018
* @brief Header file for the utility.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _UTILITY_H
#define _UTILITY_H
#include "sz.h"
#ifdef __cplusplus
extern "C" {
#endif
int is_lossless_compressed_data(unsigned char* compressedBytes, size_t cmpSize);
unsigned long sz_lossless_compress(int losslessCompressor, unsigned char* data, unsigned long dataLength, unsigned char* compressBytes);
unsigned long sz_lossless_decompress(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _UTILITY_H ----- */
/**
* @file ByteToolkit.c
* @author Sheng Di
* @date April, 2016
* @brief Byte Toolkit
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include <string.h>
#include "sz.h"
/**
 * Decode a 4-byte big-endian buffer into an int.
 *
 * @param bytes buffer of at least 4 bytes, most significant byte first
 * @return the decoded 32-bit pattern, converted to int
 *
 * The value is assembled with shifts instead of aliasing the bytes of the
 * result, so the outcome does not depend on the byte order of the host
 * (the previous byte-swapping copy was only correct on little-endian hosts).
 * */
INLINE int bytesToInt_bigEndian(unsigned char* bytes)
{
    unsigned int res = ((unsigned int)bytes[0] << 24)
                     | ((unsigned int)bytes[1] << 16)
                     | ((unsigned int)bytes[2] << 8)
                     |  (unsigned int)bytes[3];
    return (int)res;
}
/**
 * Encode an unsigned int as 4 big-endian bytes.
 *
 * @param b   destination buffer (length >= 4); b[0] receives the most
 *            significant byte
 * @param num value to encode (its low 32 bits are written)
 *
 * Shifts are used instead of aliasing the bytes of num, so the output is the
 * same on little- and big-endian hosts.
 * */
INLINE void intToBytes_bigEndian(unsigned char *b, unsigned int num)
{
    b[0] = (unsigned char)((num >> 24) & 0xFF);
    b[1] = (unsigned char)((num >> 16) & 0xFF);
    b[2] = (unsigned char)((num >> 8) & 0xFF);
    b[3] = (unsigned char)(num & 0xFF);
}
/**
 * Decode an 8-byte big-endian buffer into a long.
 *
 * @param b buffer of at least 8 bytes, most significant byte first
 * @return the decoded value; where long is narrower than 64 bits only the
 *         low-order bytes (b[4]..b[7] for a 4-byte long) survive the final
 *         conversion
 *
 * The bytes are accumulated in an unsigned 64-bit integer so that no shift
 * ever overflows a signed type.
 * */
INLINE long bytesToLong_bigEndian(unsigned char* b) {
    uint64_t acc = 0;
    int i;
    for (i = 0; i < 8; i++)
        acc = (acc << 8) | (uint64_t)b[i];
    return (long)acc;
}
/**
 * Encode a long as 8 big-endian bytes.
 *
 * @param b   destination buffer (length >= 8); b[0] receives the most
 *            significant byte
 * @param num value to encode
 *
 * The value is widened without sign extension, so where long is 4 bytes the
 * upper four output bytes are zero, as they were before. The previous
 * implementation wrote all 8 bytes only when one of three platform macros was
 * defined and copied bytes in little-endian order; this version depends on
 * neither the platform macros nor the host byte order.
 * */
INLINE void longToBytes_bigEndian(unsigned char *b, long num)
{
    uint64_t v = (uint64_t)(unsigned long)num;
    int i;
    for (i = 7; i >= 0; i--)
    {
        b[i] = (unsigned char)(v & 0xFF);
        v >>= 8;
    }
}
//TODO (from the original author): check whether the integer view of the union could be little-endian.
/**
 * Return the unbiased binary exponent of a float.
 *
 * The bit pattern is read through the lfloat union; bits 23..30 are taken as
 * the biased exponent and 127 is subtracted (i.e. an IEEE-754 single-precision
 * layout is assumed).
 * */
INLINE short getExponent_float(float value)
{
    lfloat bits;
    int word;
    int biased;

    bits.value = value;
    word = bits.ivalue;
    biased = (word & 0x7F800000) >> 23;
    return (short)(biased - 127);
}
/**
 * Return the unbiased binary exponent of a precision value.
 *
 * The computation is exactly the one performed by getExponent_float(), so it
 * is delegated to that function.
 * */
INLINE short getPrecisionReqLength_float(float precision)
{
    return getExponent_float(precision);
}
/**
 * Return the unbiased binary exponent of a double.
 *
 * @param value the number to inspect
 * @return bits 52..62 of the representation minus the bias 1023 (an IEEE-754
 *         double-precision layout is assumed); 0.0 therefore yields -1023
 *
 * The bit pattern is copied into a uint64_t. The previous code read it
 * through `long`, which holds only 32 bits on LLP64 (64-bit Windows) and on
 * 32-bit targets, so the exponent field was lost there.
 * */
INLINE short getExponent_double(double value)
{
    uint64_t bits;
    int expValue;

    memcpy(&bits, &value, sizeof(bits));
    expValue = (int)((bits >> 52) & 0x7FF);
    expValue -= 1023;
    return (short)expValue;
}
/**
 * Return the unbiased binary exponent of a precision value.
 *
 * @param precision the precision to inspect
 * @return bits 52..62 of the representation minus the bias 1023 (an IEEE-754
 *         double-precision layout is assumed)
 *
 * The bit pattern is copied into a uint64_t instead of being read through
 * `long`, which is only 32 bits wide on LLP64 and 32-bit targets.
 * An earlier, disabled variant additionally decremented the result when the
 * first mantissa bit (bit 51) was set; that adjustment is intentionally not
 * applied.
 * */
INLINE short getPrecisionReqLength_double(double precision)
{
    uint64_t bits;
    int expValue;

    memcpy(&bits, &precision, sizeof(bits));
    expValue = (int)((bits >> 52) & 0x7FF);
    expValue -= 1023;
    return (short)expValue;
}
/**
 * Build a float from 4 bytes given in big-endian order.
 *
 * The bytes are copied into an lfloat union; when the host is flagged as
 * little-endian (sysEndianType) they are passed through symTransform_4bytes()
 * first - presumably a byte reversal, see its definition.
 * */
INLINE float bytesToFloat(unsigned char* bytes)
{
    lfloat word;

    memcpy(word.byte, bytes, 4);
    if (LITTLE_ENDIAN_SYSTEM == sysEndianType)
    {
        symTransform_4bytes(word.byte);
    }
    return word.value;
}
/**
 * Store a float into 4 bytes.
 *
 * The in-memory representation is copied to b; when the host is flagged as
 * little-endian (sysEndianType) the output is then passed through
 * symTransform_4bytes() - presumably a byte reversal yielding big-endian
 * order, see its definition.
 * */
INLINE void floatToBytes(unsigned char *b, float num)
{
    lfloat word;

    word.value = num;
    memcpy(b, word.byte, 4);
    if (LITTLE_ENDIAN_SYSTEM == sysEndianType)
    {
        symTransform_4bytes(b);
    }
}
/**
 * Build a double from 8 bytes given in big-endian order.
 *
 * The bytes are copied into an ldouble union; when the host is flagged as
 * little-endian (sysEndianType) they are passed through symTransform_8bytes()
 * first - presumably a byte reversal, see its definition.
 * */
INLINE double bytesToDouble(unsigned char* bytes)
{
    ldouble word;

    memcpy(word.byte, bytes, 8);
    if (LITTLE_ENDIAN_SYSTEM == sysEndianType)
    {
        symTransform_8bytes(word.byte);
    }
    return word.value;
}
/**
 * Store a double into 8 bytes.
 *
 * The in-memory representation is copied to b; when the host is flagged as
 * little-endian (sysEndianType) the output is then passed through
 * symTransform_8bytes() - presumably a byte reversal yielding big-endian
 * order, see its definition.
 * */
INLINE void doubleToBytes(unsigned char *b, double num)
{
    ldouble word;

    word.value = num;
    memcpy(b, word.byte, 8);
    if (LITTLE_ENDIAN_SYSTEM == sysEndianType)
    {
        symTransform_8bytes(b);
    }
}
/**
 * Return a mask with the m lowest bits set.
 *
 * @param m number of bits, meaningful for 1..8
 * @return 0x01, 0x03, ... 0xFF for m = 1..8; 0 for any other m
 * */
INLINE int getMaskRightCode(int m) {
    if (m < 1 || m > 8)
        return 0;
    return (1 << m) - 1;
}
/**
 * Return the low-bit mask covering the (8 - kMod8) bits that remain in a byte
 * after kMod8 bits, as computed by getMaskRightCode().
 * */
INLINE int getLeftMovingCode(int kMod8)
{
    int remainingBits = 8 - kMod8;
    return getMaskRightCode(remainingBits);
}
/**
 * Return 8 - kMod8 - resiBitLength: the number of bits left in a byte after
 * kMod8 bits and a residual of resiBitLength bits. The result is negative
 * when the residual does not fit.
 * */
INLINE int getRightMovingSteps(int kMod8, int resiBitLength) {
    int remainingBits = 8 - kMod8;
    return remainingBits - resiBitLength;
}
/**
 * Return the bit mask selecting the residual bits inside a byte.
 *
 * With steps = 8 - kMod8 - resiBitLength:
 *  - steps >= 0: the result is getMaskRightCode(8 - kMod8) minus
 *    getMaskRightCode(steps);
 *  - steps < 0: the result is a mask with the -steps highest bits of a byte
 *    set (0x80, 0xC0, ... 0xFE for 1..7), and 0 when -steps exceeds 7.
 * */
INLINE int getRightMovingCode(int kMod8, int resiBitLength)
{
    int remainingBits = 8 - kMod8;
    int steps = remainingBits - resiBitLength;
    int overflowBits;

    if (steps >= 0)
    {
        int upper = getMaskRightCode(remainingBits);
        int lower = getMaskRightCode(steps);
        return upper - lower;
    }

    overflowBits = -steps;
    if (overflowBits > 7)
        return 0;
    /* Keep the overflowBits most significant bits of one byte. */
    return (0xFF << (8 - overflowBits)) & 0xFF;
}
/**
 * Decode a big-endian size field.
 *
 * @param bytes     buffer holding the encoded size
 * @param size_type 4 to read a 4-byte field; any other value reads 8 bytes
 * @return the decoded size
 *
 * The decoded integer is converted through the matching unsigned type before
 * widening to size_t. Previously a 4-byte size of 2^31 or more came back from
 * bytesToInt_bigEndian() as a negative int and was sign-extended into a huge
 * size_t on 64-bit targets.
 * */
INLINE size_t bytesToSize(unsigned char* bytes, int size_type)
{
    if(size_type == 4)
        return (size_t)(unsigned int)bytesToInt_bigEndian(bytes);//4
    return (size_t)(unsigned long)bytesToLong_bigEndian(bytes);//8
}
/**
 * Encode a size as a big-endian field.
 *
 * @param outBytes  destination buffer
 * @param size      value to encode
 * @param size_type 4 to write a 4-byte field via intToBytes_bigEndian();
 *                  any other value writes 8 bytes via longToBytes_bigEndian()
 * */
INLINE void sizeToBytes(unsigned char* outBytes, size_t size, int size_type)
{
    if (size_type != 4)
    {
        longToBytes_bigEndian(outBytes, (unsigned long)size);//8
        return;
    }
    intToBytes_bigEndian(outBytes, (unsigned int)size);//4
}
/**
 * Pack the compression flags into result[0].
 *
 * The byte is built by successive shifts: optQuantMode, then dataEndianType
 * (shift 1), sysEndianType (shift 1) and params->szMode (shift 2). Provided
 * each value fits its field, the layout is therefore
 *   bit 4: optQuantMode, bit 3: dataEndianType, bit 2: sysEndianType,
 *   bits 1..0: szMode.
 * Only one byte of result is written.
 * */
void convertSZParamsToBytes(sz_params* params, unsigned char* result, char optQuantMode)
{
    unsigned char flags = optQuantMode;

    flags = (flags << 1) | dataEndianType;
    flags = (flags << 1) | sysEndianType;
    flags = (flags << 2) | params->szMode;
    result[0] = flags;
}
/**
 * Unpack the flag byte written by convertSZParamsToBytes().
 *
 * @param bytes   buffer whose first byte holds the packed flags
 * @param params  receives szMode
 * @param pde_exe receives optQuantMode
 *
 * Also updates the global dataEndianType.
 *
 * The masks follow the layout produced by convertSZParamsToBytes() in this
 * file: bit 4 = optQuantMode, bit 3 = dataEndianType, bit 2 = sysEndianType
 * (not restored here), bits 1..0 = szMode. The previous masks
 * (0x40 / 0x20 / 0x0c) belong to a wider layout with two more low-order bits
 * that the packer no longer emits, so optQuantMode and dataEndianType always
 * decoded as 0 and szMode was read from the two endian bits.
 * NOTE(review): if streams written with the wider layout must still be read,
 * a format/version switch is needed - confirm.
 * */
void convertBytesToSZParams(unsigned char* bytes, sz_params* params, sz_exedata* pde_exe)
{
    unsigned char flag1 = bytes[0];
    pde_exe->optQuantMode = (flag1 & 0x10) >> 4;
    dataEndianType = (flag1 & 0x08) >> 3;
    params->szMode = flag1 & 0x03;
}
/**
* @file CompressElement.c
* @author Sheng Di
* @date May, 2016
* @brief Functions of CompressElement
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef WINDOWS
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wchar-subscripts"
#endif
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include "sz.h"
#include "CompressElement.h"
/**
 * Return the group number of a float: its binary exponent as reported by
 * getExponent_float(), with every negative exponent collapsed to -1.
 * */
INLINE short computeGroupNum_float(float value)
{
    short expo = getExponent_float(value);
    return (expo < 0) ? (short)-1 : expo;
}
/**
 * Return the group number of a double: its binary exponent as reported by
 * getExponent_double(), with every negative exponent collapsed to -1.
 * */
INLINE short computeGroupNum_double(double value)
{
    short expo = getExponent_double(value);
    return (expo < 0) ? (short)-1 : expo;
}
/**
 * Push a value onto a 3-entry history buffer.
 * Existing entries move one slot towards the end (the oldest, at index 2, is
 * dropped) and the new value is stored at index 0.
 * @param last3CmprsData buffer of the three most recent values
 * @param value the value to be added to the buffer
 * */
INLINE void listAdd_double(double last3CmprsData[3], double value)
{
    int slot;
    for (slot = 2; slot > 0; slot--)
        last3CmprsData[slot] = last3CmprsData[slot - 1];
    last3CmprsData[0] = value;
}
/**
 * Push a value onto a 3-entry history buffer of floats: entries move one slot
 * towards the end (index 2 is dropped) and value is stored at index 0.
 * */
INLINE void listAdd_float(float last3CmprsData[3], float value)
{
    int slot;
    for (slot = 2; slot > 0; slot--)
        last3CmprsData[slot] = last3CmprsData[slot - 1];
    last3CmprsData[0] = value;
}
/**
 * Push a value onto a 3-entry history buffer of 64-bit integers: entries move
 * one slot towards the end (index 2 is dropped) and value is stored at index 0.
 * */
INLINE void listAdd_int(int64_t last3CmprsData[3], int64_t value)
{
    int slot;
    for (slot = 2; slot > 0; slot--)
        last3CmprsData[slot] = last3CmprsData[slot - 1];
    last3CmprsData[0] = value;
}
/**
 * Push a value onto a 3-entry history buffer of 32-bit integers: entries move
 * one slot towards the end (index 2 is dropped) and value is stored at index 0.
 * */
INLINE void listAdd_int32(int32_t last3CmprsData[3], int32_t value)
{
    int slot;
    for (slot = 2; slot > 0; slot--)
        last3CmprsData[slot] = last3CmprsData[slot - 1];
    last3CmprsData[0] = value;
}
/**
 * Record the latest decompressed value for a group and report the signed
 * group identifier.
 *
 * @param groups     per-group latest decompressed value (written)
 * @param flags      per-group "seen" flags (written)
 * @param groupNum   group number; a negative value shares slot 0
 * @param oriValue   original value, only its sign is used
 * @param decValue   decompressed value to store
 * @param curGroupID receives groupNum+2 when oriValue >= 0, otherwise
 *                   -(groupNum+2)
 *
 * groupNum is re-read as signed char before use: plain `char` is unsigned on
 * some targets (e.g. ARM), where -1 would arrive as 255, pass the ">= 0"
 * test and index far outside the arrays.
 * */
INLINE void listAdd_float_group(float *groups, int *flags, char groupNum, float oriValue, float decValue, char* curGroupID)
{
    signed char group = (signed char)groupNum;

    if(group >= 0)
    {
        if(flags[group] == 0)
            flags[group] = 1;
        groups[group] = decValue;
    }
    else
    {
        groups[0] = decValue;
        flags[0] = 1;
    }

    if(oriValue >= 0)
        *curGroupID = (char)(group + 2); //+[-1,0,1,2,3,....,16] is mapped to [1,2,....,18]
    else
        *curGroupID = (char)(-(group + 2)); //-[-1,0,1,2,3,....,16] is mapped to [-1,-2,....,-18]
}
/**
 * Record the latest decompressed value for a group and report the signed
 * group identifier (double-precision variant).
 *
 * @param groups     per-group latest decompressed value (written)
 * @param flags      per-group "seen" flags (written)
 * @param groupNum   group number; a negative value shares slot 0
 * @param oriValue   original value, only its sign is used
 * @param decValue   decompressed value to store
 * @param curGroupID receives groupNum+2 when oriValue >= 0, otherwise
 *                   -(groupNum+2)
 *
 * groupNum is re-read as signed char before use: plain `char` is unsigned on
 * some targets (e.g. ARM), where -1 would arrive as 255, pass the ">= 0"
 * test and index far outside the arrays.
 * */
INLINE void listAdd_double_group(double *groups, int *flags, char groupNum, double oriValue, double decValue, char* curGroupID)
{
    signed char group = (signed char)groupNum;

    if(group >= 0)
    {
        if(flags[group] == 0)
            flags[group] = 1;
        groups[group] = decValue;
    }
    else
    {
        groups[0] = decValue;
        flags[0] = 1;
    }

    if(oriValue >= 0)
        *curGroupID = (char)(group + 2); //+[-1,0,1,2,3,....,16] is mapped to [1,2,....,18]
    else
        *curGroupID = (char)(-(group + 2)); //-[-1,0,1,2,3,....,16] is mapped to [-1,-2,....,-18]
}
/**
 * Tell whether a prediction error is within the allowed precision.
 * @return 1 when minErr <= precision, 0 otherwise (including when either
 *         operand is NaN, since the comparison is then false)
 * */
INLINE int validPrediction_double(double minErr, double precision)
{
    return minErr <= precision;
}
/**
 * Tell whether a prediction error is within the allowed precision.
 * @return 1 when minErr <= precision, 0 otherwise (including when either
 *         operand is NaN, since the comparison is then false)
 * */
INLINE int validPrediction_float(float minErr, float precision)
{
    return minErr <= precision;
}
/**
 * Build the per-group error-bound table.
 *
 * For each group i in [0, GROUP_COUNT) the point-wise bound is
 * 2^i * pwrErrBound, combined with realPrecision according to errorBoundMode:
 *  - ABS_AND_PW_REL / REL_AND_PW_REL: the smaller of the two;
 *  - ABS_OR_PW_REL  / REL_OR_PW_REL : the larger of the two;
 *  - PW_REL                         : the point-wise bound alone.
 * For any other mode the entries are left unset, as before.
 * NOTE(review): confirm that callers never pass another mode.
 *
 * @return a malloc'ed array of GROUP_COUNT doubles (this function does not
 *         free it), or NULL when the allocation fails - previously a failed
 *         allocation was dereferenced.
 * */
double* generateGroupErrBounds(int errorBoundMode, double realPrecision, double pwrErrBound)
{
    double pwrError;
    double* result = (double*)malloc(GROUP_COUNT*sizeof(double));
    int i = 0;

    if(result == NULL)
        return NULL;

    for(i=0;i<GROUP_COUNT;i++)
    {
        pwrError = ((double)pow(2, i))*pwrErrBound;
        switch(errorBoundMode)
        {
        case ABS_AND_PW_REL:
        case REL_AND_PW_REL:
            result[i] = pwrError<realPrecision?pwrError:realPrecision;
            break;
        case ABS_OR_PW_REL:
        case REL_OR_PW_REL:
            result[i] = pwrError<realPrecision?realPrecision:pwrError;
            break;
        case PW_REL:
            result[i] = pwrError;
            break;
        }
    }
    return result;
}
/**
 * Return the largest rounded value of 2^g / groupErrBounds[g] over all
 * GROUP_COUNT groups (0 when no group yields a positive count).
 * */
int generateGroupMaxIntervalCount(double* groupErrBounds)
{
    int widest = 0;
    int g;

    for (g = 0; g < GROUP_COUNT; g++)
    {
        /* adding 0.5 before truncation rounds to the nearest integer */
        const int intervals = (int)(pow(2, g)/groupErrBounds[g] + 0.5);
        if (intervals > widest)
            widest = intervals;
    }

    return widest;
}
/**
 * Fill an existing LossyCompressionElement from its parts.
 *
 * intMidBytes_Length bytes are copied from intMidBytes into
 * lce->integerMidBytes; the remaining arguments are stored as given.
 * */
void new_LossyCompressionElement(LossyCompressionElement *lce, int leadingNum, unsigned char* intMidBytes,
        int intMidBytes_Length, int resiMidBitsLength, int resiBits)
{
    /* payload first: the "mid bytes" */
    memcpy(lce->integerMidBytes, intMidBytes, intMidBytes_Length);
    lce->integerMidBytes_Length = intMidBytes_Length; //they are mid_bits actually

    /* then the scalar descriptors */
    lce->leadingZeroBytes = leadingNum; //0,1,2,or 3
    lce->residualMidBits = resiBits;
    lce->resMidBitsLength = resiMidBitsLength;
}
/**
 * Describe the current double value relative to the previous one.
 *
 * The number of leading bytes shared with preBytes is obtained from
 * compIdenticalLeadingBytesCount_double(). Bytes from that position up to
 * (not including) reqBytesLength are copied into lce->integerMidBytes. When
 * resiBitsLength is non-zero and reqBytesLength < 8, the top resiBitsLength
 * bits of curBytes[reqBytesLength] become the residual bits.
 * */
void updateLossyCompElement_Double(unsigned char* curBytes, unsigned char* preBytes,
        int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce)
{
    const int sharedLead = compIdenticalLeadingBytesCount_double(preBytes, curBytes);
    int midCount = 0;
    int tailBits = 0;

    if (sharedLead < reqBytesLength)
    {
        midCount = reqBytesLength - sharedLead;
        memcpy(lce->integerMidBytes, &(curBytes[sharedLead]), midCount);
    }

    if (resiBitsLength != 0 && reqBytesLength < 8)
        tailBits = (curBytes[reqBytesLength] & 0xFF) >> (8 - resiBitsLength);

    lce->leadingZeroBytes = sharedLead;
    lce->integerMidBytes_Length = midCount;
    lce->resMidBitsLength = resiBitsLength;
    lce->residualMidBits = tailBits;
}
/**
 * Describe the current float value relative to the previous one.
 *
 * The number of leading bytes shared with preDiffBytes is obtained from
 * compIdenticalLeadingBytesCount_float(). Bytes from that position up to
 * (not including) reqBytesLength are copied into lce->integerMidBytes. When
 * resiBitsLength is non-zero and reqBytesLength < 8, the top resiBitsLength
 * bits of diffBytes[reqBytesLength] become the residual bits.
 *
 * NOTE(review): the `< 8` bound matches the double variant; presumably a
 * float buffer is only 4 bytes long - confirm callers never reach index 4..7.
 * */
INLINE void updateLossyCompElement_Float(unsigned char* diffBytes, unsigned char* preDiffBytes,
        int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce)
{
    const int sharedLead = compIdenticalLeadingBytesCount_float(preDiffBytes, diffBytes);
    int midCount = 0;
    int tailBits = 0;

    /* mid bytes: everything between the shared prefix and the required length */
    if (sharedLead < reqBytesLength)
    {
        midCount = reqBytesLength - sharedLead;
        memcpy(lce->integerMidBytes, &(diffBytes[sharedLead]), midCount);
    }

    /* residual bits: taken from the byte right after the required bytes */
    if (resiBitsLength != 0 && reqBytesLength < 8)
        tailBits = (diffBytes[reqBytesLength] & 0xFF) >> (8 - resiBitsLength);

    lce->leadingZeroBytes = sharedLead;
    lce->integerMidBytes_Length = midCount;
    lce->resMidBitsLength = resiBitsLength;
    lce->residualMidBits = tailBits;
}
#ifndef WINDOWS
#pragma GCC diagnostic pop
#endif
\ No newline at end of file
/**
* @file DynamicByteArray.c
* @author Sheng Di
* @date May, 2016
* @brief Dynamic Byte Array
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "DynamicByteArray.h"
#include "sz.h"
/**
 * Allocate an empty dynamic byte array with room for `cap` bytes and return
 * it through *dba. Release it with free_DBA().
 * NOTE(review): neither allocation is checked for failure.
 */
void new_DBA(DynamicByteArray **dba, size_t cap) {
    DynamicByteArray *fresh = (DynamicByteArray *)malloc(sizeof(DynamicByteArray));

    *dba = fresh;
    fresh->array = (unsigned char*)malloc(sizeof(unsigned char)*cap);
    fresh->capacity = cap;
    fresh->size = 0;
}
/**
 * Copy the used part of a dynamic byte array into a newly malloc'ed buffer.
 *
 * *bytes receives the copy, or NULL when the array holds no bytes. The caller
 * owns the returned buffer.
 */
void convertDBAtoBytes(DynamicByteArray *dba, unsigned char** bytes)
{
    const size_t used = dba->size;

    if (used == 0)
    {
        *bytes = NULL;
        return ;
    }

    *bytes = (unsigned char*)malloc(used * sizeof(unsigned char));
    memcpy(*bytes, dba->array, used*sizeof(unsigned char));
}
/**
 * Release a dynamic byte array: first its storage, then the descriptor.
 * `dba` itself must not be NULL.
 */
void free_DBA(DynamicByteArray *dba)
{
    void *storage = dba->array;

    free(storage);
    free(dba);
}
/**
 * Return the byte stored at index `pos`.
 *
 * An index at or beyond dba->size prints an error and terminates the process.
 * NOTE(review): the termination uses exit status 0, which a parent process
 * reads as success.
 */
INLINE unsigned char getDBA_Data(DynamicByteArray *dba, size_t pos)
{
    const int outOfRange = !(pos < dba->size);

    if (outOfRange)
    {
        printf("Error: wrong position of DBA (impossible case unless bugs elsewhere in the code?).\n");
        exit(0);
    }

    return dba->array[pos];
}
/**
 * Append one byte, growing the storage when it is full.
 *
 * Growth doubles the capacity. An array created with capacity 0 grows to one
 * slot instead, because doubling zero would leave no room for the write below.
 * NOTE(review): a failed realloc is still not handled here.
 */
INLINE void addDBA_Data(DynamicByteArray *dba, unsigned char value)
{
    if(dba->size==dba->capacity)
    {
        dba->capacity = (dba->capacity == 0) ? 1 : (dba->capacity << 1);
        dba->array = (unsigned char *)realloc(dba->array, dba->capacity*sizeof(unsigned char));
    }
    dba->array[dba->size] = value;
    dba->size ++;
}
/**
 * Append `length` bytes from `data`.
 *
 * When the bytes do not fit, the storage is reallocated to exactly the size
 * needed (size + length); no extra headroom is added.
 */
INLINE void memcpyDBA_Data(DynamicByteArray *dba, unsigned char* data, size_t length)
{
    const size_t needed = dba->size + length;

    if (needed > dba->capacity)
    {
        dba->capacity = needed;
        dba->array = (unsigned char *)realloc(dba->array, dba->capacity*sizeof(unsigned char));
    }

    memcpy(dba->array + dba->size, data, length);
    dba->size = needed;
}
/**
* @file DynamicIntArray.c
* @author Sheng Di
* @date May, 2016
* @brief Dynamic Int Array
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "DynamicIntArray.h"
#include "sz.h"
/**
 * Allocate an empty dynamic int array with room for `cap` one-byte elements
 * and return it through *dia. Release it with free_DIA().
 * NOTE(review): neither allocation is checked for failure.
 */
void new_DIA(DynamicIntArray **dia, size_t cap) {
    DynamicIntArray *fresh = (DynamicIntArray *)malloc(sizeof(DynamicIntArray));

    *dia = fresh;
    fresh->array = (unsigned char*)malloc(sizeof(unsigned char)*cap);
    fresh->capacity = cap;
    fresh->size = 0;
}
/**
 * Copy the used part of a dynamic int array into a newly malloc'ed buffer.
 *
 * *data receives the copy, or NULL when the array is empty or the allocation
 * fails. The caller owns the returned buffer.
 */
void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data)
{
    const size_t size = dia->size;

    /* Nothing to copy: return before memcpy so it is never handed a NULL
     * destination (same early return as convertDBAtoBytes). */
    if (size == 0)
    {
        *data = NULL;
        return;
    }

    *data = (unsigned char*)malloc(size * sizeof(unsigned char));
    if (*data == NULL)
        return;

    memcpy(*data, dia->array, size*sizeof(unsigned char));
}
/**
 * Release a dynamic int array: first its storage, then the descriptor.
 * `dia` itself must not be NULL.
 */
void free_DIA(DynamicIntArray *dia)
{
    void *storage = dia->array;

    free(storage);
    free(dia);
}
/**
 * Return the element stored at index `pos`.
 *
 * An index at or beyond dia->size prints an error and terminates the process.
 * NOTE(review): the termination uses exit status 0, which a parent process
 * reads as success.
 */
int getDIA_Data(DynamicIntArray *dia, size_t pos)
{
    const int outOfRange = !(pos < dia->size);

    if (outOfRange)
    {
        printf("Error: wrong position of DIA.\n");
        exit(0);
    }

    return dia->array[pos];
}
/**
 * Append one element (stored as its low 8 bits), growing the storage when it
 * is full.
 *
 * Growth doubles the capacity. An array created with capacity 0 grows to one
 * slot instead, because doubling zero would leave no room for the write below.
 * NOTE(review): a failed realloc is still not handled here.
 */
INLINE void addDIA_Data(DynamicIntArray *dia, int value)
{
    if(dia->size==dia->capacity)
    {
        dia->capacity = (dia->capacity == 0) ? 1 : (dia->capacity << 1);
        dia->array = (unsigned char *)realloc(dia->array, dia->capacity*sizeof(unsigned char));
    }
    dia->array[dia->size] = (unsigned char)value;
    dia->size ++;
}
此差异已折叠。
/**
* @file TightDataPointStorageD.c
* @author Sheng Di and Dingwen Tao
* @date Aug, 2016
* @brief The functions used to construct the tightPointDataStorage element for storing compressed bytes.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "TightDataPointStorageD.h"
#include "sz.h"
#include "defines.h"
#include "Huffman.h"
/**
 * Allocate a TightDataPointStorageD with every byte set to zero and return it
 * through *this.
 */
void new_TightDataPointStorageD_Empty(TightDataPointStorageD **this)
{
    *this = (TightDataPointStorageD*)malloc(sizeof(TightDataPointStorageD));
    memset(*this, 0, sizeof(**this));
}
/**
 * Parse a flat compressed double buffer into a freshly allocated
 * TightDataPointStorageD (returned through *this).
 *
 * The array fields of the result point INTO flatBytes; nothing is copied, so
 * flatBytes must outlive the storage object.
 *
 * Layout read here: version(1) | flags(1) | meta(MetaDataByteLength_double) |
 * element count(ST) | max_quant_intervals(4) | intervals(4) | median(8) |
 * reqLength(1) | realPrecision(8) | typeArray_size(ST) | exactDataNum(ST) |
 * exactMidBytes_size(ST) | typeArray | leadNumArray | exactMidBytes |
 * residualMidBits, where ST is pde_exe->SZ_SIZE_TYPE (4 or 8).
 *
 * Flag bits: 0x01 all-same data, 0x08 accelerate_pw_rel_compression,
 * 0x10 lossless, 0x40 ST is 8, 0x80 regression payload.
 *
 * Returns SZ_ABS in every case, including an unrecognised version.
 * NOTE(review): reads are not bounds-checked against flatBytesLength.
 */
int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params)
{
    new_TightDataPointStorageD_Empty(this);
    size_t i, index = 0;
    unsigned char version = flatBytes[index++];   // 1 byte: format version
    unsigned char sameRByte = flatBytes[index++]; // 1 byte: flag bits

    // parse data format
    switch (version)
    {
        case DATA_FROMAT_VER1:
            break;
        default:
            printf(" error, compressed data format can not be recognised. ver=%d\n ", version);
            return SZ_ABS;
    }

    int same = sameRByte & 0x01;
    (*this)->isLossless = (sameRByte & 0x10)>>4;
    pde_exe->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4;
    pde_params->accelerate_pw_rel_compression = (sameRByte & 0x08) >> 3;
    int errorBoundMode = SZ_ABS;

    convertBytesToSZParams(&(flatBytes[index]), pde_params, pde_exe);
    index += MetaDataByteLength_double;

    int isRegression = (sameRByte >> 7) & 0x01;

    unsigned char dsLengthBytes[8];
    for (i = 0; i < pde_exe->SZ_SIZE_TYPE; i++)
        dsLengthBytes[i] = flatBytes[index++];
    (*this)->dataSeriesLength = bytesToSize(dsLengthBytes, pde_exe->SZ_SIZE_TYPE);

    if((*this)->isLossless==1)
    {
        return errorBoundMode;
    }
    else if(same==1)
    {
        (*this)->allSameData = 1;
        (*this)->exactMidBytes = &(flatBytes[index]);
        return errorBoundMode;
    }
    else
        (*this)->allSameData = 0;

    if(isRegression == 1)
    {
        // The regression payload is everything after the header parsed so far.
        // Deriving it from index keeps it in step with the bytes actually
        // consumed (the version field is 1 byte, not 3).
        (*this)->raBytes_size = index <= flatBytesLength ? flatBytesLength - index : 0;
        (*this)->raBytes = &(flatBytes[index]);
        return errorBoundMode;
    }

    unsigned char byteBuf[8];

    for (i = 0; i < 4; i++)
        byteBuf[i] = flatBytes[index++];
    int max_quant_intervals = bytesToInt_bigEndian(byteBuf);// 4
    pde_params->maxRangeRadius = max_quant_intervals/2;

    for (i = 0; i < 4; i++)
        byteBuf[i] = flatBytes[index++];
    (*this)->intervals = bytesToInt_bigEndian(byteBuf);// 4

    for (i = 0; i < 8; i++)
        byteBuf[i] = flatBytes[index++];
    (*this)->medianValue = bytesToDouble(byteBuf);//8

    (*this)->reqLength = flatBytes[index++]; //1

    for (i = 0; i < 8; i++)
        byteBuf[i] = flatBytes[index++];
    (*this)->realPrecision = bytesToDouble(byteBuf);//8

    for (i = 0; i < pde_exe->SZ_SIZE_TYPE; i++)
        byteBuf[i] = flatBytes[index++];
    (*this)->typeArray_size = bytesToSize(byteBuf, pde_exe->SZ_SIZE_TYPE);

    for (i = 0; i < pde_exe->SZ_SIZE_TYPE; i++)
        byteBuf[i] = flatBytes[index++];
    (*this)->exactDataNum = bytesToSize(byteBuf, pde_exe->SZ_SIZE_TYPE);// ST

    for (i = 0; i < pde_exe->SZ_SIZE_TYPE; i++)
        byteBuf[i] = flatBytes[index++];
    (*this)->exactMidBytes_size = bytesToSize(byteBuf, pde_exe->SZ_SIZE_TYPE);// ST

    // two bits per exact value, rounded up to whole bytes
    size_t logicLeadNumBitsNum = (*this)->exactDataNum * 2;
    if (logicLeadNumBitsNum % 8 == 0)
    {
        (*this)->leadNumArray_size = logicLeadNumBitsNum >> 3;
    }
    else
    {
        (*this)->leadNumArray_size = (logicLeadNumBitsNum >> 3) + 1;
    }

    (*this)->typeArray = &flatBytes[index];
    //retrieve the number of states (i.e., stateNum)
    (*this)->allNodes = bytesToInt_bigEndian((*this)->typeArray); //the first 4 bytes store the stateNum
    (*this)->stateNum = ((*this)->allNodes+1)/2;
    index+=(*this)->typeArray_size;

    (*this)->leadNumArray = &flatBytes[index];
    index+=(*this)->leadNumArray_size;

    (*this)->exactMidBytes = &flatBytes[index];
    index+=(*this)->exactMidBytes_size;

    (*this)->residualMidBits = &flatBytes[index];
    // Residual bits occupy whatever is left. The double header carries an
    // 8-byte median and MetaDataByteLength_double bytes of metadata, so the
    // remainder is taken from index instead of a hand-written field sum.
    (*this)->residualMidBits_size = index <= flatBytesLength ? flatBytesLength - index : 0;

    return errorBoundMode;
}
/**
 * Build the compression-side storage object for a double series.
 *
 * type             quantization codes, dataSeriesLength entries; Huffman-encoded
 *                  here into typeArray / typeArray_size
 * exactMidBytes    stored by pointer (not copied), exactMidBytes_size bytes
 * leadNumIntArray  one readable leading-byte count per exact value
 *                  (exactDataNum entries); packed 2 bits each into leadNumArray
 * resiMidBits      one residual value per exact value, resiBitLength bits each;
 *                  packed into residualMidBits
 * resiMidBits_size not used by this function
 *
 * The remaining scalar arguments are stored in the fields of the same name.
 * */
void new_TightDataPointStorageD(TightDataPointStorageD **this,
        size_t dataSeriesLength, size_t exactDataNum,
        int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
        unsigned char* leadNumIntArray,  //leadNumIntArray contains readable numbers....
        unsigned char* resiMidBits, size_t resiMidBits_size,
        unsigned char resiBitLength,
        double realPrecision, double medianValue, char reqLength, unsigned int intervals,
        unsigned char radExpo)
{
    TightDataPointStorageD *tdps = (TightDataPointStorageD *)malloc(sizeof(TightDataPointStorageD));
    *this = tdps;
    memset(tdps, 0, sizeof(TightDataPointStorageD));

    /* scalar descriptors */
    tdps->allSameData = 0;
    tdps->isLossless = 0;
    tdps->dataSeriesLength = dataSeriesLength;
    tdps->exactDataNum = exactDataNum;
    tdps->realPrecision = realPrecision;
    tdps->medianValue = medianValue;
    tdps->reqLength = reqLength;
    tdps->intervals = intervals;
    tdps->radExpo = radExpo;

    /* Huffman-encode the quantization codes */
    int stateNum = 2*intervals;
    HuffmanTree* tree = createHuffmanTree(stateNum);
    encode_withTree(tree, type, dataSeriesLength, &tdps->typeArray, &tdps->typeArray_size);
    SZ_ReleaseHuffman(tree);

    /* exact-value payloads */
    tdps->exactMidBytes = exactMidBytes;
    tdps->exactMidBytes_size = exactMidBytes_size;
    tdps->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &(tdps->leadNumArray));
    tdps->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic(resiMidBits, resiBitLength, exactDataNum, &(tdps->residualMidBits));
}
/**
 * Serialize a non-constant double series into `bytes`.
 *
 * Layout written: version(1) | sameByte(1) | meta(MetaDataByteLength_double) |
 * element count(ST) | max_quant_intervals(4) | intervals(4) | median(8) |
 * reqLength(1) | realPrecision(8) | typeArray_size(ST) | exactDataNum(ST) |
 * exactMidBytes_size(ST) | typeArray | leadNumArray | exactMidBytes |
 * residualMidBits (when present), where ST is exe_params->SZ_SIZE_TYPE.
 *
 * dsLengthBytes holds the already encoded element count (ST bytes).
 * `bytes` must be large enough for the whole layout; the size is computed by
 * convertTDPStoFlatBytes_double().
 *
 * The layout deliberately contains no minLogValue field: the size computed by
 * convertTDPStoFlatBytes_double() and the parser
 * new_TightDataPointStorageD_fromFlatBytes() have no such field, so emitting
 * it would write 8 bytes past the accounted size and shift every following
 * array for the reader.
 */
void convertTDPStoBytes_double(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte)
{
    size_t i, k = 0;
    unsigned char intervalsBytes[4];
    unsigned char typeArrayLengthBytes[8];
    unsigned char exactLengthBytes[8];
    unsigned char exactMidBytesLength[8];
    unsigned char realPrecisionBytes[8];
    unsigned char medianValueBytes[8];
    unsigned char max_quant_intervals_Bytes[4];

    bytes[k++] = versionNumber;
    bytes[k++] = sameByte; //1 byte

    convertSZParamsToBytes(confparams_cpr, &(bytes[k]), exe_params->optQuantMode);
    k = k + MetaDataByteLength_double;

    for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST: 4 or 8 bytes
        bytes[k++] = dsLengthBytes[i];

    intToBytes_bigEndian(max_quant_intervals_Bytes, confparams_cpr->max_quant_intervals);
    for(i = 0;i<4;i++)//4
        bytes[k++] = max_quant_intervals_Bytes[i];

    intToBytes_bigEndian(intervalsBytes, tdps->intervals);
    for(i = 0;i<4;i++)//4
        bytes[k++] = intervalsBytes[i];

    doubleToBytes(medianValueBytes, tdps->medianValue);
    for (i = 0; i < 8; i++)// 8
        bytes[k++] = medianValueBytes[i];

    bytes[k++] = tdps->reqLength; //1 byte

    doubleToBytes(realPrecisionBytes, tdps->realPrecision);
    for (i = 0; i < 8; i++)// 8
        bytes[k++] = realPrecisionBytes[i];

    sizeToBytes(typeArrayLengthBytes, tdps->typeArray_size, exe_params->SZ_SIZE_TYPE);
    for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST
        bytes[k++] = typeArrayLengthBytes[i];

    sizeToBytes(exactLengthBytes, tdps->exactDataNum, exe_params->SZ_SIZE_TYPE);
    for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST
        bytes[k++] = exactLengthBytes[i];

    sizeToBytes(exactMidBytesLength, tdps->exactMidBytes_size, exe_params->SZ_SIZE_TYPE);
    for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST
        bytes[k++] = exactMidBytesLength[i];

    // copy data
    memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size);
    k += tdps->typeArray_size;

    memcpy(&(bytes[k]), tdps->leadNumArray, tdps->leadNumArray_size);
    k += tdps->leadNumArray_size;

    memcpy(&(bytes[k]), tdps->exactMidBytes, tdps->exactMidBytes_size);
    k += tdps->exactMidBytes_size;

    if(tdps->residualMidBits!=NULL)
    {
        memcpy(&(bytes[k]), tdps->residualMidBits, tdps->residualMidBits_size);
        k += tdps->residualMidBits_size;
    }
}
/**
 * Serialize a TightDataPointStorageD into the caller-supplied buffer `bytes`.
 *
 * Returns true and stores the number of bytes written in *size. Returns false
 * with *size set to 0 (matching convertTDPStoFlatBytes_float) when the encoded
 * form would be at least as large as the raw data
 * (dataSeriesLength * sizeof(double)); nothing is written in that case.
 *
 * `bytes` is not allocated here; the caller must provide it.
 */
bool convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char* bytes, size_t *size)
{
    size_t i, k = 0;
    unsigned char dsLengthBytes[8];

    if(exe_params->SZ_SIZE_TYPE==4)
        intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4
    else
        longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8

    // assemble the flag byte
    unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0;
    if(tdps->isLossless)
        sameByte = (unsigned char) (sameByte | 0x10);
    if(confparams_cpr->errorBoundMode>=PW_REL)
        sameByte = (unsigned char) (sameByte | 0x20); // 00100000, the 5th bit
    if(exe_params->SZ_SIZE_TYPE==8)
        sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit
    if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression)
        sameByte = (unsigned char) (sameByte | 0x08);

    if(tdps->allSameData==1)
    {
        size_t totalByteLength = 1 + 1 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size;
        // compression must actually save space
        if(totalByteLength >= tdps->dataSeriesLength * sizeof(double))
        {
            *size = 0;
            return false;
        }

        bytes[k++] = versionNumber;
        bytes[k++] = sameByte;

        convertSZParamsToBytes(confparams_cpr, &(bytes[k]), exe_params->optQuantMode);
        k = k + MetaDataByteLength_double;

        for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
            bytes[k++] = dsLengthBytes[i];

        for (i = 0; i < tdps->exactMidBytes_size; i++)
            bytes[k++] = tdps->exactMidBytes[i];

        *size = totalByteLength;
    }
    else
    {
        size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;

        size_t totalByteLength = 1 + 1 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 4 + 4 + 8 + 1 + 8
                + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE
                + tdps->typeArray_size
                + tdps->leadNumArray_size
                + tdps->exactMidBytes_size
                + residualMidBitsLength;

        // compression must actually save space
        if(totalByteLength >= tdps->dataSeriesLength * sizeof(double))
        {
            *size = 0;
            return false;
        }

        convertTDPStoBytes_double(tdps, bytes, dsLengthBytes, sameByte);
        *size = totalByteLength;
    }

    return true;
}
/**
 * Release a storage object together with the four arrays it points at
 * (leadNumArray, exactMidBytes, residualMidBits, typeArray).
 * free() accepts NULL, so arrays that were never set need no special case.
 * */
void free_TightDataPointStorageD(TightDataPointStorageD *tdps)
{
    free(tdps->leadNumArray);
    free(tdps->exactMidBytes);
    free(tdps->residualMidBits);
    free(tdps->typeArray);
    free(tdps);
}
/**
 * Release only the storage descriptor used during decompression; none of the
 * arrays it points at are freed here.
 * */
void free_TightDataPointStorageD2(TightDataPointStorageD *tdps)
{
    void *descriptor = tdps;

    free(descriptor);
}
/**
* @file TightDataPointStorageF.c
* @author Sheng Di and Dingwen Tao
* @date Aug, 2016
* @brief The functions used to construct the tightPointDataStorage element for storing compressed bytes.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "TightDataPointStorageF.h"
#include "sz.h"
#include "defines.h"
#include "Huffman.h"
/**
 * Allocate a TightDataPointStorageF with every byte set to zero and return it
 * through *this.
 */
void new_TightDataPointStorageF_Empty(TightDataPointStorageF **this)
{
    *this = (TightDataPointStorageF*)malloc(sizeof(TightDataPointStorageF));
    memset(*this, 0, sizeof(**this));
}
/**
 * Parse a flat compressed float buffer into a freshly allocated
 * TightDataPointStorageF (returned through *this).
 *
 * The array fields of the result point INTO flatBytes; nothing is copied, so
 * flatBytes must outlive the storage object.
 *
 * Layout read here: version(1) | flags(1) | meta(MetaDataByteLength) |
 * element count(ST) | max_quant_intervals(4) | intervals(4) | median(4) |
 * reqLength(1) | realPrecision(8) | typeArray_size(ST) | exactDataNum(ST) |
 * exactMidBytes_size(ST) | typeArray | leadNumArray | exactMidBytes |
 * residualMidBits, where ST is pde_exe->SZ_SIZE_TYPE (4 or 8).
 *
 * Flag bits: 0x01 all-same data, 0x10 lossless, 0x40 ST is 8,
 * 0x80 regression payload.
 *
 * Returns SZ_ABS in every case, including an unrecognised version.
 */
int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params)
{
    new_TightDataPointStorageF_Empty(this);
    TightDataPointStorageF *tdps = *this;

    size_t index = 0;
    unsigned char fieldBuf[8];
    const int errorBoundMode = SZ_ABS;

    // 1 version, 2 flag byte
    const unsigned char version = flatBytes[index++];
    const unsigned char flagByte = flatBytes[index++];

    if (version != DATA_FROMAT_VER1)
    {
        printf(" error, float compressed data format can not be recognised. ver=%d\n ", version);
        return SZ_ABS;
    }

    const int allSame = flagByte & 0x01;                         //0000,0001
    const int isRegression = (flagByte >> 7) & 0x01;             //1000,0000
    tdps->isLossless = (flagByte & 0x10)>>4;                     //0001,0000
    pde_exe->SZ_SIZE_TYPE = ((flagByte & 0x40)>>6)==1?8:4;       //0100,0000

    // 3 meta
    convertBytesToSZParams(&(flatBytes[index]), pde_params, pde_exe);
    index += MetaDataByteLength;

    // width of every size field that follows (4 or 8)
    const size_t sizeWidth = pde_exe->SZ_SIZE_TYPE;

    // 4 element count
    memcpy(fieldBuf, &flatBytes[index], sizeWidth);
    index += sizeWidth;
    tdps->dataSeriesLength = bytesToSize(fieldBuf, pde_exe->SZ_SIZE_TYPE);

    if (tdps->isLossless == 1)
        return errorBoundMode;

    if (allSame == 1)
    {
        tdps->allSameData = 1;
        tdps->exactMidBytes = &(flatBytes[index]);
        return errorBoundMode;
    }
    tdps->allSameData = 0;

    // regression payload: everything after the header parsed so far
    if (isRegression == 1)
    {
        tdps->raBytes_size = flatBytesLength - index;
        tdps->raBytes = &(flatBytes[index]);
        return errorBoundMode;
    }

    // 5 quant intervals
    memcpy(fieldBuf, &flatBytes[index], 4);
    index += 4;
    int max_quant_intervals = bytesToInt_bigEndian(fieldBuf);
    pde_params->maxRangeRadius = max_quant_intervals/2;

    // 6 intervals
    memcpy(fieldBuf, &flatBytes[index], 4);
    index += 4;
    tdps->intervals = bytesToInt_bigEndian(fieldBuf);

    // 7 median
    memcpy(fieldBuf, &flatBytes[index], 4);
    index += 4;
    tdps->medianValue = bytesToFloat(fieldBuf);

    // 8 reqLength
    tdps->reqLength = flatBytes[index++];

    // 9 realPrecision
    memcpy(fieldBuf, &flatBytes[index], 8);
    index += 8;
    tdps->realPrecision = bytesToDouble(fieldBuf);

    // 10 typeArray_size
    memcpy(fieldBuf, &flatBytes[index], sizeWidth);
    index += sizeWidth;
    tdps->typeArray_size = bytesToSize(fieldBuf, pde_exe->SZ_SIZE_TYPE);

    // 11 exactDataNum
    memcpy(fieldBuf, &flatBytes[index], sizeWidth);
    index += sizeWidth;
    tdps->exactDataNum = bytesToSize(fieldBuf, pde_exe->SZ_SIZE_TYPE);

    // 12 exactMidBytes_size
    memcpy(fieldBuf, &flatBytes[index], sizeWidth);
    index += sizeWidth;
    tdps->exactMidBytes_size = bytesToSize(fieldBuf, pde_exe->SZ_SIZE_TYPE);

    // leadNumArray holds two bits per exact value, rounded up to whole bytes
    const size_t leadBits = tdps->exactDataNum * 2;
    tdps->leadNumArray_size = (leadBits >> 3) + (leadBits % 8 == 0 ? 0 : 1);

    // 13 typeArray; its first 4 bytes store the node count
    tdps->typeArray = &flatBytes[index];
    tdps->allNodes = bytesToInt_bigEndian(tdps->typeArray);
    tdps->stateNum = (tdps->allNodes+1)/2;
    index += tdps->typeArray_size;

    // 14 leadNumArray
    tdps->leadNumArray = &flatBytes[index];
    index += tdps->leadNumArray_size;

    // 15 exactMidBytes
    tdps->exactMidBytes = &flatBytes[index];
    index += tdps->exactMidBytes_size;

    // 16 residualMidBits: whatever remains after all fields consumed above
    tdps->residualMidBits = &flatBytes[index];
    tdps->residualMidBits_size = flatBytesLength - index;

    return errorBoundMode;
}
/**
 * Build the compression-side storage object for a float series.
 *
 * type             quantization codes, dataSeriesLength entries; Huffman-encoded
 *                  here into typeArray / typeArray_size
 * exactMidBytes    stored by pointer (not copied), exactMidBytes_size bytes
 * leadNumIntArray  one readable leading-byte count per exact value
 *                  (exactDataNum entries); packed 2 bits each into leadNumArray
 * resiMidBits      one residual value per exact value, resiBitLength bits each;
 *                  packed into residualMidBits
 * resiMidBits_size not used by this function
 *
 * The remaining scalar arguments are stored in the fields of the same name.
 * */
void new_TightDataPointStorageF(TightDataPointStorageF **this,
        size_t dataSeriesLength, size_t exactDataNum,
        int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
        unsigned char* leadNumIntArray,  //leadNumIntArray contains readable numbers....
        unsigned char* resiMidBits, size_t resiMidBits_size,
        unsigned char resiBitLength,
        double realPrecision, float medianValue, char reqLength, unsigned int intervals,
        unsigned char radExpo) {
    TightDataPointStorageF *tdps = (TightDataPointStorageF *)malloc(sizeof(TightDataPointStorageF));
    *this = tdps;
    memset(tdps, 0, sizeof(TightDataPointStorageF));

    /* scalar descriptors */
    tdps->allSameData = 0;
    tdps->isLossless = 0;
    tdps->dataSeriesLength = dataSeriesLength;
    tdps->exactDataNum = exactDataNum;
    tdps->realPrecision = realPrecision;
    tdps->medianValue = medianValue;
    tdps->reqLength = reqLength;
    tdps->intervals = intervals;
    tdps->radExpo = radExpo;

    /* Huffman-encode the quantization codes */
    int stateNum = 2*intervals;
    HuffmanTree* tree = createHuffmanTree(stateNum);
    encode_withTree(tree, type, dataSeriesLength, &tdps->typeArray, &tdps->typeArray_size);
    SZ_ReleaseHuffman(tree);

    /* exact-value payloads */
    tdps->exactMidBytes = exactMidBytes;
    tdps->exactMidBytes_size = exactMidBytes_size;
    tdps->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &(tdps->leadNumArray));
    tdps->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic(resiMidBits, resiBitLength, exactDataNum, &(tdps->residualMidBits));
}
/**
 * Serialize a non-constant float series into `bytes`.
 *
 * Layout written: version(1) | sameByte(1) | meta(MetaDataByteLength) |
 * element count(ST) | max_quant_intervals(4) | intervals(4) | median(4) |
 * reqLength(1) | realPrecision(8) | typeArray_size(ST) | exactDataNum(ST) |
 * exactMidBytes_size(ST) | typeArray | leadNumArray | exactMidBytes |
 * residualMidBits (when present), where ST is exe_params->SZ_SIZE_TYPE.
 *
 * dsLengthBytes holds the already encoded element count (ST bytes).
 * `bytes` must be large enough for the whole layout; the size is computed by
 * convertTDPStoFlatBytes_float().
 */
void convertTDPStoBytes_float(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte)
{
    const size_t sizeWidth = exe_params->SZ_SIZE_TYPE; // 4 or 8 bytes per size field
    unsigned char scratch[8];                          // staging area for one encoded field
    size_t k = 0;

    // 1 version, 2 same
    bytes[k++] = versionNumber;
    bytes[k++] = sameByte;

    // 3 meta
    convertSZParamsToBytes(confparams_cpr, &(bytes[k]), exe_params->optQuantMode);
    k += MetaDataByteLength;

    // 4 element count
    memcpy(&bytes[k], dsLengthBytes, sizeWidth);
    k += sizeWidth;

    // 5 max_quant_intervals
    intToBytes_bigEndian(scratch, confparams_cpr->max_quant_intervals);
    memcpy(&bytes[k], scratch, 4);
    k += 4;

    // 6 intervals
    intToBytes_bigEndian(scratch, tdps->intervals);
    memcpy(&bytes[k], scratch, 4);
    k += 4;

    // 7 median
    floatToBytes(scratch, tdps->medianValue);
    memcpy(&bytes[k], scratch, 4);
    k += 4;

    // 8 reqLength
    bytes[k++] = tdps->reqLength;

    // 9 realPrecision
    doubleToBytes(scratch, tdps->realPrecision);
    memcpy(&bytes[k], scratch, 8);
    k += 8;

    // 10 typeArray size
    sizeToBytes(scratch, tdps->typeArray_size, exe_params->SZ_SIZE_TYPE);
    memcpy(&bytes[k], scratch, sizeWidth);
    k += sizeWidth;

    // 11 exactDataNum (the reader derives the leadNum array size from it)
    sizeToBytes(scratch, tdps->exactDataNum, exe_params->SZ_SIZE_TYPE);
    memcpy(&bytes[k], scratch, sizeWidth);
    k += sizeWidth;

    // 12 exactMidBytes size
    sizeToBytes(scratch, tdps->exactMidBytes_size, exe_params->SZ_SIZE_TYPE);
    memcpy(&bytes[k], scratch, sizeWidth);
    k += sizeWidth;

    // 13 typeArray
    memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size);
    k += tdps->typeArray_size;

    // 14 leadNumArray
    memcpy(&(bytes[k]), tdps->leadNumArray, tdps->leadNumArray_size);
    k += tdps->leadNumArray_size;

    // 15 exactMidBytes
    memcpy(&(bytes[k]), tdps->exactMidBytes, tdps->exactMidBytes_size);
    k += tdps->exactMidBytes_size;

    // 16 residualMidBits
    if (tdps->residualMidBits != NULL)
    {
        memcpy(&(bytes[k]), tdps->residualMidBits, tdps->residualMidBits_size);
        k += tdps->residualMidBits_size;
    }
}
/**
 * Serialize a TightDataPointStorageF into the caller-supplied buffer `bytes`.
 *
 * Returns true and stores the number of bytes written in *size. Returns false
 * with *size set to 0 when the encoded form would be at least as large as the
 * raw data (dataSeriesLength * sizeof(float)); nothing is written then.
 *
 * `bytes` is not allocated here; the caller must provide it.
 */
bool convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size)
{
    const int allSame = (tdps->allSameData == 1);
    size_t totalByteLength;
    unsigned char dsLengthBytes[8];
    unsigned char sameByte;

    // element count, encoded with the configured width
    if (exe_params->SZ_SIZE_TYPE == 4)
        intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4
    else
        longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8

    // flag byte
    sameByte = allSame ? (unsigned char)1 : (unsigned char)0;         //0000,0001
    if (tdps->isLossless)
        sameByte = (unsigned char) (sameByte | 0x10);                 //0001,0000
    if (confparams_cpr->errorBoundMode >= PW_REL)
        sameByte = (unsigned char) (sameByte | 0x20);                 //0010,0000
    if (exe_params->SZ_SIZE_TYPE == 8)
        sameByte = (unsigned char) (sameByte | 0x40);                 //0100,0000
    if (confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression)
        sameByte = (unsigned char) (sameByte | 0x08);                 //0000,1000

    // size of the encoded form for the chosen layout
    if (allSame)
    {
        totalByteLength = 1 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size;
    }
    else
    {
        size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;

        totalByteLength = 1 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + 4 + 4 + 1 + 8
                + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE
                + tdps->typeArray_size
                + tdps->leadNumArray_size
                + tdps->exactMidBytes_size
                + residualMidBitsLength;
    }

    // compression must actually save space
    if (totalByteLength >= tdps->dataSeriesLength * sizeof(float))
    {
        *size = 0;
        return false;
    }

    if (allSame)
    {
        size_t i, k = 0;

        bytes[k++] = versionNumber;                 // 1 version
        bytes[k++] = sameByte;                      // 2 flag byte

        // 3 meta
        convertSZParamsToBytes(confparams_cpr, &(bytes[k]), exe_params->optQuantMode);
        k = k + MetaDataByteLength;

        // 4 element count
        for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
            bytes[k++] = dsLengthBytes[i];

        // 5 exactMidBytes
        for (i = 0; i < tdps->exactMidBytes_size; i++)
            bytes[k++] = tdps->exactMidBytes[i];
    }
    else
    {
        convertTDPStoBytes_float(tdps, bytes, dsLengthBytes, sameByte);
    }

    *size = totalByteLength;
    return true;
}
/**
 * Release a compression-side storage object together with the four arrays it
 * points at (leadNumArray, exactMidBytes, residualMidBits, typeArray).
 * free() accepts NULL, so arrays that were never set need no special case.
 * */
void free_TightDataPointStorageF(TightDataPointStorageF *tdps)
{
    free(tdps->leadNumArray);
    free(tdps->exactMidBytes);
    free(tdps->residualMidBits);
    free(tdps->typeArray);
    free(tdps);
}
/**
 * Release only the storage descriptor used during decompression; none of the
 * arrays it points at are freed here.
 * */
void free_TightDataPointStorageF2(TightDataPointStorageF *tdps)
{
    void *descriptor = tdps;

    free(descriptor);
}
/**
* @file TypeManager.c
* @author Sheng Di
* @date May, 2016
* @brief TypeManager is used to manage the type array: parsing of the bytes and other types in between.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdio.h>
#include <stdlib.h>
#include "DynamicByteArray.h"
#include "sz.h"
//int convertIntArray2ByteArray_fast_8b()
/**
 * Pack an array of 0/1 flags into bits, most significant bit first.
 *
 * Only an element equal to 1 sets its bit; any other value leaves it clear.
 * *result receives a malloc'ed buffer of ceil(intArrayLength / 8) bytes, or
 * NULL when intArrayLength is 0. Unused trailing bits of the last byte are 0.
 *
 * Returns the number of bytes produced.
 */
size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result)
{
    const size_t byteLength = intArrayLength/8 + (intArrayLength%8 != 0 ? 1 : 0);
    size_t src = 0;
    size_t dst;

    if (byteLength > 0)
        *result = (unsigned char*)malloc(byteLength*sizeof(unsigned char));
    else
        *result = NULL;

    for (dst = 0; dst < byteLength; dst++)
    {
        unsigned int packed = 0;
        unsigned int mask;

        /* mask walks from bit 7 down to bit 0 and becomes 0 after 8 steps */
        for (mask = 0x80; mask != 0 && src < intArrayLength; mask >>= 1, src++)
        {
            if (intArray[src] == 1)
                packed |= mask;
        }
        (*result)[dst] = (unsigned char)packed;
    }

    return byteLength;
}
/**
 * Pack an array of 0/1 flags into bits, most significant bit first, writing
 * into the caller-supplied buffer `result`.
 *
 * Only an element equal to 1 sets its bit. `result` must have room for
 * ceil(intArrayLength / 8) bytes; every produced byte is fully overwritten.
 *
 * Returns the number of bytes written.
 */
size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result)
{
    const size_t byteLength = intArrayLength/8 + (intArrayLength%8 != 0 ? 1 : 0);
    size_t src = 0;
    size_t dst;

    for (dst = 0; dst < byteLength; dst++)
    {
        unsigned int packed = 0;
        unsigned int mask;

        /* mask walks from bit 7 down to bit 0 and becomes 0 after 8 steps */
        for (mask = 0x80; mask != 0 && src < intArrayLength; mask >>= 1, src++)
        {
            if (intArray[src] == 1)
                packed |= mask;
        }
        result[dst] = (unsigned char)packed;
    }

    return byteLength;
}
/**
 * Unpack `stepLength` two-bit values from `byteArray`, four values per byte,
 * most significant pair first.
 *
 * *intArray receives a malloc'ed array of stepLength bytes (one value each),
 * or NULL when stepLength is 0. The caller owns the array.
 *
 * Asking for more values than byteArray holds (stepLength > 4 * length)
 * prints an error and terminates the process.
 */
void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
{
    size_t n;

    if(stepLength > byteArrayLength*4)
    {
        printf("Error: stepLength > byteArray.length*4\n");
        printf("stepLength=%zu, byteArray.length=%zu\n", stepLength, byteArrayLength);
        exit(0);
    }

    /* Nothing requested: return before the unpack loop so the NULL output is
     * never written through, even when byteArray is not empty. */
    if (stepLength == 0)
    {
        *intArray = NULL;
        return;
    }

    *intArray = (unsigned char*)malloc(stepLength*sizeof(unsigned char));

    /* value n lives in byte n/4, at bit offset 6, 4, 2 or 0 */
    for (n = 0; n < stepLength; n++)
    {
        const unsigned char packed = byteArray[n >> 2];
        const unsigned int shift = 6 - 2*(unsigned int)(n & 3);
        (*intArray)[n] = (unsigned char)((packed >> shift) & 0x03);
    }
}
/**
 * Pack values in the range 0..3 into two bits each, four values per byte,
 * the first value of each group in the most significant pair:
 * [01|10|11|00|....]-->[01|10|11|00][....]
 *
 * @param timeStepType        values to pack, each 0, 1, 2 or 3
 * @param timeStepTypeLength  number of values
 * @param result              out: malloc'ed packed bytes, or NULL when there
 *                            is nothing to pack
 * @return number of packed bytes
 *
 * A value above 3 prints an error and terminates the process.
 */
size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result)
{
    size_t byteLength = timeStepTypeLength*2/8;
    size_t src = 0;
    size_t dst, slot;

    /* a partial last group still needs its own byte */
    if (timeStepTypeLength%4 != 0)
        byteLength++;

    if (byteLength > 0)
        *result = (unsigned char*)malloc(byteLength*sizeof(unsigned char));
    else
        *result = NULL;

    for (dst = 0; dst < byteLength; dst++)
    {
        int packed = 0;

        for (slot = 0; slot < 4 && src < timeStepTypeLength; slot++, src++)
        {
            const int type = timeStepType[src];

            if (type > 3)
            {
                printf("Error: wrong timestep type...: type[%zu]=%d\n", src, type);
                exit(0);
            }
            /* a zero value contributes no bits */
            packed = (packed | (type << (6-slot*2)));
        }
        (*result)[dst] = (unsigned char)packed;
    }

    return byteLength;
}
/**
 * Number of bit positions a resiBitLength-wide value must be shifted left to
 * sit directly after bit offset k inside the current byte. A negative result
 * means the value does not fit in the remaining bits of that byte.
 */
INLINE int getLeftMovingSteps(size_t k, unsigned char resiBitLength)
{
    size_t usedBits = k % 8;
    return 8 - usedBits - resiBitLength;
}
/**
 * Pack nbEle values of resiBitLength bits each into a contiguous bit stream.
 *
 * Values are appended most-significant-bit first; a value that does not fit
 * in the remaining bits of the current byte is split across two bytes.
 * NOTE(review): the shifts assume resiBitLength <= 8 and that each value
 * fits in resiBitLength bits - confirm against callers.
 *
 * @param timeStepType   is the resiMidBits
 * @param resiBitLength  is the length of resiMidBits for each element (the
 *                       number of values == the # of unpredictable elements);
 *                       when 0, no input is consumed and nothing is emitted
 * @param nbEle          number of values in timeStepType
 * @param bytes          receives the packed bytes via convertDBAtoBytes()
 * @return number of bytes accumulated in the dynamic byte array
 */
size_t convertIntArray2ByteArray_fast_dynamic(unsigned char* timeStepType, unsigned char resiBitLength, size_t nbEle, unsigned char **bytes)
{
    size_t idx;
    size_t bitPos = 0;      /* total number of bits emitted so far */
    size_t total;
    int pending = 0;        /* partially filled output byte */
    int shift = 0;
    DynamicByteArray* dba;

    new_DBA(&dba, 1024);

    if(resiBitLength != 0)
    {
        for(idx = 0; idx < nbEle; idx++)
        {
            int value = timeStepType[idx];
            shift = getLeftMovingSteps(bitPos, resiBitLength);
            if(shift > 0)
            {
                /* fits with room to spare: keep accumulating */
                pending |= value << shift;
            }
            else if(shift == 0)
            {
                /* exactly completes the current byte */
                pending |= value;
                addDBA_Data(dba, (unsigned char)pending);
                pending = 0;
            }
            else
            {
                /* straddles a byte boundary: flush the high part, carry the rest */
                pending |= value >> (-shift);
                addDBA_Data(dba, (unsigned char)pending);
                pending = value << (8 + shift);
            }
            bitPos += resiBitLength;
        }
    }

    /* flush the trailing, partially filled byte */
    if(shift != 0)
        addDBA_Data(dba, (unsigned char)pending);

    convertDBAtoBytes(dba, bytes);
    total = dba->size;
    free_DBA(dba);
    return total;
}
\ No newline at end of file
/**
* @file conf.c
* @author Sheng Di (sdi1@anl.gov or disheng222@gmail.com)
* @date 2015.
* @brief Configuration loading functions for the SZ library.
* (C) 2015 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <math.h>
#include "string.h"
#include "sz.h"
#include "iniparser.h"
#include "Huffman.h"
//
// Fill the given structures with the library's built-in default settings.
// Either pointer may be NULL, in which case that structure is skipped.
//
void setDefaulParams(sz_exedata* exedata, sz_params* params)
{
    if(params != NULL)
    {
        // error bounds
        params->errorBoundMode    = SZ_ABS;
        params->absErrBound       = 1E-8;
        params->absErrBoundDouble = 1E-16;
        params->relBoundRatio     = 1E-8;
        params->pw_relBoundRatio  = 1E-3;
        params->psnr              = 90;

        // quantization; maxRangeRadius is derived from max_quant_intervals
        params->max_quant_intervals    = 500;
        params->quantization_intervals = 100;
        params->maxRangeRadius         = params->max_quant_intervals/2;

        // compression strategy
        params->losslessCompressor = ZSTD_COMPRESSOR; //other option: GZIP_COMPRESSOR;
        params->sol_ID             = SZ;
        params->szMode             = SZ_BEST_COMPRESSION;
        params->withRegression     = SZ_WITH_LINEAR_REGRESSION;
        params->predThreshold      = 0.99;
        params->sampleDistance     = 100;

        // point-wise relative mode
        params->accelerate_pw_rel_compression = 1;
        params->segment_size                  = 36;
        params->pwr_type                      = SZ_PWR_MIN_TYPE;

        // miscellaneous
        params->snapshotCmprStep  = 5;
        params->randomAccess      = 0; //0: no random access , 1: support random access
        params->protectValueRange = 0;
        params->plus_bits         = 3;
    }

    if(exedata == NULL)
        return;

    exedata->optQuantMode = 1;
    exedata->SZ_SIZE_TYPE = 4;

    if(params == NULL)
    {
        // NOTE(review): 500/200 does not follow the radius = capacity/2 rule
        // used when params is available - confirm this is intentional.
        exedata->intvCapacity = 500;
        exedata->intvRadius = 200;
        return;
    }

    exedata->intvCapacity = params->maxRangeRadius*2;
    exedata->intvRadius = params->maxRangeRadius;
}
/**
 * Round base up to the nearest power of two (a power of two maps to itself).
 * Works by smearing the highest set bit of base-1 into every lower bit and
 * adding one; base == 0 therefore wraps around and yields 0.
 */
unsigned int roundUpToPowerOf2(unsigned int base)
{
    unsigned int shift;
    unsigned int v = base - 1;

    for(shift = 1; shift <= 16; shift <<= 1)
        v |= v >> shift;

    return v + 1;
}
/**
 * Store a fixed quantization interval count in the global exe_params:
 * the capacity is the count itself and the radius is half of it.
 */
void updateQuantizationInfo(int quant_intervals)
{
    int radius = quant_intervals/2;

    exe_params->intvRadius = radius;
    exe_params->intvCapacity = quant_intervals;
}
double computeABSErrBoundFromPSNR(double psnr, double threshold, double value_range)
{
double v1 = psnr + 10 * log10(1-2.0/3.0*threshold);
double v2 = v1/(-20);
double v3 = pow(10, v2);
return value_range * v3;
}
double computeABSErrBoundFromNORM_ERR(double normErr, size_t nbEle)
{
return sqrt(3.0/nbEle)*normErr;
}
/*-------------------------------------------------------------------------*/
/**
 * @brief Load the SZ settings into the global confparams_cpr / exe_params.
 *
 * Allocates the two global structures when they do not exist yet (zero
 * filled, so fields not covered by the configuration file have a defined
 * value), detects the host byte order, and then either installs the built-in
 * defaults (sz_cfgFile == NULL) or parses the given ini file.
 *
 * @param sz_cfgFile path of the configuration file, or NULL for the defaults
 * @return SZ_SUCCESS when the configuration was loaded, SZ_FAILED when memory
 *         could not be allocated, the file could not be parsed, or a setting
 *         is missing or invalid
 * */
int SZ_ReadConf(const char* sz_cfgFile) {
    int x = 1;
    char sol_name[256];
    char *modeBuf;
    char *errBoundMode;
    char *endianTypeString;
    dictionary *ini;
    char *par;
    char *y = (char*)&x;

    //record the setting in confparams_cpr
    if(confparams_cpr == NULL)
    {
        confparams_cpr = (sz_params*)calloc(1, sizeof(sz_params));
        if(confparams_cpr == NULL)
        {
            printf("[SZ] Error: out of memory while allocating the configuration.\n");
            return SZ_FAILED;
        }
    }
    if(exe_params == NULL)
    {
        exe_params = (sz_exedata*)calloc(1, sizeof(sz_exedata));
        if(exe_params == NULL)
        {
            printf("[SZ] Error: out of memory while allocating the configuration.\n");
            return SZ_FAILED;
        }
    }

    // the first byte of the integer 1 tells the host byte order
    if(*y==1)
        sysEndianType = LITTLE_ENDIAN_SYSTEM;
    else //=0
        sysEndianType = BIG_ENDIAN_SYSTEM;

    // default option
    if(sz_cfgFile == NULL)
    {
        dataEndianType = LITTLE_ENDIAN_DATA;
        setDefaulParams(exe_params, confparams_cpr);
        return SZ_SUCCESS;
    }

    // Check access to SZ configuration file and load dictionary
    ini = iniparser_load(sz_cfgFile);
    if (ini == NULL)
    {
        printf("[SZ] Iniparser failed to parse the conf. file.\n");
        return SZ_FAILED;
    }

    endianTypeString = iniparser_getstring(ini, "ENV:dataEndianType", "LITTLE_ENDIAN_DATA");
    if(strcmp(endianTypeString, "LITTLE_ENDIAN_DATA")==0)
        dataEndianType = LITTLE_ENDIAN_DATA;
    else if(strcmp(endianTypeString, "BIG_ENDIAN_DATA")==0)
        dataEndianType = BIG_ENDIAN_DATA;
    else
    {
        printf("Error: Wrong dataEndianType: please set it correctly in sz.config.\n");
        iniparser_freedict(ini);
        return SZ_FAILED;
    }

    // Reading/setting detection parameters
    par = iniparser_getstring(ini, "ENV:sol_name", NULL);
    if(par == NULL)
    {
        // passing NULL to a %s conversion is undefined behaviour, so reject it here
        printf("[SZ] Error: Null sol_name setting (please check sz.config file)\n");
        iniparser_freedict(ini);
        return SZ_FAILED;
    }
    snprintf(sol_name, sizeof(sol_name), "%s", par);

    if(strcmp(sol_name, "SZ")==0)
        confparams_cpr->sol_ID = SZ;
    else if(strcmp(sol_name, "PASTRI")==0)
        confparams_cpr->sol_ID = PASTRI;
    else if(strcmp(sol_name, "SZ_Transpose")==0)
        confparams_cpr->sol_ID = SZ_Transpose;
    else{
        printf("[SZ] Error: wrong solution name (please check sz.config file), sol=%s\n", sol_name);
        iniparser_freedict(ini);
        return SZ_FAILED;
    }

    if(confparams_cpr->sol_ID==SZ || confparams_cpr->sol_ID==SZ_Transpose)
    {
        int max_quant_intervals = iniparser_getint(ini, "PARAMETER:max_quant_intervals", 65536);
        int quantization_intervals = (int)iniparser_getint(ini, "PARAMETER:quantization_intervals", 0);

        confparams_cpr->max_quant_intervals = max_quant_intervals;
        confparams_cpr->quantization_intervals = quantization_intervals;
        if(quantization_intervals>0)
        {
            // a fixed interval count disables the optimized quantization mode
            updateQuantizationInfo(quantization_intervals);
            confparams_cpr->max_quant_intervals = max_quant_intervals = quantization_intervals;
            exe_params->optQuantMode = 0;
        }
        else //==0
        {
            confparams_cpr->maxRangeRadius = max_quant_intervals/2;
            exe_params->intvCapacity = confparams_cpr->maxRangeRadius*2;
            exe_params->intvRadius = confparams_cpr->maxRangeRadius;
            exe_params->optQuantMode = 1;
        }
        if(quantization_intervals%2!=0)
        {
            printf("Error: quantization_intervals must be an even number!\n");
            iniparser_freedict(ini);
            return SZ_FAILED;
        }

        confparams_cpr->predThreshold = (float)iniparser_getdouble(ini, "PARAMETER:predThreshold", 0);
        confparams_cpr->sampleDistance = (int)iniparser_getint(ini, "PARAMETER:sampleDistance", 0);

        modeBuf = iniparser_getstring(ini, "PARAMETER:szMode", NULL);
        if(modeBuf==NULL)
        {
            printf("[SZ] Error: Null szMode setting (please check sz.config file)\n");
            iniparser_freedict(ini);
            return SZ_FAILED;
        }
        else if(strcmp(modeBuf, "SZ_BEST_SPEED")==0)
            confparams_cpr->szMode = SZ_BEST_SPEED;
        else if(strcmp(modeBuf, "SZ_DEFAULT_COMPRESSION")==0)
            confparams_cpr->szMode = SZ_DEFAULT_COMPRESSION;
        else if(strcmp(modeBuf, "SZ_BEST_COMPRESSION")==0)
            confparams_cpr->szMode = SZ_BEST_COMPRESSION;
        else
        {
            printf("[SZ] Error: Wrong szMode setting (please check sz.config file)\n");
            iniparser_freedict(ini);
            return SZ_FAILED;
        }

        modeBuf = iniparser_getstring(ini, "PARAMETER:losslessCompressor", "ZSTD_COMPRESSOR");
        if(strcmp(modeBuf, "GZIP_COMPRESSOR")==0)
            confparams_cpr->losslessCompressor = GZIP_COMPRESSOR;
        else if(strcmp(modeBuf, "ZSTD_COMPRESSOR")==0)
            confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR;
        else
        {
            printf("[SZ] Error: Wrong losslessCompressor setting (please check sz.config file)\n");
            printf("No Such a lossless compressor: %s\n", modeBuf);
            iniparser_freedict(ini);
            return SZ_FAILED;
        }

        modeBuf = iniparser_getstring(ini, "PARAMETER:withLinearRegression", "YES");
        if(strcmp(modeBuf, "YES")==0 || strcmp(modeBuf, "yes")==0)
            confparams_cpr->withRegression = SZ_WITH_LINEAR_REGRESSION;
        else
            confparams_cpr->withRegression = SZ_NO_REGRESSION;

        modeBuf = iniparser_getstring(ini, "PARAMETER:protectValueRange", "YES");
        if(strcmp(modeBuf, "YES")==0)
            confparams_cpr->protectValueRange = 1;
        else
            confparams_cpr->protectValueRange = 0;

        confparams_cpr->randomAccess = (int)iniparser_getint(ini, "PARAMETER:randomAccess", 0);
        //TODO
        confparams_cpr->snapshotCmprStep = (int)iniparser_getint(ini, "PARAMETER:snapshotCmprStep", 5);

        errBoundMode = iniparser_getstring(ini, "PARAMETER:errorBoundMode", NULL);
        if(errBoundMode==NULL)
        {
            printf("[SZ] Error: Null error bound setting (please check sz.config file)\n");
            iniparser_freedict(ini);
            return SZ_FAILED;
        }
        else if(strcmp(errBoundMode,"ABS")==0||strcmp(errBoundMode,"abs")==0)
            confparams_cpr->errorBoundMode=SZ_ABS;
        else if(strcmp(errBoundMode, "REL")==0||strcmp(errBoundMode,"rel")==0)
            confparams_cpr->errorBoundMode=REL;
        else if(strcmp(errBoundMode, "VR_REL")==0||strcmp(errBoundMode, "vr_rel")==0)
            confparams_cpr->errorBoundMode=REL; // VR_REL is accepted as a synonym of REL
        else if(strcmp(errBoundMode, "ABS_AND_REL")==0||strcmp(errBoundMode, "abs_and_rel")==0)
            confparams_cpr->errorBoundMode=ABS_AND_REL;
        else if(strcmp(errBoundMode, "ABS_OR_REL")==0||strcmp(errBoundMode, "abs_or_rel")==0)
            confparams_cpr->errorBoundMode=ABS_OR_REL;
        else if(strcmp(errBoundMode, "PW_REL")==0||strcmp(errBoundMode, "pw_rel")==0)
            confparams_cpr->errorBoundMode=PW_REL;
        else if(strcmp(errBoundMode, "PSNR")==0||strcmp(errBoundMode, "psnr")==0)
            confparams_cpr->errorBoundMode=PSNR;
        else if(strcmp(errBoundMode, "ABS_AND_PW_REL")==0||strcmp(errBoundMode, "abs_and_pw_rel")==0)
            confparams_cpr->errorBoundMode=ABS_AND_PW_REL;
        else if(strcmp(errBoundMode, "ABS_OR_PW_REL")==0||strcmp(errBoundMode, "abs_or_pw_rel")==0)
            confparams_cpr->errorBoundMode=ABS_OR_PW_REL;
        else if(strcmp(errBoundMode, "REL_AND_PW_REL")==0||strcmp(errBoundMode, "rel_and_pw_rel")==0)
            confparams_cpr->errorBoundMode=REL_AND_PW_REL;
        else if(strcmp(errBoundMode, "REL_OR_PW_REL")==0||strcmp(errBoundMode, "rel_or_pw_rel")==0)
            confparams_cpr->errorBoundMode=REL_OR_PW_REL;
        else if(strcmp(errBoundMode, "NORM")==0||strcmp(errBoundMode, "norm")==0)
            confparams_cpr->errorBoundMode=NORM;
        else
        {
            printf("[SZ] Error: Wrong error bound mode (please check sz.config file)\n");
            iniparser_freedict(ini);
            return SZ_FAILED;
        }

        confparams_cpr->absErrBound = (double)iniparser_getdouble(ini, "PARAMETER:absErrBound", 0);
        confparams_cpr->relBoundRatio = (double)iniparser_getdouble(ini, "PARAMETER:relBoundRatio", 0);
        confparams_cpr->psnr = (double)iniparser_getdouble(ini, "PARAMETER:psnr", 0);
        confparams_cpr->normErr = (double)iniparser_getdouble(ini, "PARAMETER:normErr", 0);
        confparams_cpr->pw_relBoundRatio = (double)iniparser_getdouble(ini, "PARAMETER:pw_relBoundRatio", 0);
        confparams_cpr->segment_size = (int)iniparser_getint(ini, "PARAMETER:segment_size", 0);
        confparams_cpr->accelerate_pw_rel_compression = (int)iniparser_getint(ini, "PARAMETER:accelerate_pw_rel_compression", 1);

        // a missing pwr_type falls back to "MIN" through the getstring default
        modeBuf = iniparser_getstring(ini, "PARAMETER:pwr_type", "MIN");
        if(strcmp(modeBuf, "MIN")==0)
            confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
        else if(strcmp(modeBuf, "AVG")==0)
            confparams_cpr->pwr_type = SZ_PWR_AVG_TYPE;
        else if(strcmp(modeBuf, "MAX")==0)
            confparams_cpr->pwr_type = SZ_PWR_MAX_TYPE;
        else
        {
            printf("[SZ] Error: Wrong pwr_type setting (please check sz.config file).\n");
            iniparser_freedict(ini);
            return SZ_FAILED;
        }
        //initialization for Huffman encoding
        //SZ_Reset();
    }

    iniparser_freedict(ini);
    return SZ_SUCCESS;
}
/*-------------------------------------------------------------------------*/
/**
    @brief      Load the SZ configuration and report a failure on stdout.
    @param      sz_cfgFile  Configuration file path, forwarded unchanged to
                            SZ_ReadConf().
    @return     SZ_SUCCESS if SZ_ReadConf() succeeded, SZ_FAILED otherwise.

    Thin wrapper around SZ_ReadConf() that prints an error message when the
    configuration cannot be read.
 **/
/*-------------------------------------------------------------------------*/
int SZ_LoadConf(const char* sz_cfgFile) {
    if (SZ_ReadConf(sz_cfgFile) == SZ_SUCCESS)
        return SZ_SUCCESS;

    printf("[SZ] ERROR: Impossible to read configuration.\n");
    return SZ_FAILED;
}
此差异已折叠。
/*-------------------------------------------------------------------------*/
/**
@file dictionary.c
@author N. Devillard
@brief Implements a dictionary for string variables.
This module implements a simple dictionary object, i.e. a list
of string/string associations. This object is useful to store e.g.
information retrieved from a configuration file (ini files).
*/
/*--------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------
Includes
---------------------------------------------------------------------------*/
#include "dictionary.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/** Maximum value size for integers and doubles. */
#define MAXVALSZ 1024
/** Minimal allocated number of entries in a dictionary */
#define DICTMINSZ 128
/** Invalid key token */
#define DICT_INVALID_KEY ((char*)-1)
/*---------------------------------------------------------------------------
Private functions
---------------------------------------------------------------------------*/
/* Move a buffer into a zero-filled allocation twice as large.            */
/* 'size' is the current allocated size in bytes. On success the old      */
/* buffer is freed and the new one returned; on failure NULL is returned  */
/* and the old buffer is left untouched.                                  */
static void * mem_double(void * ptr, int size)
{
    void * grown = calloc(2*size, 1);

    if (grown != NULL) {
        memcpy(grown, ptr, size);
        free(ptr);
    }
    return grown ;
}
/*-------------------------------------------------------------------------*/
/**
  @brief    Duplicate a string
  @param    s String to duplicate, may be NULL
  @return   Pointer to a newly allocated copy, to be freed with free(), or
            NULL when s is NULL or the allocation fails

  Stand-in for strdup() on systems that do not provide it.
 */
/*--------------------------------------------------------------------------*/
static char * xstrdup(const char * s)
{
    size_t  bytes ;
    char  * copy ;

    if (s == NULL)
        return NULL ;

    /* length including the terminating NUL */
    bytes = strlen(s) + 1 ;
    copy = (char*)malloc(bytes) ;
    if (copy != NULL)
        memcpy(copy, s, bytes) ;
    return copy ;
}
/*---------------------------------------------------------------------------
Function codes
---------------------------------------------------------------------------*/
/*-------------------------------------------------------------------------*/
/**
  @brief    Compute the hash key for a string.
  @param    key     Character string to use for key.
  @return   1 unsigned int on at least 32 bits.

  Every character is added to the running hash and mixed with a shift/add
  and a shift/xor step; three final mixing steps follow the loop. The key
  itself is stored alongside the hash in the dictionary so that collisions
  can be resolved by comparing the strings as a last resort.
 */
/*--------------------------------------------------------------------------*/
unsigned dictionary_hash(const char * key)
{
    unsigned    h = 0 ;
    int         n = strlen(key) ;
    int         pos = 0 ;

    while (pos < n) {
        h += (unsigned)key[pos++] ;
        h += (h<<10) ;
        h ^= (h>>6) ;
    }
    /* final avalanche */
    h += (h<<3) ;
    h ^= (h>>11) ;
    h += (h<<15) ;
    return h ;
}
/*-------------------------------------------------------------------------*/
/**
  @brief    Create a new dictionary object.
  @param    size    Optional initial size of the dictionary.
  @return   1 newly allocated dictionary object, or NULL if any of the
            required allocations fails.

  This function allocates a new dictionary object of given size and returns
  it. If you do not know in advance (roughly) the number of entries in the
  dictionary, give size=0. Sizes below DICTMINSZ are raised to DICTMINSZ.
 */
/*--------------------------------------------------------------------------*/
dictionary * dictionary_new(int size)
{
    dictionary  *   d ;

    /* If no size was specified, allocate space for DICTMINSZ */
    if (size<DICTMINSZ) size=DICTMINSZ ;

    d = (dictionary *)calloc(1, sizeof(dictionary)) ;
    if (d == NULL) {
        return NULL ;
    }
    d->size = size ;
    d->val  = (char **)calloc(size, sizeof(char*));
    d->key  = (char **)calloc(size, sizeof(char*));
    d->hash = (unsigned int *)calloc(size, sizeof(unsigned));

    /* Never hand out a half-built dictionary: every other routine
       dereferences the three arrays unconditionally. */
    if (d->val == NULL || d->key == NULL || d->hash == NULL) {
        free(d->val);
        free(d->key);
        free(d->hash);
        free(d);
        return NULL ;
    }
    return d ;
}
/*-------------------------------------------------------------------------*/
/**
  @brief    Delete a dictionary object
  @param    d   dictionary object to deallocate, may be NULL.
  @return   void

  Releases every stored key and value, the three parallel arrays, and
  finally the dictionary structure itself.
 */
/*--------------------------------------------------------------------------*/
void dictionary_del(dictionary * d)
{
    int     slot ;

    if (!d) return ;

    slot = 0 ;
    while (slot < d->size) {
        /* free(NULL) is a no-op, so empty slots need no special case */
        free(d->key[slot]);
        free(d->val[slot]);
        slot++ ;
    }
    free(d->val);
    free(d->key);
    free(d->hash);
    free(d);
}
/*-------------------------------------------------------------------------*/
/**
  @brief    Get a value from a dictionary.
  @param    d       dictionary object to search.
  @param    key     Key to look for in the dictionary.
  @param    def     Default value to return if key not found.
  @return   1 pointer to internally allocated character string.

  Scans every slot of the dictionary and returns the value stored for the
  first slot whose key matches, or the passed 'def' pointer when no slot
  matches. The returned pointer refers to data owned by the dictionary:
  do not free or modify it.
 */
/*--------------------------------------------------------------------------*/
char * dictionary_get(dictionary * d, const char * key, char * def)
{
    unsigned    wanted = dictionary_hash(key) ;
    int         slot ;

    for (slot=0 ; slot<d->size ; slot++) {
        /* cheap hash comparison first, string comparison only to rule
           out a collision */
        if (d->key[slot]!=NULL
            && d->hash[slot]==wanted
            && strcmp(key, d->key[slot])==0) {
            return d->val[slot] ;
        }
    }
    return def ;
}
/*-------------------------------------------------------------------------*/
/**
  @brief    Set a value in a dictionary.
  @param    d       dictionary object to modify.
  @param    key     Key to modify or add.
  @param    val     Value to add.
  @return   int     0 if Ok, anything else otherwise

  If the given key is found in the dictionary, the associated value is
  replaced by the provided one. If the key cannot be found in the
  dictionary, it is added to it.

  It is Ok to provide a NULL value for val, but NULL values for the dictionary
  or the key are considered as errors: the function will return immediately
  in such a case.

  Notice that if you dictionary_set a variable to NULL, a call to
  dictionary_get will return a NULL value: the variable will be found, and
  its value (NULL) is returned.

  This function returns non-zero in case of failure. When it fails because
  memory could not be allocated, the existing entries of the dictionary are
  left intact.
 */
/*--------------------------------------------------------------------------*/
int dictionary_set(dictionary * d, const char * key, const char * val)
{
    int         i ;
    unsigned    hash ;
    char    *   key_copy ;
    char    *   val_copy ;
    void    *   grown ;

    if (d==NULL || key==NULL) return -1 ;

    /* Compute hash for this key */
    hash = dictionary_hash(key) ;
    /* Find if value is already in dictionary */
    if (d->n>0) {
        for (i=0 ; i<d->size ; i++) {
            if (d->key[i]==NULL)
                continue ;
            if (hash==d->hash[i] && !strcmp(key, d->key[i])) {
                /* Same key: copy the new value BEFORE releasing the old
                   one, so that val may alias the stored value and an
                   allocation failure keeps the old value in place. */
                val_copy = xstrdup(val) ;
                if (val!=NULL && val_copy==NULL)
                    return -1 ;
                free(d->val[i]);
                d->val[i] = val_copy ;
                /* Value has been modified: return */
                return 0 ;
            }
        }
    }

    /* Add a new value: duplicate the strings first so that a failure
       here leaves the dictionary untouched. */
    key_copy = xstrdup(key) ;
    val_copy = xstrdup(val) ;
    if (key_copy==NULL || (val!=NULL && val_copy==NULL)) {
        free(key_copy);
        free(val_copy);
        return -1 ;
    }

    /* See if dictionary needs to grow */
    if (d->n==d->size) {
        /* Reached maximum size: reallocate dictionary. mem_double() leaves
           the old buffer alive on failure, so only commit each pointer
           once its replacement exists. A partially grown dictionary stays
           consistent because d->size is only doubled at the very end. */
        grown = mem_double(d->val, d->size * sizeof(char*)) ;
        if (grown==NULL) {
            free(key_copy);
            free(val_copy);
            return -1 ;
        }
        d->val = (char **)grown ;

        grown = mem_double(d->key, d->size * sizeof(char*)) ;
        if (grown==NULL) {
            free(key_copy);
            free(val_copy);
            return -1 ;
        }
        d->key = (char **)grown ;

        grown = mem_double(d->hash, d->size * sizeof(unsigned)) ;
        if (grown==NULL) {
            free(key_copy);
            free(val_copy);
            return -1 ;
        }
        d->hash = (unsigned int *)grown ;

        /* Double size */
        d->size *= 2 ;
    }

    /* Insert key in the first empty slot. Start at d->n and wrap at
       d->size. Because d->n < d->size this will necessarily
       terminate. */
    for (i=d->n ; d->key[i] ; ) {
        if(++i == d->size) i = 0;
    }
    d->key[i]  = key_copy ;
    d->val[i]  = val_copy ;
    d->hash[i] = hash ;
    d->n ++ ;
    return 0 ;
}
/*-------------------------------------------------------------------------*/
/**
  @brief    Delete a key in a dictionary
  @param    d       dictionary object to modify.
  @param    key     Key to remove.
  @return   void

  Removes the first slot whose key matches and decrements the entry count.
  Nothing is done if key is NULL or cannot be found.
 */
/*--------------------------------------------------------------------------*/
void dictionary_unset(dictionary * d, const char * key)
{
    unsigned    wanted ;
    int         slot ;

    if (!key)
        return ;

    wanted = dictionary_hash(key);
    for (slot=0 ; slot<d->size ; slot++) {
        /* skip empty slots, hash mismatches and hash collisions */
        if (d->key[slot]==NULL
            || wanted!=d->hash[slot]
            || strcmp(key, d->key[slot])!=0)
            continue ;

        /* Found key: release it and mark the slot as free */
        free(d->key[slot]);
        d->key[slot] = NULL ;
        free(d->val[slot]);
        d->val[slot] = NULL ;
        d->hash[slot] = 0 ;
        d->n -- ;
        return ;
    }
    /* Key not found */
}
/*-------------------------------------------------------------------------*/
/**
  @brief    Dump a dictionary to an opened file pointer.
  @param    d   Dictionary to dump
  @param    out Opened file pointer.
  @return   void

  Writes one line per stored key: the key right-aligned in 20 columns, a
  tab, and the value in square brackets ("UNDEF" for a NULL value). An
  empty dictionary produces the single line "empty dictionary". Nothing is
  written when either argument is NULL.
 */
/*--------------------------------------------------------------------------*/
void dictionary_dump(dictionary * d, FILE * out)
{
    int     slot ;

    if (!d || !out) return ;

    if (d->n<1) {
        fprintf(out, "empty dictionary\n");
        return ;
    }

    for (slot=0 ; slot<d->size ; slot++) {
        const char * shown ;

        if (d->key[slot]==NULL)
            continue ;
        shown = d->val[slot] ? d->val[slot] : "UNDEF" ;
        fprintf(out, "%20s\t[%s]\n", d->key[slot], shown);
    }
}
/* Self-test driver, only compiled when TESTDIC is defined: inserts, looks */
/* up and removes NVALS numbered keys and reports any inconsistency.       */
#ifdef TESTDIC
#define NVALS 20000
int main(int argc, char *argv[])
{
    char            keybuf[90] ;
    dictionary  *   dict ;
    char        *   found ;
    int             k ;

    /* Allocate dictionary */
    printf("allocating...\n");
    dict = dictionary_new(0);

    /* Fill it with NVALS keys that all map to the same value */
    printf("setting %d values...\n", NVALS);
    k = 0 ;
    while (k < NVALS) {
        sprintf(keybuf, "%04d", k);
        dictionary_set(dict, keybuf, "salut");
        k++ ;
    }

    /* Every key must be retrievable */
    printf("getting %d values...\n", NVALS);
    k = 0 ;
    while (k < NVALS) {
        sprintf(keybuf, "%04d", k);
        found = dictionary_get(dict, keybuf, DICT_INVALID_KEY);
        if (found == DICT_INVALID_KEY)
            printf("cannot get value for key [%s]\n", keybuf);
        k++ ;
    }

    /* Remove them again; the dictionary must end up empty */
    printf("unsetting %d values...\n", NVALS);
    k = 0 ;
    while (k < NVALS) {
        sprintf(keybuf, "%04d", k);
        dictionary_unset(dict, keybuf);
        k++ ;
    }
    if (dict->n != 0)
        printf("error deleting values\n");

    printf("deallocating...\n");
    dictionary_del(dict);
    return 0 ;
}
#endif
/* vim: set ts=4 et sw=4 tw=75 */
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "td_sz.h"
#include "sz.h"
#include "conf.h"
//
// Initialise the global SZ configuration used by the tdsz* interface.
//
// Allocates confparams_cpr / exe_params on first use, fills them with the
// library defaults and then overrides the precision, interval and lossless
// compressor settings with the given arguments. The function returns no
// status; if the configuration structure cannot be allocated it returns
// without applying the arguments.
//
//   fPrecision    stored as absErrBound
//   dPrecision    stored as absErrBoundDouble
//   maxIntervals  stored as max_quant_intervals
//   intervals     stored as quantization_intervals
//   compressor    "GZIP_COMPRESSOR" or "ZSTD_COMPRESSOR"; NULL or any other
//                 string keeps the default compressor
//
void tdszInit(double fPrecision, double dPrecision, unsigned int maxIntervals, unsigned int intervals, const char* compressor)
{
    // need malloc
    if(confparams_cpr == NULL)
        confparams_cpr = (sz_params*)malloc(sizeof(sz_params));
    if(exe_params == NULL)
        exe_params = (sz_exedata*)malloc(sizeof(sz_exedata));

    // set default (setDefaulParams skips whichever pointer is NULL)
    setDefaulParams(exe_params, confparams_cpr);

    // allocation failed: nothing to overwrite
    if(confparams_cpr == NULL)
        return ;

    // overwrite with args
    confparams_cpr->absErrBound = fPrecision;
    confparams_cpr->absErrBoundDouble = dPrecision;
    confparams_cpr->max_quant_intervals = maxIntervals;
    confparams_cpr->quantization_intervals = intervals;

    if(compressor != NULL)
    {
        if(strcmp(compressor, "GZIP_COMPRESSOR")==0)
            confparams_cpr->losslessCompressor = GZIP_COMPRESSOR;
        else if(strcmp(compressor, "ZSTD_COMPRESSOR")==0)
            confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR;
    }
    return ;
}
//
// compress interface to tdengine return value is count of output with bytes
//
// Compresses nelements values of the given SZ data type from input into
// output, using a private copy of the global configuration. Returns 0
// without touching output when the library has not been initialised
// (confparams_cpr is NULL) or when a buffer pointer is NULL.
//
int tdszCompress(int type, const char * input, const int nelements, const char * output)
{
    // check valid
    if(confparams_cpr == NULL || input == NULL || output == NULL)
        return 0;

    // work on a copy so the call cannot modify the global configuration
    sz_params comp_params = *confparams_cpr;
    size_t outSize = SZ_compress_args(type, (void*)input, (size_t)nelements, (unsigned char*)output, &comp_params);
    return (int)outSize;
}
//
// decompress interface to tdengine return value is count of output with bytes
//
// Forwards to SZ_decompress(): compressedSize bytes from input are decoded
// into nelements values of the given SZ data type stored in output.
//
int tdszDecompress(int type, const char * input, int compressedSize, const int nelements, const char * output)
{
    void*          src = (void*)input;
    unsigned char* dst = (unsigned char*)output;
    size_t         produced;

    produced = SZ_decompress(type, src, compressedSize, (size_t)nelements, dst);
    return (int)produced;
}
//
// tdszExit: release the global SZ configuration structures and reset the
// pointers so that a later tdszInit() allocates them again.
//
void tdszExit()
{
    // free(NULL) is a no-op, so no guard is needed
    free(confparams_cpr);
    confparams_cpr = NULL;

    free(exe_params);
    exe_params = NULL;
}
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "error_private.h"
#include "zbuff.h"
/*-****************************************
* ZBUFF Error Management (deprecated)
******************************************/
/*! ZBUFF_isError() :
*   reports whether a function result is an error code;
*   forwards to ERR_isError() */
unsigned ZBUFF_isError(size_t errorCode)
{
    return ERR_isError(errorCode);
}
/*! ZBUFF_getErrorName() :
*   returns the error string for a function result (useful for debugging);
*   forwards to ERR_getErrorName() */
const char* ZBUFF_getErrorName(size_t errorCode)
{
    return ERR_getErrorName(errorCode);
}
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册