From 7c1ae052effc7f4a4855980bf52c6cf5bc56535d Mon Sep 17 00:00:00 2001 From: tickduan <417921451@qq.com> Date: Tue, 6 Jul 2021 19:56:16 +0800 Subject: [PATCH] sz.py test 190ms and 31M size --- deps/SZ/sz/src/TightDataPointStorageD.c | 28 +---- deps/SZ/sz/src/TightDataPointStorageF.c | 27 +---- deps/SZ/sz/src/conf.c | 5 +- deps/SZ/sz/src/dataCompression.c | 2 +- deps/SZ/sz/src/sz_float.c | 8 +- deps/SZ/sz/src/szd_double.c | 18 +-- deps/SZ/sz/src/szd_float.c | 15 +-- src/kit/taospack/taospack.c | 145 +++++++++++++++++++----- 8 files changed, 143 insertions(+), 105 deletions(-) diff --git a/deps/SZ/sz/src/TightDataPointStorageD.c b/deps/SZ/sz/src/TightDataPointStorageD.c index 652cda2a81..a4dc2b4c79 100644 --- a/deps/SZ/sz/src/TightDataPointStorageD.c +++ b/deps/SZ/sz/src/TightDataPointStorageD.c @@ -17,31 +17,9 @@ void new_TightDataPointStorageD_Empty(TightDataPointStorageD **this) { - *this = (TightDataPointStorageD*)malloc(sizeof(TightDataPointStorageD)); - (*this)->dataSeriesLength = 0; - (*this)->allSameData = 0; - (*this)->exactDataNum = 0; - (*this)->reservedValue = 0; - (*this)->reqLength = 0; - (*this)->radExpo = 0; - - (*this)->leadNumArray = NULL; //its size is exactDataNum/4 (or exactDataNum/4+1) - (*this)->leadNumArray_size = 0; - - (*this)->exactMidBytes = NULL; - (*this)->exactMidBytes_size = 0; - - (*this)->residualMidBits = NULL; - (*this)->residualMidBits_size = 0; - - (*this)->intervals = 0; - (*this)->isLossless = 0; - - (*this)->segment_size = 0; - - (*this)->raBytes = NULL; - (*this)->raBytes_size = 0; - + TightDataPointStorageD* tdps = (TightDataPointStorageD*)malloc(sizeof(TightDataPointStorageD)); + memset(tdps, 0, sizeof(TightDataPointStorageD)); + *this = tdps; } int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params) diff --git a/deps/SZ/sz/src/TightDataPointStorageF.c b/deps/SZ/sz/src/TightDataPointStorageF.c index 7107891129..8d65d24cfd 100644 --- a/deps/SZ/sz/src/TightDataPointStorageF.c +++ b/deps/SZ/sz/src/TightDataPointStorageF.c @@ -17,30 +17,9 @@ void new_TightDataPointStorageF_Empty(TightDataPointStorageF **this) { - *this = (TightDataPointStorageF*)malloc(sizeof(TightDataPointStorageF)); - (*this)->dataSeriesLength = 0; - (*this)->allSameData = 0; - (*this)->exactDataNum = 0; - (*this)->reservedValue = 0; - (*this)->reqLength = 0; - (*this)->radExpo = 0; - - (*this)->leadNumArray = NULL; //its size is exactDataNum/4 (or exactDataNum/4+1) - (*this)->leadNumArray_size = 0; - - (*this)->exactMidBytes = NULL; - (*this)->exactMidBytes_size = 0; - - (*this)->residualMidBits = NULL; - (*this)->residualMidBits_size = 0; - - (*this)->intervals = 0; - (*this)->isLossless = 0; - - (*this)->segment_size = 0; - - (*this)->raBytes = NULL; - (*this)->raBytes_size = 0; + TightDataPointStorageF* tdpf = (TightDataPointStorageF*)malloc(sizeof(TightDataPointStorageF)); + memset(tdpf, 0, sizeof(TightDataPointStorageF)); + *this = tdpf; } int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params) diff --git a/deps/SZ/sz/src/conf.c b/deps/SZ/sz/src/conf.c index 69b0ee63bc..1684e155eb 100644 --- a/deps/SZ/sz/src/conf.c +++ b/deps/SZ/sz/src/conf.c @@ -99,14 +99,13 @@ int SZ_ReadConf(const char* sz_cfgFile) { { dataEndianType = LITTLE_ENDIAN_DATA; confparams_cpr->sol_ID = SZ; - confparams_cpr->max_quant_intervals = 500; + confparams_cpr->max_quant_intervals = 800; confparams_cpr->maxRangeRadius = confparams_cpr->max_quant_intervals/2; - confparams_cpr->quantization_intervals = 5000; exe_params->intvCapacity = confparams_cpr->maxRangeRadius*2; exe_params->intvRadius = confparams_cpr->maxRangeRadius; - confparams_cpr->quantization_intervals = 0; + confparams_cpr->quantization_intervals = 500; exe_params->optQuantMode = 1; confparams_cpr->predThreshold = 0.99; confparams_cpr->sampleDistance = 100; diff --git a/deps/SZ/sz/src/dataCompression.c b/deps/SZ/sz/src/dataCompression.c index 237a3315c1..6b6316ae5c 100644 --- a/deps/SZ/sz/src/dataCompression.c +++ b/deps/SZ/sz/src/dataCompression.c @@ -95,7 +95,7 @@ double getRealPrecision_double(double valueRangeSize, int errBoundMode, double a int state = SZ_SUCCESS; double precision = 0; if(errBoundMode==SZ_ABS||errBoundMode==ABS_OR_PW_REL||errBoundMode==ABS_AND_PW_REL) - precision = absErrBound; + precision = absErrBound*0.00000001; else if(errBoundMode==REL||errBoundMode==REL_OR_PW_REL||errBoundMode==REL_AND_PW_REL) precision = relBoundRatio*valueRangeSize; else if(errBoundMode==ABS_AND_REL) diff --git a/deps/SZ/sz/src/sz_float.c b/deps/SZ/sz/src/sz_float.c index ecd2ce56e8..2dfce720cf 100644 --- a/deps/SZ/sz/src/sz_float.c +++ b/deps/SZ/sz/src/sz_float.c @@ -139,7 +139,7 @@ TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData, // calc save byte length and bit lengths with reqLength int reqBytesLength = reqLength/8; int resiBitsLength = reqLength%8; - float last3CmprsData[3] = {0}; + //float last3CmprsData[3] = {0}; FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); @@ -152,7 +152,7 @@ TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData, memcpy(preDiffBytes, vce->curBytes, 4); // lce to arrays addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); - listAdd_float(last3CmprsData, vce->data); + //listAdd_float(last3CmprsData, vce->data); //add the second data type[1] = 0; @@ -160,12 +160,12 @@ TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData, updateLossyCompElement_Float(vce->curBytes, preDiffBytes, reqBytesLength, resiBitsLength, lce); memcpy(preDiffBytes, vce->curBytes, 4); addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); - listAdd_float(last3CmprsData, vce->data); + //listAdd_float(last3CmprsData, vce->data); int state; float checkRadius; float oriFloat; - float pred = last3CmprsData[0]; + float pred = vce->data; float diff; checkRadius = (quantization_intervals-1)*realPrecision; float double_realpreci = 2*realPrecision; diff --git a/deps/SZ/sz/src/szd_double.c b/deps/SZ/sz/src/szd_double.c index 91866319f6..d3df89de77 100644 --- a/deps/SZ/sz/src/szd_double.c +++ b/deps/SZ/sz/src/szd_double.c @@ -26,7 +26,9 @@ int SZ_decompress_args_double(double* newData, size_t r1, unsigned char* cmpByte size_t targetUncompressSize = dataLength <<3; //i.e., *8 //tmpSize must be "much" smaller than dataLength size_t i, tmpSize = 12+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE; - unsigned char* szTmpBytes; + unsigned char* szTmpBytes = NULL; + bool needFree = false; + if(cmpSize!=12+4+MetaDataByteLength_double && cmpSize!=12+8+MetaDataByteLength_double) { pde_params->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); @@ -44,21 +46,13 @@ int SZ_decompress_args_double(double* newData, size_t r1, unsigned char* cmpByte tmpSize = cmpSize; szTmpBytes = cmpBytes; } - else if(pde_params->szMode==SZ_BEST_COMPRESSION || pde_params->szMode==SZ_DEFAULT_COMPRESSION || pde_params->szMode==SZ_TEMPORAL_COMPRESSION) + else { if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE); - //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); - //memcpy(szTmpBytes, tmpBytes, tmpSize); - //free(tmpBytes); //release useless memory + needFree = true; } - else - { - printf("Wrong value of pde_params->szMode in the double compressed bytes.\n"); - status = SZ_MERR; - return status; - } } else szTmpBytes = cmpBytes; @@ -106,7 +100,7 @@ int SZ_decompress_args_double(double* newData, size_t r1, unsigned char* cmpByte } free_TightDataPointStorageD2(tdps); - if(pde_params->szMode!=SZ_BEST_SPEED && cmpSize!=12+MetaDataByteLength_double+exe_params->SZ_SIZE_TYPE) + if(szTmpBytes && needFree) free(szTmpBytes); return status; } diff --git a/deps/SZ/sz/src/szd_float.c b/deps/SZ/sz/src/szd_float.c index 98ce8d50df..ddbd610256 100644 --- a/deps/SZ/sz/src/szd_float.c +++ b/deps/SZ/sz/src/szd_float.c @@ -34,7 +34,8 @@ int SZ_decompress_args_float(float* newData, size_t r1, unsigned char* cmpBytes, size_t targetUncompressSize = dataLength <<2; //i.e., *4 //tmpSize must be "much" smaller than dataLength size_t i, tmpSize = 8+MetaDataByteLength+pde_exe->SZ_SIZE_TYPE; - unsigned char* szTmpBytes; + unsigned char* szTmpBytes = NULL; + bool needFree = false; if(cmpSize!=8+4+MetaDataByteLength && cmpSize!=8+8+MetaDataByteLength) //4,8 means two posibilities of SZ_SIZE_TYPE { @@ -52,19 +53,13 @@ int SZ_decompress_args_float(float* newData, size_t r1, unsigned char* cmpBytes, tmpSize = cmpSize; szTmpBytes = cmpBytes; } - else if(pde_params->szMode==SZ_BEST_COMPRESSION || pde_params->szMode==SZ_DEFAULT_COMPRESSION || pde_params->szMode==SZ_TEMPORAL_COMPRESSION) + else { if(targetUncompressSizelosslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize - + needFree = true; } - else - { - printf("Wrong value of pde_params->szMode in the double compressed bytes.\n"); - status = SZ_MERR; - return status; - } } else szTmpBytes = cmpBytes; @@ -111,7 +106,7 @@ int SZ_decompress_args_float(float* newData, size_t r1, unsigned char* cmpBytes, //cost_end_(); //printf("totalCost_=%f\n", totalCost_); free_TightDataPointStorageF2(tdps); - if(pde_params->szMode!=SZ_BEST_SPEED && cmpSize!=8+MetaDataByteLength+exe_params->SZ_SIZE_TYPE) + if(szTmpBytes && needFree) free(szTmpBytes); return status; } diff --git a/src/kit/taospack/taospack.c b/src/kit/taospack/taospack.c index 9d0ab5d82c..34f79961d1 100644 --- a/src/kit/taospack/taospack.c +++ b/src/kit/taospack/taospack.c @@ -120,33 +120,101 @@ float check_same(float* ft1, float* ft2, int count){ return same_rate; } +double check_same_double(double* ft1, double* ft2, int count){ + int same_cnt =0; + for(int i=0; i< count; i++){ + + if(ft1[i] == ft2[i]){ + same_cnt ++; + } + + if(i < 5){ + printf(" i=%d ft1=%.40f diff=%.40f \n", i, ft1[i], ft1[i] - ft2[i]); + printf(" i=%d ft2=%.40f \n", i, ft2[i]); + } + + } + double same_rate = same_cnt*100/count; + printf(" all count=%d same=%d same rate=%.0f%% \n", count, same_cnt, same_rate); + return same_rate; +} + // // test compress and decompress // extern bool gOpenLossy; -bool testFile(const char* inFile, char algorithm){ - // check valid - if(inFile == NULL || inFile[0] == 0 ){ - printf(" inFile is NULL or EMPTY.\n"); - return false; - } - int cnt = 0; - float* floats = read_float(inFile, &cnt); - if(floats == NULL) { - return false; +bool DoDouble(double* doubles, int cnt, int algorithm) { + // compress + const char* input = (const char*)doubles; + int input_len = cnt * sizeof(double); + char* output = (char*) malloc(input_len); + int output_len = input_len; + char* buff = (char*) malloc(input_len); + int buff_len = input_len; + + cost_start(); + int ret_len = 0; + if(algorithm == 2) + ret_len = tsCompressDouble(input, input_len, cnt, output, output_len, algorithm, buff, buff_len); + else + ret_len = tsCompressDoubleLossy(input, input_len, cnt, output, output_len, algorithm, buff, buff_len); + + if(ret_len == -1) { + printf(" compress error.\n"); + return 0; } + double use_ms1 = cost_end("compress"); + printf(" compress len=%d input len=%d\n", ret_len, input_len); + double rate=100*(double)ret_len/(double)input_len; + printf(" compress rate=%.1f an-rate=%.4f%%\n", (double)input_len/(double)ret_len, rate); + + // + // decompress + // + double* ft2 = (double*)malloc(input_len); + cost_start(); + int code = 0; + + if(algorithm == 2) + code = tsDecompressDouble(output, ret_len, cnt, (char*)ft2, input_len, algorithm, buff, buff_len); + else + code = tsDecompressDoubleLossy(output, ret_len, cnt, (char*)ft2, input_len, algorithm, buff, buff_len); + + + double use_ms2 = cost_end("Decompress"); + printf(" Decompress return length=%d \n", code); + + // compare same + double same_rate = check_same_double(doubles, ft2, cnt); + + printf("\n ------------------ count:%d <%s> ---------------- \n", cnt, algorithm == 2?"TD":"SZ"); + printf(" Compress Rate ......... [%.2f%%] \n", rate); + double speed1 = (cnt*sizeof(double)*1000/1024/1024)/use_ms1; + printf(" Compress Time ......... [%.4fms] speed=%.1f MB/s\n", use_ms1, speed1); + double speed2 = (cnt*sizeof(double)*1000/1024/1024)/use_ms2; + printf(" Decompress Time........ [%.4fms] speed=%.1f MB/s\n", use_ms2, speed2); + printf(" Same Rate ............. [%.0f%%] \n\n", same_rate); + + + // free + free(ft2); + free(buff); + free(output); + + return true; +} + +bool DoFloat(float* floats, int cnt, int algorithm) { // compress const char* input = (const char*)floats; - int input_len = cnt * sizeof(cnt); + int input_len = cnt * sizeof(float); char* output = (char*) malloc(input_len); int output_len = input_len; char* buff = (char*) malloc(input_len); int buff_len = input_len; - printf(" file %s have count=%d \n", inFile, cnt); - cost_start(); int ret_len = 0; if(algorithm == 2) @@ -194,12 +262,31 @@ bool testFile(const char* inFile, char algorithm){ // free free(ft2); - free(floats); free(buff); free(output); return true; } + + +bool testFile(const char* inFile, char algorithm){ + // check valid + if(inFile == NULL || inFile[0] == 0 ){ + printf(" inFile is NULL or EMPTY.\n"); + return false; + } + + int cnt = 0; + float* floats = read_float(inFile, &cnt); + if(floats == NULL) { + return false; + } + + DoFloat(floats, cnt, algorithm); + + free(floats); + return true; +} // // txt to binary file // @@ -577,25 +664,26 @@ void unitTestFloat() { } -void modulePath(char *buf, int size) -{ - char path[1024]; - sprintf(path, "/proc/%d/exe", getpid()); - readlink(path, buf, size); - char* pos = strrchr(buf, '/'); - if(pos) - pos[1]=0; +#define DB_CNT 500 +void test_same_double(int algo){ + double ori = 3.1415926; + + double doubles [DB_CNT]; + for(int i=0; i< DB_CNT; i++){ + doubles[i] = ori; + } + + DoDouble(doubles, DB_CNT, algo); + } + + // // ----------------- main ---------------------- // int main(int argc, char *argv[]) { printf("welcome to use taospack tools v1.3\n"); - - char szbuf[512]; - modulePath(szbuf, 512); - printf(szbuf); gOpenLossy = false; tsLossyInit(); @@ -626,6 +714,11 @@ int main(int argc, char *argv[]) { return 0; } + if(strcmp(argv[1], "-samed") == 0) { + test_same_double(atoi(argv[2])); + return 0; + } + if(algo == 0){ printf(" no param -tone -tw \n"); return 0; -- GitLab