/**
 *  @file TightDataPointStorageF.c
 *  @author Sheng Di and Dingwen Tao
 *  @date Aug, 2016
 *  @brief The functions used to construct the TightDataPointStorageF element for storing compressed bytes.
 *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include <stdlib.h> 
#include <stdio.h>
#include <string.h>
#include "TightDataPointStorageF.h"
#include "sz.h"
#include "defines.h"
#include "Huffman.h"

/**
 * Allocate a TightDataPointStorageF and reset it to an empty state.
 *
 * The structure is zero-filled on allocation, so members that are not listed
 * explicitly below also start from a defined value instead of being left
 * indeterminate.
 *
 * @param this  out-parameter; receives the new structure, or NULL when the
 *              allocation fails.
 */
void new_TightDataPointStorageF_Empty(TightDataPointStorageF **this)
{
	*this = (TightDataPointStorageF*)calloc(1, sizeof(TightDataPointStorageF));
	if (*this == NULL)
		return; // out of memory: the caller sees *this == NULL

	(*this)->dataSeriesLength = 0;
	(*this)->allSameData = 0;
	(*this)->exactDataNum = 0;
	(*this)->reservedValue = 0;
	(*this)->reqLength = 0;
	(*this)->radExpo = 0;

	// Pointers are still assigned explicitly: an all-zero bit pattern is not
	// guaranteed by the C standard to be a null pointer.
	(*this)->leadNumArray = NULL; //its size is exactDataNum/4 (or exactDataNum/4+1)
	(*this)->leadNumArray_size = 0;

	(*this)->exactMidBytes = NULL;
	(*this)->exactMidBytes_size = 0;

	(*this)->residualMidBits = NULL;
	(*this)->residualMidBits_size = 0;

	(*this)->intervals = 0;
	(*this)->isLossless = 0;

	(*this)->segment_size = 0;

	(*this)->raBytes = NULL;
	(*this)->raBytes_size = 0;
}

/* ---- parsing a flat byte stream into a TightDataPointStorageF ---- */
/**
 * Populate a newly allocated TightDataPointStorageF from a flat byte stream.
 *
 * Fields are consumed sequentially in this order:
 *   version(1) | flag byte(1) | meta(MetaDataByteLength) | element count(ST)
 * and, for the regular layout (not lossless, not all-same, not regression):
 *   max quant intervals(4) | intervals(4) | median(4) | reqLength(1) |
 *   realPrecision(8) | typeArray size(ST) | exactDataNum(ST) |
 *   exactMidBytes size(ST) | typeArray | leadNumArray | exactMidBytes |
 *   residualMidBits
 * where ST is exe_params->SZ_SIZE_TYPE (8 when flag bit 0x40 is set, else 4).
 *
 * The array members of the result point into flatBytes; nothing is copied.
 *
 * @param this            out-parameter receiving the structure created by
 *                        new_TightDataPointStorageF_Empty()
 * @param flatBytes       byte stream to parse
 * @param flatBytesLength total number of bytes in flatBytes
 * @param pde_exe         forwarded to convertBytesToSZParams()
 * @param pde_params      forwarded to convertBytesToSZParams(); its
 *                        maxRangeRadius is also set for the regular layout
 * @return SZ_ABS on every path, including the unrecognised-version path.
 *
 * NOTE(review): no read is bounds-checked against flatBytesLength, and the
 * size width is written to exe_params rather than to pde_exe — confirm both
 * are intended.
 */
int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params)
{
	size_t pos = 0;               /* read cursor into flatBytes */
	unsigned char countBytes[8];  /* raw element-count field */
	unsigned char scratch[8];     /* staging buffer for fixed-width fields */

	new_TightDataPointStorageF_Empty(this);
	TightDataPointStorageF *tdps = *this;

	/* 1 version(1), followed by the flag byte(1) */
	const unsigned char version = flatBytes[pos++];
	const unsigned char flags = flatBytes[pos++];

	/* only one data format version is recognised */
	if (version != DATA_FROMAT_VER1)
	{
		printf(" error, float compressed data format can not be recognised. ver=%d\n ", version);
		return SZ_ABS;
	}

	/* 2 flag byte; note that 1000,0000 is reserved for the regression tag */
	const int same = flags & 0x01;                                  /* 0000,0001 */
	tdps->isLossless = (flags & 0x10) >> 4;                         /* 0001,0000 */
	exe_params->SZ_SIZE_TYPE = ((flags & 0x40) >> 6) == 1 ? 8 : 4;  /* 0100,0000 */
	const int isRegression = (flags >> 7) & 0x01;                   /* 1000,0000 */
	const int errorBoundMode = SZ_ABS;

	/* 3 meta */
	convertBytesToSZParams(&(flatBytes[pos]), pde_params, pde_exe);
	pos += MetaDataByteLength;

	/* 4 element count (4 or 8 bytes) */
	memcpy(countBytes, &flatBytes[pos], exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;
	tdps->dataSeriesLength = bytesToSize(countBytes);

	/* lossless flag set: nothing more is parsed */
	if (tdps->isLossless == 1)
		return errorBoundMode;

	/* same flag set: the bytes after the element count become exactMidBytes */
	if (same == 1)
	{
		tdps->allSameData = 1;
		tdps->exactMidBytes = &(flatBytes[pos]);
		return errorBoundMode;
	}
	tdps->allSameData = 0;

	/* regression flag set: the rest of the stream is exposed as raBytes */
	if (isRegression == 1)
	{
		tdps->raBytes_size = flatBytesLength - 1 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE;
		tdps->raBytes = &(flatBytes[pos]);
		return errorBoundMode;
	}

	/* 5 quant intervals(4) */
	memcpy(scratch, &flatBytes[pos], 4);
	pos += 4;
	const int max_quant_intervals = bytesToInt_bigEndian(scratch);
	pde_params->maxRangeRadius = max_quant_intervals/2;

	/* 6 intervals(4) */
	memcpy(scratch, &flatBytes[pos], 4);
	pos += 4;
	tdps->intervals = bytesToInt_bigEndian(scratch);

	/* 7 median(4) */
	memcpy(scratch, &flatBytes[pos], 4);
	pos += 4;
	tdps->medianValue = bytesToFloat(scratch);

	/* 8 reqLength(1) */
	tdps->reqLength = flatBytes[pos++];

	/* 9 realPrecision(8) */
	memcpy(scratch, &flatBytes[pos], 8);
	pos += 8;
	tdps->realPrecision = bytesToDouble(scratch);

	/* 10 typeArray_size (ST) */
	memcpy(scratch, &flatBytes[pos], exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;
	tdps->typeArray_size = bytesToSize(scratch);

	/* 11 exactNum (ST) */
	memcpy(scratch, &flatBytes[pos], exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;
	tdps->exactDataNum = bytesToSize(scratch);

	/* 12 mid size (ST) */
	memcpy(scratch, &flatBytes[pos], exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;
	tdps->exactMidBytes_size = bytesToSize(scratch);

	/* leadNumArray_size is not stored: 2 bits per exact value, rounded up to whole bytes */
	const size_t leadNumBits = tdps->exactDataNum * 2;
	tdps->leadNumArray_size = (leadNumBits >> 3) + ((leadNumBits % 8 == 0) ? 0 : 1);

	/* 13 typeArray; its first 4 bytes are decoded into allNodes */
	tdps->typeArray = &flatBytes[pos];
	tdps->allNodes = bytesToInt_bigEndian(tdps->typeArray);
	tdps->stateNum = (tdps->allNodes+1)/2;
	pos += tdps->typeArray_size;

	/* 14 leadNumArray */
	tdps->leadNumArray = &flatBytes[pos];
	pos += tdps->leadNumArray_size;

	/* 15 exactMidBytes */
	tdps->exactMidBytes = &flatBytes[pos];
	pos += tdps->exactMidBytes_size;

	/* 16 residualMidBits: whatever remains once every other field is subtracted */
	tdps->residualMidBits = &flatBytes[pos];
	tdps->residualMidBits_size = flatBytesLength - 1 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - 4 - 4 - 1 - 8
			- exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE
			- tdps->leadNumArray_size - tdps->exactMidBytes_size - tdps->typeArray_size;

	return errorBoundMode;
}

/**
 * Build a TightDataPointStorageF from the intermediate arrays produced during
 * compression.
 *
 * The structure is zero-filled on allocation, so members that are not set
 * below (for example max_bits on the non-MSST19 path) start from a defined
 * value.
 *
 * @param this               out-parameter; receives the new structure, or NULL
 *                           when the allocation fails
 * @param dataSeriesLength   number of entries in type
 * @param exactDataNum       number of entries in leadNumIntArray; also passed
 *                           as the element count when packing resiMidBits
 * @param type               codes that are Huffman-encoded into typeArray
 * @param exactMidBytes      stored by pointer, not copied
 * @param exactMidBytes_size length of exactMidBytes in bytes
 * @param leadNumIntArray    one value per exact data point, packed via
 *                           convertIntArray2ByteArray_fast_2b()
 * @param resiMidBits        packed via convertIntArray2ByteArray_fast_dynamic()
 * @param resiMidBits_size   not used by this function
 * @param resiBitLength      bit length passed to the residual packing routine
 * @param realPrecision      stored as-is
 * @param medianValue        stored as-is
 * @param reqLength          stored as-is
 * @param intervals          stored as-is; the Huffman tree is created with
 *                           2*intervals states
 * @param radExpo            stored as-is
 */
void new_TightDataPointStorageF(TightDataPointStorageF **this,
		size_t dataSeriesLength, size_t exactDataNum, 
		int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
		unsigned char* leadNumIntArray,  //leadNumIntArray contains readable numbers....
		unsigned char* resiMidBits, size_t resiMidBits_size,
		unsigned char resiBitLength, 
		double realPrecision, float medianValue, char reqLength, unsigned int intervals, 
		unsigned char radExpo) {

	(void)resiMidBits_size; // kept in the signature for callers; the packed size is recomputed below

	TightDataPointStorageF *tdps = (TightDataPointStorageF *)calloc(1, sizeof(TightDataPointStorageF));
	*this = tdps;
	if (tdps == NULL)
		return; // out of memory: the caller sees *this == NULL

	tdps->allSameData = 0;
	tdps->realPrecision = realPrecision;
	tdps->medianValue = medianValue;
	tdps->reqLength = reqLength;

	tdps->dataSeriesLength = dataSeriesLength;
	tdps->exactDataNum = exactDataNum;

	// encode type with Huffman
	int stateNum = 2*intervals;
	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression)
		tdps->max_bits = encode_withTree_MSST19(huffmanTree, type, dataSeriesLength, &tdps->typeArray, &tdps->typeArray_size);
	else
		encode_withTree(huffmanTree, type, dataSeriesLength, &tdps->typeArray, &tdps->typeArray_size);
	SZ_ReleaseHuffman(huffmanTree);

	tdps->exactMidBytes = exactMidBytes;
	tdps->exactMidBytes_size = exactMidBytes_size;

	tdps->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &(tdps->leadNumArray));

	tdps->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic(resiMidBits, resiBitLength, exactDataNum, &(tdps->residualMidBits));

	tdps->intervals = intervals;

	tdps->isLossless = 0;

	tdps->radExpo = radExpo;
}

/**
 * Serialise a TightDataPointStorageF in the regular (not all-same) layout.
 *
 * Fields are written back to back in this order:
 *   version(1) | same byte(1) | meta(MetaDataByteLength) | element count(ST) |
 *   max quant intervals(4) | intervals(4) | median(4) | reqLength(1) |
 *   realPrecision(8) | typeArray size(ST) | exactDataNum(ST) |
 *   exactMidBytes size(ST) | typeArray | leadNumArray | exactMidBytes |
 *   residualMidBits (skipped when the pointer is NULL)
 * where ST is exe_params->SZ_SIZE_TYPE.
 *
 * @param tdps           structure to serialise
 * @param bytes          destination buffer; this function does not check its size
 * @param dsLengthBytes  already-encoded element count (ST bytes are copied)
 * @param sameByte       flag byte written right after the version
 */
void convertTDPStoBytes_float(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte)
{
	size_t pos = 0;          /* write cursor into bytes */
	unsigned char field[8];  /* staging buffer for one encoded scalar */

	/* 1 version */
	bytes[pos++] = versionNumber;

	/* 2 same */
	bytes[pos++] = sameByte;

	/* 3 meta */
	convertSZParamsToBytes(confparams_cpr, &(bytes[pos]));
	pos += MetaDataByteLength;

	/* 4 element count (ST: 4 or 8 bytes) */
	memcpy(&bytes[pos], dsLengthBytes, exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;

	/* 5 max_quant_intervals (4) */
	intToBytes_bigEndian(field, confparams_cpr->max_quant_intervals);
	memcpy(&bytes[pos], field, 4);
	pos += 4;

	/* 6 intervals (4) */
	intToBytes_bigEndian(field, tdps->intervals);
	memcpy(&bytes[pos], field, 4);
	pos += 4;

	/* 7 median (4) */
	floatToBytes(field, tdps->medianValue);
	memcpy(&bytes[pos], field, 4);
	pos += 4;

	/* 8 reqLength (1) */
	bytes[pos++] = tdps->reqLength;

	/* 9 realPrecision (8) */
	doubleToBytes(field, tdps->realPrecision);
	memcpy(&bytes[pos], field, 8);
	pos += 8;

	/* 10 typeArray size (ST) */
	sizeToBytes(field, tdps->typeArray_size);
	memcpy(&bytes[pos], field, exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;

	/* 11 exactDataNum (ST); the leadNum size is derived from it, so it is not stored */
	sizeToBytes(field, tdps->exactDataNum);
	memcpy(&bytes[pos], field, exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;

	/* 12 mid size (ST) */
	sizeToBytes(field, tdps->exactMidBytes_size);
	memcpy(&bytes[pos], field, exe_params->SZ_SIZE_TYPE);
	pos += exe_params->SZ_SIZE_TYPE;

	/* 13 typeArray */
	memcpy(&(bytes[pos]), tdps->typeArray, tdps->typeArray_size);
	pos += tdps->typeArray_size;

	/* 14 leadNumArray */
	memcpy(&(bytes[pos]), tdps->leadNumArray, tdps->leadNumArray_size);
	pos += tdps->leadNumArray_size;

	/* 15 mid data */
	memcpy(&(bytes[pos]), tdps->exactMidBytes, tdps->exactMidBytes_size);
	pos += tdps->exactMidBytes_size;

	/* 16 residualMidBits */
	if (tdps->residualMidBits != NULL)
	{
		memcpy(&(bytes[pos]), tdps->residualMidBits, tdps->residualMidBits_size);
		pos += tdps->residualMidBits_size;
	}
}

// Convert a TightDataPointStorageF to flat bytes.
/**
 * Serialise a TightDataPointStorageF into a caller-provided buffer.
 *
 * Two layouts are produced:
 *  - all-same (tdps->allSameData == 1):
 *      version(1) | same byte(1) | meta | element count(ST) | exactMidBytes
 *  - otherwise: the layout written by convertTDPStoBytes_float().
 *
 * Nothing is written when the encoded length would be greater than or equal
 * to dataSeriesLength * sizeof(float).
 *
 * @param tdps   structure to serialise
 * @param bytes  destination buffer (not allocated here)
 * @param size   out: number of bytes written, or 0 when the function returns false
 * @return true when the stream was written, false when it was rejected as too large.
 */
bool convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size)
{
	/* element count, encoded with the configured width */
	unsigned char dsLengthBytes[8];
	if (exe_params->SZ_SIZE_TYPE == 4)
		intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength); /* 4 */
	else
		longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength); /* 8 */

	/* flag byte */
	unsigned char sameByte = (tdps->allSameData == 1) ? 1 : 0;  /* 0000,0001 */
	if (tdps->isLossless)
		sameByte |= 0x10;  /* 0001,0000 */
	if (confparams_cpr->errorBoundMode >= PW_REL)
		sameByte |= 0x20;  /* 0010,0000 */
	if (exe_params->SZ_SIZE_TYPE == 8)
		sameByte |= 0x40;  /* 0100,0000 */
	if (confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression)
		sameByte |= 0x08;  /* 0000,1000 */

	if (tdps->allSameData == 1)
	{
		/*
		 * same format
		 */
		size_t totalByteLength = 1 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size;

		/* reject output that is not smaller than the raw float data */
		if (totalByteLength >= tdps->dataSeriesLength * sizeof(float))
		{
			*size = 0;
			return false;
		}

		size_t pos = 0;
		/* 1 version, 1 byte */
		bytes[pos++] = versionNumber;
		/* 2 same flag, 1 byte */
		bytes[pos++] = sameByte;
		/* 3 metaData */
		convertSZParamsToBytes(confparams_cpr, &(bytes[pos]));
		pos += MetaDataByteLength;
		/* 4 data length, 4 or 8 bytes */
		memcpy(&bytes[pos], dsLengthBytes, exe_params->SZ_SIZE_TYPE);
		pos += exe_params->SZ_SIZE_TYPE;
		/* 5 exactMidBytes, exactMidBytes_size bytes */
		for (size_t j = 0; j < tdps->exactMidBytes_size; j++)
			bytes[pos++] = tdps->exactMidBytes[j];

		*size = totalByteLength;
		return true;
	}

	/*
	 * not same format
	 */
	size_t residualMidBitsLength = 0;
	if (tdps->residualMidBits != NULL)
		residualMidBitsLength = tdps->residualMidBits_size;

	/* version(1) + samebyte(1) + meta + fixed-width fields + the four arrays */
	size_t totalByteLength = 1 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + 4 + 4 + 1 + 8
			+ exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE
			+ tdps->typeArray_size
			+ tdps->leadNumArray_size
			+ tdps->exactMidBytes_size
			+ residualMidBitsLength;

	if (totalByteLength >= tdps->dataSeriesLength * sizeof(float))
	{
		*size = 0;
		return false;
	}

	convertTDPStoBytes_float(tdps, bytes, dsLengthBytes, sameByte);
	*size = totalByteLength;
	return true;
}

/**
 * Free the memory used in the compression: leadNumArray, exactMidBytes,
 * residualMidBits and the structure itself.
 *
 * Passing NULL is allowed and does nothing, mirroring free().
 *
 * NOTE(review): typeArray is not released here — confirm that it is freed
 * elsewhere.
 *
 * @param tdps  structure to release (may be NULL)
 */
void free_TightDataPointStorageF(TightDataPointStorageF *tdps)
{
	if (tdps == NULL)
		return;

	/* free(NULL) is a no-op, so the members need no individual guards */
	free(tdps->leadNumArray);
	free(tdps->exactMidBytes);
	free(tdps->residualMidBits);
	free(tdps);
}

/**
 * Free the memory used in the decompression.
 *
 * Only the structure itself is released; none of its member arrays are freed
 * here.
 *
 * @param tdps  structure to release
 */
void free_TightDataPointStorageF2(TightDataPointStorageF *tdps)
{			
	free(tdps);
}