/**
 *  @file TightDataPointStorageF.c
 *  @author Sheng Di and Dingwen Tao
 *  @date Aug, 2016
 *  @brief The functions used to construct the TightDataPointStorageF element for storing compressed bytes.
 *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include <stdlib.h> 
#include <stdio.h>
#include <string.h>
#include "TightDataPointStorageF.h"
#include "sz.h"
#include "defines.h"
#include "Huffman.h"

/**
 * Allocate a TightDataPointStorageF with every byte set to zero.
 *
 * @param this  out: receives the new object, or NULL when the allocation fails.
 */
void new_TightDataPointStorageF_Empty(TightDataPointStorageF **this)
{
	/* calloc hands back zero-filled storage in one step; unlike malloc
	 * followed by memset, nothing is written when the allocation fails. */
	*this = calloc(1, sizeof **this);
}

/**
 * Parse a compressed float stream into a newly allocated TightDataPointStorageF.
 *
 * No payload is copied: raBytes, typeArray, leadNumArray, exactMidBytes and
 * residualMidBits are set to addresses inside flatBytes, so flatBytes must
 * outlive the returned object.
 *
 * Stream layout, in the order it is read ("ST" is 4 or 8 bytes, selected by
 * bit 0x40 of the flag byte):
 *    1 version(1)            must be DATA_FROMAT_VER1
 *    2 flag byte(1)          0x01 all-same, 0x10 lossless, 0x40 8-byte sizes,
 *                            0x80 regression
 *    3 meta(MetaDataByteLength), decoded by convertBytesToSZParams
 *    4 element count(ST)
 *   -- lossless streams stop here; all-same streams continue with the exact
 *      bytes; regression streams continue with the regression payload --
 *    5 max quant intervals(4)   6 intervals(4)   7 median(4)
 *    8 reqLength(1)             9 realPrecision(8)
 *   10 typeArray size(ST)      11 exactDataNum(ST)   12 exactMidBytes size(ST)
 *   13 typeArray   14 leadNumArray   15 exactMidBytes   16 residualMidBits
 *
 * NOTE(review): the size-type width is stored in the global exe_params, not in
 * pde_exe — confirm this is intended.
 * NOTE(review): reads are not bounds-checked against flatBytesLength; a
 * truncated or corrupt stream is not detected here.
 *
 * @param this             out: receives the new object (NULL if allocation failed).
 * @param flatBytes        compressed stream to parse.
 * @param flatBytesLength  total length of flatBytes; only used to size the
 *                         trailing section (raBytes or residualMidBits).
 * @param pde_exe          forwarded to convertBytesToSZParams.
 * @param pde_params       forwarded to convertBytesToSZParams; maxRangeRadius is
 *                         set to half the stored max quant intervals for
 *                         regular (non lossless/same/regression) streams.
 * @return SZ_ABS on every path, including the unrecognised-version path, so the
 *         return value cannot be used to detect a failure.
 */
int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF **this, unsigned char* flatBytes, size_t flatBytesLength, sz_exedata* pde_exe, sz_params* pde_params)
{
	new_TightDataPointStorageF_Empty(this);
	TightDataPointStorageF *tdps = *this;
	if (tdps == NULL)
		return SZ_ABS;

	const int errorBoundMode = SZ_ABS;
	unsigned char byteBuf[8];
	size_t index = 0;

	// 1 version(1)
	unsigned char version = flatBytes[index++];
	// 2 flag byte(1)
	unsigned char sameRByte = flatBytes[index++];

	// only one data format is understood
	switch (version)
	{
	case DATA_FROMAT_VER1:
		break;
	default:
		printf(" error, float compressed data format can not be recognised. ver=%d\n ", version);
		return SZ_ABS;
	}

	// decode the flag byte
	int same = sameRByte & 0x01;                          // 0000,0001
	int isRegression = (sameRByte >> 7) & 0x01;           // 1000,0000
	tdps->isLossless = (sameRByte & 0x10) >> 4;           // 0001,0000
	exe_params->SZ_SIZE_TYPE = (sameRByte & 0x40) ? 8 : 4; // 0100,0000

	// 3 meta
	convertBytesToSZParams(&(flatBytes[index]), pde_params, pde_exe);
	index += MetaDataByteLength;

	// width of every "ST" field below; read after the meta section is decoded
	const size_t sizeTypeLen = (size_t)exe_params->SZ_SIZE_TYPE;

	// 4 element count(ST)
	memcpy(byteBuf, &flatBytes[index], sizeTypeLen);
	index += sizeTypeLen;
	tdps->dataSeriesLength = bytesToSize(byteBuf);

	// lossless stream: nothing further is parsed
	if (tdps->isLossless == 1)
		return errorBoundMode;

	// all-same stream: the exact bytes follow the header directly
	if (same == 1)
	{
		tdps->allSameData = 1;
		tdps->exactMidBytes = &flatBytes[index];
		return errorBoundMode;
	}
	tdps->allSameData = 0;

	// regression stream: everything after the header is the regression payload
	if (isRegression == 1)
	{
		tdps->raBytes_size = flatBytesLength - index;
		tdps->raBytes = &flatBytes[index];
		return errorBoundMode;
	}

	// 5 max quant intervals(4)
	memcpy(byteBuf, &flatBytes[index], 4);
	index += 4;
	int max_quant_intervals = bytesToInt_bigEndian(byteBuf);
	pde_params->maxRangeRadius = max_quant_intervals / 2;

	// 6 intervals(4)
	memcpy(byteBuf, &flatBytes[index], 4);
	index += 4;
	tdps->intervals = bytesToInt_bigEndian(byteBuf);

	// 7 median(4)
	memcpy(byteBuf, &flatBytes[index], 4);
	index += 4;
	tdps->medianValue = bytesToFloat(byteBuf);

	// 8 reqLength(1)
	tdps->reqLength = flatBytes[index++];

	// 9 realPrecision(8)
	memcpy(byteBuf, &flatBytes[index], 8);
	index += 8;
	tdps->realPrecision = bytesToDouble(byteBuf);

	// 10 typeArray size(ST)
	memcpy(byteBuf, &flatBytes[index], sizeTypeLen);
	index += sizeTypeLen;
	tdps->typeArray_size = bytesToSize(byteBuf);

	// 11 exactDataNum(ST)
	memcpy(byteBuf, &flatBytes[index], sizeTypeLen);
	index += sizeTypeLen;
	tdps->exactDataNum = bytesToSize(byteBuf);

	// 12 exactMidBytes size(ST)
	memcpy(byteBuf, &flatBytes[index], sizeTypeLen);
	index += sizeTypeLen;
	tdps->exactMidBytes_size = bytesToSize(byteBuf);

	// leadNumArray is not length-prefixed: it holds 2 bits per exact value,
	// rounded up to whole bytes
	size_t logicLeadNumBitsNum = tdps->exactDataNum * 2;
	tdps->leadNumArray_size = (logicLeadNumBitsNum >> 3) + (logicLeadNumBitsNum % 8 != 0 ? 1 : 0);

	// 13 typeArray; its first 4 bytes store the node count
	tdps->typeArray = &flatBytes[index];
	tdps->allNodes = bytesToInt_bigEndian(tdps->typeArray);
	tdps->stateNum = (tdps->allNodes + 1) / 2;
	index += tdps->typeArray_size;

	// 14 leadNumArray
	tdps->leadNumArray = &flatBytes[index];
	index += tdps->leadNumArray_size;

	// 15 exactMidBytes
	tdps->exactMidBytes = &flatBytes[index];
	index += tdps->exactMidBytes_size;

	// 16 residualMidBits: whatever remains after all preceding sections
	tdps->residualMidBits = &flatBytes[index];
	tdps->residualMidBits_size = flatBytesLength - index;

	return errorBoundMode;
}

/**
 * Build a TightDataPointStorageF from the intermediate results of compression.
 *
 * The type codes are Huffman-encoded into typeArray, the lead numbers are
 * packed into leadNumArray and the residual bits into residualMidBits; the
 * exactMidBytes pointer is stored as-is (not copied), and
 * free_TightDataPointStorageF later releases it.
 *
 * Expected array lengths:
 *   type             dataSeriesLength entries
 *   exactMidBytes    exactMidBytes_size bytes
 *   leadNumIntArray  exactDataNum entries
 *   resiMidBits      exactDataNum entries are converted
 *
 * @param this               out: receives the new object, or NULL when the
 *                           allocation fails.
 * @param dataSeriesLength   number of data points.
 * @param exactDataNum       number of exactly stored values.
 * @param type               type codes to Huffman-encode.
 * @param exactMidBytes      exact middle bytes.
 * @param exactMidBytes_size length of exactMidBytes.
 * @param leadNumIntArray    lead numbers in readable (unpacked) form.
 * @param resiMidBits        residual bits, one entry per exact value.
 * @param resiMidBits_size   not used by this function.
 * @param resiBitLength      passed to convertIntArray2ByteArray_fast_dynamic.
 * @param realPrecision      stored in the object.
 * @param medianValue        stored in the object.
 * @param reqLength          stored in the object.
 * @param intervals          stored in the object; the Huffman tree is created
 *                           with 2*intervals states.
 * @param radExpo            stored in the object.
 */
void new_TightDataPointStorageF(TightDataPointStorageF **this,
		size_t dataSeriesLength, size_t exactDataNum, 
		int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
		unsigned char* leadNumIntArray,  //leadNumIntArray contains readable numbers....
		unsigned char* resiMidBits, size_t resiMidBits_size,
		unsigned char resiBitLength, 
		double realPrecision, float medianValue, char reqLength, unsigned int intervals, 
		unsigned char radExpo) {

	/* calloc so that fields this constructor never assigns start out as zero
	 * instead of holding indeterminate values. */
	TightDataPointStorageF *tdps = calloc(1, sizeof *tdps);
	*this = tdps;
	if (tdps == NULL)
		return;

	tdps->allSameData = 0;
	tdps->isLossless = 0;
	tdps->realPrecision = realPrecision;
	tdps->medianValue = medianValue;
	tdps->reqLength = reqLength;
	tdps->intervals = intervals;
	tdps->radExpo = radExpo;

	tdps->dataSeriesLength = dataSeriesLength;
	tdps->exactDataNum = exactDataNum;

	// encode the type codes with Huffman
	int stateNum = 2*intervals;
	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression)
		tdps->max_bits = encode_withTree_MSST19(huffmanTree, type, dataSeriesLength, &tdps->typeArray, &tdps->typeArray_size);
	else
		encode_withTree(huffmanTree, type, dataSeriesLength, &tdps->typeArray, &tdps->typeArray_size);
	SZ_ReleaseHuffman(huffmanTree);

	// the exact bytes are referenced, not copied
	tdps->exactMidBytes = exactMidBytes;
	tdps->exactMidBytes_size = exactMidBytes_size;

	// pack lead numbers and residual bits into byte arrays
	tdps->leadNumArray_size = convertIntArray2ByteArray_fast_2b(leadNumIntArray, exactDataNum, &(tdps->leadNumArray));
	tdps->residualMidBits_size = convertIntArray2ByteArray_fast_dynamic(resiMidBits, resiBitLength, exactDataNum, &(tdps->residualMidBits));
}

/**
 * Serialize a regular (not all-same) TightDataPointStorageF into bytes.
 *
 * Sections are written back to back in this order ("ST" is
 * exe_params->SZ_SIZE_TYPE bytes):
 *    1 version(1)   2 flag byte(1)   3 meta(MetaDataByteLength)
 *    4 element count(ST)   5 max quant intervals(4)   6 intervals(4)
 *    7 median(4)   8 reqLength(1)   9 realPrecision(8)
 *   10 typeArray size(ST)   11 exactDataNum(ST)   12 exactMidBytes size(ST)
 *   13 typeArray   14 leadNumArray   15 exactMidBytes   16 residualMidBits
 *
 * The leadNumArray length is not stored; it is derivable from exactDataNum.
 *
 * @param tdps           storage object to serialize.
 * @param bytes          destination buffer; the caller must have verified it
 *                       is large enough, no size is checked here.
 * @param dsLengthBytes  element count, already encoded in ST bytes.
 * @param sameByte       flag byte to store in section 2.
 */
void convertTDPStoBytes_float(TightDataPointStorageF* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte)
{
	unsigned char scratch[8]; // staging area for one encoded scalar at a time
	size_t k = 0;

	// 1 version
	bytes[k++] = versionNumber;
	// 2 flag byte
	bytes[k++] = sameByte;
	// 3 meta
	convertSZParamsToBytes(confparams_cpr, &(bytes[k]));
	k += MetaDataByteLength;

	// width of every "ST" field below
	const size_t sizeTypeLen = (size_t)exe_params->SZ_SIZE_TYPE;

	// 4 element count (ST)
	memcpy(&bytes[k], dsLengthBytes, sizeTypeLen);
	k += sizeTypeLen;

	// 5 max_quant_intervals (4)
	intToBytes_bigEndian(scratch, confparams_cpr->max_quant_intervals);
	memcpy(&bytes[k], scratch, 4);
	k += 4;

	// 6 intervals (4)
	intToBytes_bigEndian(scratch, tdps->intervals);
	memcpy(&bytes[k], scratch, 4);
	k += 4;

	// 7 median (4)
	floatToBytes(scratch, tdps->medianValue);
	memcpy(&bytes[k], scratch, 4);
	k += 4;

	// 8 reqLength (1)
	bytes[k++] = tdps->reqLength;

	// 9 realPrecision (8)
	doubleToBytes(scratch, tdps->realPrecision);
	memcpy(&bytes[k], scratch, 8);
	k += 8;

	// 10 typeArray size (ST)
	sizeToBytes(scratch, tdps->typeArray_size);
	memcpy(&bytes[k], scratch, sizeTypeLen);
	k += sizeTypeLen;

	// 11 exactDataNum (ST); the leadNumArray length is derived from it
	sizeToBytes(scratch, tdps->exactDataNum);
	memcpy(&bytes[k], scratch, sizeTypeLen);
	k += sizeTypeLen;

	// 12 exactMidBytes size (ST)
	sizeToBytes(scratch, tdps->exactMidBytes_size);
	memcpy(&bytes[k], scratch, sizeTypeLen);
	k += sizeTypeLen;

	// 13 typeArray
	memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size);
	k += tdps->typeArray_size;

	// 14 leadNumArray
	memcpy(&(bytes[k]), tdps->leadNumArray, tdps->leadNumArray_size);
	k += tdps->leadNumArray_size;

	// 15 exactMidBytes
	memcpy(&(bytes[k]), tdps->exactMidBytes, tdps->exactMidBytes_size);
	k += tdps->exactMidBytes_size;

	// 16 residualMidBits (may be absent)
	if(tdps->residualMidBits!=NULL)
	{
		memcpy(&(bytes[k]), tdps->residualMidBits, tdps->residualMidBits_size);
		k += tdps->residualMidBits_size;
	}
}

//convert TightDataPointStorageD to bytes...
T
tickduan 已提交
270
bool convertTDPStoFlatBytes_float(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size)
T
tickduan 已提交
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
{
	size_t i, k = 0; 
	unsigned char dsLengthBytes[8];
	
	if(exe_params->SZ_SIZE_TYPE==4)
		intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4
	else
		longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8
		
	unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; //0000,0001
	//sameByte = sameByte | (confparams_cpr->szMode << 1);  //0000,0110 (no need because of convertSZParamsToBytes
	if(tdps->isLossless)
		sameByte = (unsigned char) (sameByte | 0x10);  // 0001,0000
	if(confparams_cpr->errorBoundMode>=PW_REL)
		sameByte = (unsigned char) (sameByte | 0x20); // 0010,0000, the 5th bit
	if(exe_params->SZ_SIZE_TYPE==8)
		sameByte = (unsigned char) (sameByte | 0x40); // 0100,0000, the 6th bit
	if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression)
		sameByte = (unsigned char) (sameByte | 0x08); //0000,1000
T
tickduan 已提交
290 291
	//if(confparams_cpr->protectValueRange)
	//	sameByte = (unsigned char) (sameByte | 0x04); //0000,0100
T
tickduan 已提交
292
	
T
tickduan 已提交
293
	if(tdps->allSameData == 1 )
T
tickduan 已提交
294
	{
T
tickduan 已提交
295 296 297
		//
		// same format
		//
T
tickduan 已提交
298
		size_t totalByteLength = 1 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size;
T
tickduan 已提交
299 300
		//*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); // not need malloc comment by tickduan
		// check output buffer enough
T
tickduan 已提交
301 302 303 304 305
		if(totalByteLength >=  tdps->dataSeriesLength * sizeof(float) )
		{
			*size = 0;
			return false;
		}
T
tickduan 已提交
306
		
T
tickduan 已提交
307 308
		// 1 version 1 byte
	    bytes[k++] = versionNumber;
T
tickduan 已提交
309
		// 2 same flag 1 bytes
T
tickduan 已提交
310
		bytes[k++] = sameByte;
T
tickduan 已提交
311
		// 3 metaData 26 bytes
T
tickduan 已提交
312
		convertSZParamsToBytes(confparams_cpr, &(bytes[k]));
T
tickduan 已提交
313
		k = k + MetaDataByteLength;
T
tickduan 已提交
314
		// 4 data Length 4 or 8 bytes	
T
tickduan 已提交
315
		for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
T
tickduan 已提交
316
			bytes[k++] = dsLengthBytes[i];
T
tickduan 已提交
317
		// 5 exactMidBytes exactMidBytes_size bytes
T
tickduan 已提交
318
		for (i = 0; i < tdps->exactMidBytes_size; i++)
T
tickduan 已提交
319
			bytes[k++] = tdps->exactMidBytes[i];
T
tickduan 已提交
320 321 322

		*size = totalByteLength;
	}
T
tickduan 已提交
323
	else
T
tickduan 已提交
324
	{
T
tickduan 已提交
325 326 327
		//
		// not same format
		//
T
tickduan 已提交
328 329
		size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;

T
tickduan 已提交
330
        // version(1) + samebyte(1) 
T
tickduan 已提交
331 332 333 334 335 336
		size_t totalByteLength = 1 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + 4 + 4 + 1 + 8 
				+ exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE
			    + tdps->typeArray_size
				+ tdps->leadNumArray_size 
				+ tdps->exactMidBytes_size 
				+ residualMidBitsLength;		
T
tickduan 已提交
337

T
tickduan 已提交
338
		//*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength);  // comment by tickduan
T
tickduan 已提交
339 340 341 342 343
		if(totalByteLength >= tdps->dataSeriesLength * sizeof(float))
		{
			*size = 0;
			return false;
		}
T
tickduan 已提交
344

T
tickduan 已提交
345
		convertTDPStoBytes_float(tdps, bytes, dsLengthBytes, sameByte);
T
tickduan 已提交
346
		*size = totalByteLength;
T
tickduan 已提交
347
		return true;
T
tickduan 已提交
348
	}
T
tickduan 已提交
349 350

	return true;
T
tickduan 已提交
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
}

/**
 * Release a storage object used during compression, together with its
 * leadNumArray, exactMidBytes and residualMidBits buffers.
 *
 * NOTE(review): typeArray is not released here — confirm that its owner frees
 * it elsewhere.
 *
 * @param tdps  object to free; NULL is accepted and ignored.
 */
void free_TightDataPointStorageF(TightDataPointStorageF *tdps)
{
	if (tdps == NULL)
		return;

	/* free(NULL) is a no-op, so the members need no individual checks */
	free(tdps->leadNumArray);
	free(tdps->exactMidBytes);
	free(tdps->residualMidBits);
	free(tdps);
}

/**
 * Release a storage object used during decompression.
 *
 * Only the object itself is freed; the buffers its members refer to are left
 * untouched.
 *
 * @param tdps  object to free.
 */
void free_TightDataPointStorageF2(TightDataPointStorageF *tdps)
{
	free(tdps);
}