提交 b3bb6527 编写于 作者: C Cary Xu

data and bitmap compress/store/decompress separately

上级 ca3206d4
...@@ -135,8 +135,8 @@ typedef struct { ...@@ -135,8 +135,8 @@ typedef struct {
#define TD_VTYPE_PARTS 4 // 8 bits / TD_VTYPE_BITS = 4 #define TD_VTYPE_PARTS 4 // 8 bits / TD_VTYPE_BITS = 4
#define TD_VTYPE_OPTR 3 // TD_VTYPE_PARTS - 1, utilize to get remainder #define TD_VTYPE_OPTR 3 // TD_VTYPE_PARTS - 1, utilize to get remainder
#define TD_BITMAP_BYTES(cnt) (ceil((double)cnt / TD_VTYPE_PARTS)) #define TD_BITMAP_BYTES(cnt) (ceil((double)(cnt) / TD_VTYPE_PARTS))
#define TD_BIT_TO_BYTES(cnt) (ceil((double)cnt / 8)) #define TD_BIT_TO_BYTES(cnt) (ceil((double)(cnt) / 8))
int32_t tdInitTSchemaBuilder(STSchemaBuilder *pBuilder, schema_ver_t version); int32_t tdInitTSchemaBuilder(STSchemaBuilder *pBuilder, schema_ver_t version);
void tdDestroyTSchemaBuilder(STSchemaBuilder *pBuilder); void tdDestroyTSchemaBuilder(STSchemaBuilder *pBuilder);
...@@ -365,6 +365,7 @@ static FORCE_INLINE void tdCopyColOfRowBySchema(SDataRow dst, STSchema *pDstSche ...@@ -365,6 +365,7 @@ static FORCE_INLINE void tdCopyColOfRowBySchema(SDataRow dst, STSchema *pDstSche
} }
#endif #endif
// ----------------- Data column structure // ----------------- Data column structure
// SDataCol arrangement: data => bitmap => dataOffset
typedef struct SDataCol { typedef struct SDataCol {
int8_t type; // column type int8_t type; // column type
uint8_t bitmap : 1; // 0: has bitmap if has NULL/NORM rows, 1: no bitmap if all rows are NORM uint8_t bitmap : 1; // 0: has bitmap if has NULL/NORM rows, 1: no bitmap if all rows are NORM
......
...@@ -147,20 +147,20 @@ typedef struct { ...@@ -147,20 +147,20 @@ typedef struct {
typedef struct { typedef struct {
// basic info // basic info
int8_t rowType; int8_t rowType;
int16_t sver; schema_ver_t sver;
STSRow *pBuf; STSRow *pBuf;
// extended info // extended info
int32_t flen; int32_t flen;
int16_t nBoundCols; col_id_t nBoundCols;
int16_t nCols; col_id_t nCols;
int16_t nBitmaps; col_id_t nBitmaps;
int16_t nBoundBitmaps; col_id_t nBoundBitmaps;
int32_t offset; int32_t offset;
void *pBitmap; void *pBitmap;
void *pOffset; void *pOffset;
int32_t extendedRowSize; int32_t extendedRowSize;
} SRowBuilder; } SRowBuilder;
#define TD_ROW_HEAD_LEN (sizeof(STSRow)) #define TD_ROW_HEAD_LEN (sizeof(STSRow))
...@@ -448,9 +448,9 @@ static FORCE_INLINE int32_t tdSRowSetExtendedInfo(SRowBuilder *pBuilder, int32_t ...@@ -448,9 +448,9 @@ static FORCE_INLINE int32_t tdSRowSetExtendedInfo(SRowBuilder *pBuilder, int32_t
} }
#ifdef TD_SUPPORT_BITMAP #ifdef TD_SUPPORT_BITMAP
// the primary TS key is stored separatedly // the primary TS key is stored separatedly
pBuilder->nBitmaps = (int16_t)TD_BITMAP_BYTES(pBuilder->nCols - 1); pBuilder->nBitmaps = (col_id_t)TD_BITMAP_BYTES(pBuilder->nCols - 1);
if (nBoundCols > 0) { if (nBoundCols > 0) {
pBuilder->nBoundBitmaps = (int16_t)TD_BITMAP_BYTES(pBuilder->nBoundCols - 1); pBuilder->nBoundBitmaps = (col_id_t)TD_BITMAP_BYTES(pBuilder->nBoundCols - 1);
} else { } else {
pBuilder->nBoundBitmaps = 0; pBuilder->nBoundBitmaps = 0;
} }
......
...@@ -33,7 +33,8 @@ int tdAllocMemForCol(SDataCol *pCol, int maxPoints) { ...@@ -33,7 +33,8 @@ int tdAllocMemForCol(SDataCol *pCol, int maxPoints) {
spaceNeeded += (int)nBitmapBytes; spaceNeeded += (int)nBitmapBytes;
// TODO: Currently, the compression of bitmap parts is affiliated to the column data parts, thus allocate 1 more // TODO: Currently, the compression of bitmap parts is affiliated to the column data parts, thus allocate 1 more
// TYPE_BYTES as to comprise complete TYPE_BYTES. Otherwise, invalid read/write would be triggered. // TYPE_BYTES as to comprise complete TYPE_BYTES. Otherwise, invalid read/write would be triggered.
spaceNeeded += TYPE_BYTES[pCol->type]; // spaceNeeded += TYPE_BYTES[pCol->type]; // the bitmap part is append as a single part since 2022.04.03, thus remove
// the additional space
#endif #endif
if (pCol->spaceSize < spaceNeeded) { if (pCol->spaceSize < spaceNeeded) {
...@@ -47,6 +48,7 @@ int tdAllocMemForCol(SDataCol *pCol, int maxPoints) { ...@@ -47,6 +48,7 @@ int tdAllocMemForCol(SDataCol *pCol, int maxPoints) {
} }
} }
#ifdef TD_SUPPORT_BITMAP #ifdef TD_SUPPORT_BITMAP
if (IS_VAR_DATA_TYPE(pCol->type)) { if (IS_VAR_DATA_TYPE(pCol->type)) {
pCol->pBitmap = POINTER_SHIFT(pCol->pData, pCol->bytes * maxPoints); pCol->pBitmap = POINTER_SHIFT(pCol->pData, pCol->bytes * maxPoints);
pCol->dataOff = POINTER_SHIFT(pCol->pBitmap, nBitmapBytes); pCol->dataOff = POINTER_SHIFT(pCol->pBitmap, nBitmapBytes);
...@@ -306,7 +308,7 @@ static FORCE_INLINE const void *tdGetColDataOfRowUnsafe(SDataCol *pCol, int row) ...@@ -306,7 +308,7 @@ static FORCE_INLINE const void *tdGetColDataOfRowUnsafe(SDataCol *pCol, int row)
bool isNEleNull(SDataCol *pCol, int nEle) { bool isNEleNull(SDataCol *pCol, int nEle) {
if (isAllRowsNull(pCol)) return true; if (isAllRowsNull(pCol)) return true;
for (int i = 0; i < nEle; i++) { for (int i = 0; i < nEle; ++i) {
if (!isNull(tdGetColDataOfRowUnsafe(pCol, i), pCol->type)) return false; if (!isNull(tdGetColDataOfRowUnsafe(pCol, i), pCol->type)) return false;
} }
return true; return true;
...@@ -327,7 +329,7 @@ static FORCE_INLINE void dataColSetNullAt(SDataCol *pCol, int index) { ...@@ -327,7 +329,7 @@ static FORCE_INLINE void dataColSetNullAt(SDataCol *pCol, int index) {
static void dataColSetNEleNull(SDataCol *pCol, int nEle) { static void dataColSetNEleNull(SDataCol *pCol, int nEle) {
if (IS_VAR_DATA_TYPE(pCol->type)) { if (IS_VAR_DATA_TYPE(pCol->type)) {
pCol->len = 0; pCol->len = 0;
for (int i = 0; i < nEle; i++) { for (int i = 0; i < nEle; ++i) {
dataColSetNullAt(pCol, i); dataColSetNullAt(pCol, i);
} }
} else { } else {
...@@ -343,7 +345,7 @@ void *dataColSetOffset(SDataCol *pCol, int nEle) { ...@@ -343,7 +345,7 @@ void *dataColSetOffset(SDataCol *pCol, int nEle) {
// char *tptr = (char *)(pCol->pData); // char *tptr = (char *)(pCol->pData);
VarDataOffsetT offset = 0; VarDataOffsetT offset = 0;
for (int i = 0; i < nEle; i++) { for (int i = 0; i < nEle; ++i) {
pCol->dataOff[i] = offset; pCol->dataOff[i] = offset;
offset += varDataTLen(tptr); offset += varDataTLen(tptr);
tptr = POINTER_SHIFT(tptr, varDataTLen(tptr)); tptr = POINTER_SHIFT(tptr, varDataTLen(tptr));
...@@ -371,6 +373,7 @@ SDataCols *tdNewDataCols(int maxCols, int maxRows) { ...@@ -371,6 +373,7 @@ SDataCols *tdNewDataCols(int maxCols, int maxRows) {
tdFreeDataCols(pCols); tdFreeDataCols(pCols);
return NULL; return NULL;
} }
#if 0 // no need as calloc used
int i; int i;
for (i = 0; i < maxCols; i++) { for (i = 0; i < maxCols; i++) {
pCols->cols[i].spaceSize = 0; pCols->cols[i].spaceSize = 0;
...@@ -378,6 +381,7 @@ SDataCols *tdNewDataCols(int maxCols, int maxRows) { ...@@ -378,6 +381,7 @@ SDataCols *tdNewDataCols(int maxCols, int maxRows) {
pCols->cols[i].pData = NULL; pCols->cols[i].pData = NULL;
pCols->cols[i].dataOff = NULL; pCols->cols[i].dataOff = NULL;
} }
#endif
} }
return pCols; return pCols;
...@@ -391,17 +395,21 @@ int tdInitDataCols(SDataCols *pCols, STSchema *pSchema) { ...@@ -391,17 +395,21 @@ int tdInitDataCols(SDataCols *pCols, STSchema *pSchema) {
void *ptr = (SDataCol *)taosMemoryRealloc(pCols->cols, sizeof(SDataCol) * pCols->maxCols); void *ptr = (SDataCol *)taosMemoryRealloc(pCols->cols, sizeof(SDataCol) * pCols->maxCols);
if (ptr == NULL) return -1; if (ptr == NULL) return -1;
pCols->cols = ptr; pCols->cols = ptr;
for (i = oldMaxCols; i < pCols->maxCols; i++) { for (i = oldMaxCols; i < pCols->maxCols; ++i) {
pCols->cols[i].pData = NULL; pCols->cols[i].pData = NULL;
pCols->cols[i].dataOff = NULL; pCols->cols[i].dataOff = NULL;
pCols->cols[i].pBitmap = NULL;
pCols->cols[i].spaceSize = 0; pCols->cols[i].spaceSize = 0;
} }
} }
#if 0
tdResetDataCols(pCols); // redundant loop to reset len/blen to 0, already reset in following dataColInit(...)
#endif
tdResetDataCols(pCols); pCols->numOfRows = 0;
pCols->numOfCols = schemaNCols(pSchema); pCols->numOfCols = schemaNCols(pSchema);
for (i = 0; i < schemaNCols(pSchema); i++) { for (i = 0; i < schemaNCols(pSchema); ++i) {
dataColInit(pCols->cols + i, schemaColAt(pSchema, i), pCols->maxPoints); dataColInit(pCols->cols + i, schemaColAt(pSchema, i), pCols->maxPoints);
} }
...@@ -413,7 +421,7 @@ SDataCols *tdFreeDataCols(SDataCols *pCols) { ...@@ -413,7 +421,7 @@ SDataCols *tdFreeDataCols(SDataCols *pCols) {
if (pCols) { if (pCols) {
if (pCols->cols) { if (pCols->cols) {
int maxCols = pCols->maxCols; int maxCols = pCols->maxCols;
for (i = 0; i < maxCols; i++) { for (i = 0; i < maxCols; ++i) {
SDataCol *pCol = &pCols->cols[i]; SDataCol *pCol = &pCols->cols[i];
taosMemoryFreeClear(pCol->pData); taosMemoryFreeClear(pCol->pData);
} }
...@@ -464,7 +472,7 @@ SDataCols *tdDupDataCols(SDataCols *pDataCols, bool keepData) { ...@@ -464,7 +472,7 @@ SDataCols *tdDupDataCols(SDataCols *pDataCols, bool keepData) {
void tdResetDataCols(SDataCols *pCols) { void tdResetDataCols(SDataCols *pCols) {
if (pCols != NULL) { if (pCols != NULL) {
pCols->numOfRows = 0; pCols->numOfRows = 0;
for (int i = 0; i < pCols->maxCols; i++) { for (int i = 0; i < pCols->maxCols; ++i) {
dataColReset(pCols->cols + i); dataColReset(pCols->cols + i);
} }
} }
......
...@@ -503,7 +503,7 @@ SDataCols *tdDupDataCols(SDataCols *pDataCols, bool keepData) { ...@@ -503,7 +503,7 @@ SDataCols *tdDupDataCols(SDataCols *pDataCols, bool keepData) {
memcpy(pRet->cols[i].dataOff, pDataCols->cols[i].dataOff, dataOffSize); memcpy(pRet->cols[i].dataOff, pDataCols->cols[i].dataOff, dataOffSize);
} }
if (!TD_COL_ROWS_NORM(pRet->cols + i)) { if (!TD_COL_ROWS_NORM(pRet->cols + i)) {
int32_t nBitmapBytes = (int32_t)TD_BITMAP_BYTES(pDataCols->maxPoints); int32_t nBitmapBytes = (int32_t)TD_BITMAP_BYTES(pDataCols->numOfRows);
memcpy(pRet->cols[i].pBitmap, pDataCols->cols[i].pBitmap, nBitmapBytes); memcpy(pRet->cols[i].pBitmap, pDataCols->cols[i].pBitmap, nBitmapBytes);
} }
} }
......
...@@ -112,10 +112,10 @@ typedef struct { ...@@ -112,10 +112,10 @@ typedef struct {
#else #else
typedef struct { typedef struct {
int16_t colId; int16_t colId;
uint8_t bitmap : 1; // 0: has bitmap if has NULL/NORM rows, 1: no bitmap if all rows are NORM uint16_t type : 6;
uint8_t reserve : 7; uint16_t blen : 10; // bitmap length(TODO: full UT for the bitmap compress of various data input)
uint8_t type; uint32_t bitmap : 1; // 0: has bitmap if has NULL/NORM rows, 1: no bitmap if all rows are NORM
int32_t len; uint32_t len : 31; // data length + bitmap length
uint32_t offset; uint32_t offset;
} SBlockColV0; } SBlockColV0;
......
...@@ -1281,7 +1281,7 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF ...@@ -1281,7 +1281,7 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF
uint32_t tsizeAggr = (uint32_t)tsdbBlockAggrSize(nColsNotAllNull, SBlockVerLatest); uint32_t tsizeAggr = (uint32_t)tsdbBlockAggrSize(nColsNotAllNull, SBlockVerLatest);
int32_t keyLen = 0; int32_t keyLen = 0;
int32_t nBitmaps = (int32_t)TD_BITMAP_BYTES(rowsToWrite); int32_t nBitmaps = (int32_t)TD_BITMAP_BYTES(rowsToWrite);
int32_t tBitmaps = 0; // int32_t tBitmaps = 0;
for (int ncol = 0; ncol < pDataCols->numOfCols; ++ncol) { for (int ncol = 0; ncol < pDataCols->numOfCols; ++ncol) {
// All not NULL columns finish // All not NULL columns finish
...@@ -1297,7 +1297,10 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF ...@@ -1297,7 +1297,10 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF
#ifdef TD_SUPPORT_BITMAP #ifdef TD_SUPPORT_BITMAP
int32_t tBitmaps = 0; int32_t tBitmaps = 0;
int32_t tBitmapsLen = 0;
if ((ncol != 0) && !TD_COL_ROWS_NORM(pBlockCol)) { if ((ncol != 0) && !TD_COL_ROWS_NORM(pBlockCol)) {
tBitmaps = nBitmaps;
#if 0
if (IS_VAR_DATA_TYPE(pDataCol->type)) { if (IS_VAR_DATA_TYPE(pDataCol->type)) {
tBitmaps = nBitmaps; tBitmaps = nBitmaps;
tlen += tBitmaps; tlen += tBitmaps;
...@@ -1305,16 +1308,17 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF ...@@ -1305,16 +1308,17 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF
tBitmaps = (int32_t)ceil((double)nBitmaps / TYPE_BYTES[pDataCol->type]); tBitmaps = (int32_t)ceil((double)nBitmaps / TYPE_BYTES[pDataCol->type]);
tlen += tBitmaps * TYPE_BYTES[pDataCol->type]; tlen += tBitmaps * TYPE_BYTES[pDataCol->type];
} }
#endif
// move bitmap parts ahead // move bitmap parts ahead
// TODO: put bitmap part to the 1st location(pBitmap points to pData) to avoid the memmove // TODO: put bitmap part to the 1st location(pBitmap points to pData) to avoid the memmove
memcpy(POINTER_SHIFT(pDataCol->pData, pDataCol->len), pDataCol->pBitmap, nBitmaps); // memcpy(POINTER_SHIFT(pDataCol->pData, pDataCol->len), pDataCol->pBitmap, nBitmaps);
} }
#endif #endif
void *tptr; void *tptr;
// Make room // Make room
if (tsdbMakeRoom(ppBuf, lsize + tlen + COMP_OVERFLOW_BYTES + sizeof(TSCKSUM)) < 0) { if (tsdbMakeRoom(ppBuf, lsize + tlen + tBitmaps + 2 * COMP_OVERFLOW_BYTES + sizeof(TSCKSUM)) < 0) {
return -1; return -1;
} }
pBlockData = (SBlockData *)(*ppBuf); pBlockData = (SBlockData *)(*ppBuf);
...@@ -1327,23 +1331,44 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF ...@@ -1327,23 +1331,44 @@ int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDF
// Compress or just copy // Compress or just copy
if (pCfg->compression) { if (pCfg->compression) {
#if 0
flen = (*(tDataTypes[pDataCol->type].compFunc))((char *)pDataCol->pData, tlen, rowsToWrite + tBitmaps, tptr, flen = (*(tDataTypes[pDataCol->type].compFunc))((char *)pDataCol->pData, tlen, rowsToWrite + tBitmaps, tptr,
tlen + COMP_OVERFLOW_BYTES, pCfg->compression, *ppCBuf, tlen + COMP_OVERFLOW_BYTES, pCfg->compression, *ppCBuf,
tlen + COMP_OVERFLOW_BYTES); tlen + COMP_OVERFLOW_BYTES);
#endif
flen = (*(tDataTypes[pDataCol->type].compFunc))((char *)pDataCol->pData, tlen, rowsToWrite, tptr,
tlen + COMP_OVERFLOW_BYTES, pCfg->compression, *ppCBuf,
tlen + COMP_OVERFLOW_BYTES);
if (tBitmaps > 0) {
tptr = POINTER_SHIFT(pBlockData, lsize + flen);
tBitmapsLen =
tsCompressTinyint((char *)pDataCol->pBitmap, tBitmaps, rowsToWrite, tptr, tBitmaps + COMP_OVERFLOW_BYTES,
pCfg->compression, *ppCBuf, tBitmaps + COMP_OVERFLOW_BYTES);
TASSERT((tBitmapsLen > 0) && (tBitmapsLen <= (tBitmaps + COMP_OVERFLOW_BYTES)));
flen += tBitmapsLen;
}
} else { } else {
flen = tlen; flen = tlen;
memcpy(tptr, pDataCol->pData, flen); memcpy(tptr, pDataCol->pData, flen);
if (tBitmaps > 0) {
tptr = POINTER_SHIFT(pBlockData, lsize + flen);
memcpy(tptr, pDataCol->pBitmap, tBitmaps);
tBitmapsLen = tBitmaps;
flen += tBitmapsLen;
}
} }
// Add checksum // Add checksum
ASSERT(flen > 0); ASSERT(flen > 0);
ASSERT(tBitmapsLen <= 1024);
flen += sizeof(TSCKSUM); flen += sizeof(TSCKSUM);
taosCalcChecksumAppend(0, (uint8_t *)tptr, flen); taosCalcChecksumAppend(0, (uint8_t *)tptr, flen);
tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(tptr, flen - sizeof(TSCKSUM))); tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(tptr, flen - sizeof(TSCKSUM)));
if (ncol != 0) { if (ncol != 0) {
tsdbSetBlockColOffset(pBlockCol, toffset); tsdbSetBlockColOffset(pBlockCol, toffset);
pBlockCol->len = flen; pBlockCol->len = flen; // data + bitmaps
pBlockCol->blen = tBitmapsLen;
++tcol; ++tcol;
} else { } else {
keyLen = flen; keyLen = flen;
......
...@@ -21,9 +21,8 @@ static void tsdbResetReadTable(SReadH *pReadh); ...@@ -21,9 +21,8 @@ static void tsdbResetReadTable(SReadH *pReadh);
static void tsdbResetReadFile(SReadH *pReadh); static void tsdbResetReadFile(SReadH *pReadh);
static int tsdbLoadBlockOffset(SReadH *pReadh, SBlock *pBlock); static int tsdbLoadBlockOffset(SReadH *pReadh, SBlock *pBlock);
static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols); static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols);
static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int8_t comp, int numOfRows, static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int32_t bitmapLen, int8_t comp,
int numOfBitmaps, int lenOfBitmaps, int maxPoints, char *buffer, int numOfRows, int numOfBitmaps, int maxPoints, char *buffer, int bufferSize);
int bufferSize);
static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, const int16_t *colIds, static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, const int16_t *colIds,
int numOfColIds); int numOfColIds);
static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol); static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol);
...@@ -548,7 +547,7 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat ...@@ -548,7 +547,7 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat
if (dcol != 0 && ccol >= pBlockData->numOfCols) { if (dcol != 0 && ccol >= pBlockData->numOfCols) {
// Set current column as NULL and forward // Set current column as NULL and forward
dataColReset(pDataCol); dataColReset(pDataCol);
dcol++; ++dcol;
continue; continue;
} }
...@@ -567,9 +566,11 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat ...@@ -567,9 +566,11 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat
TD_SET_COL_ROWS_NORM(pDataCol); TD_SET_COL_ROWS_NORM(pDataCol);
} }
int32_t tBitmaps = 0; // int32_t tBitmaps = 0;
int32_t tLenBitmap = 0; int32_t tLenBitmap = 0;
if ((dcol != 0) && !TD_COL_ROWS_NORM(pBlockCol)) { if ((dcol != 0) && !TD_COL_ROWS_NORM(pBlockCol)) {
tLenBitmap = nBitmaps;
#if 0
if (IS_VAR_DATA_TYPE(pDataCol->type)) { if (IS_VAR_DATA_TYPE(pDataCol->type)) {
tBitmaps = nBitmaps; tBitmaps = nBitmaps;
tLenBitmap = tBitmaps; tLenBitmap = tBitmaps;
...@@ -577,17 +578,18 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat ...@@ -577,17 +578,18 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat
tBitmaps = (int32_t)ceil((double)nBitmaps / TYPE_BYTES[pDataCol->type]); tBitmaps = (int32_t)ceil((double)nBitmaps / TYPE_BYTES[pDataCol->type]);
tLenBitmap = tBitmaps * TYPE_BYTES[pDataCol->type]; tLenBitmap = tBitmaps * TYPE_BYTES[pDataCol->type];
} }
#endif
} }
if (tcolId == pDataCol->colId) { if (tcolId == pDataCol->colId) {
if (pBlock->algorithm == TWO_STAGE_COMP) { if (pBlock->algorithm == TWO_STAGE_COMP) {
int zsize = pDataCol->bytes * pBlock->numOfRows + COMP_OVERFLOW_BYTES; int zsize = pDataCol->bytes * pBlock->numOfRows + tLenBitmap + 2 * COMP_OVERFLOW_BYTES;
if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), zsize) < 0) return -1; if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), zsize) < 0) return -1;
} }
if (tsdbCheckAndDecodeColumnData(pDataCol, POINTER_SHIFT(pBlockData, tsize + toffset), tlen, pBlock->algorithm, if (tsdbCheckAndDecodeColumnData(pDataCol, POINTER_SHIFT(pBlockData, tsize + toffset), tlen, pBlockCol->blen,
pBlock->numOfRows, tBitmaps, tLenBitmap, pDataCols->maxPoints, TSDB_READ_COMP_BUF(pReadh), pBlock->algorithm, pBlock->numOfRows, tLenBitmap, pDataCols->maxPoints,
(int)taosTSizeof(TSDB_READ_COMP_BUF(pReadh))) < 0) { TSDB_READ_COMP_BUF(pReadh), (int)taosTSizeof(TSDB_READ_COMP_BUF(pReadh))) < 0) {
tsdbError("vgId:%d file %s is broken at column %d block offset %" PRId64 " column offset %u", tsdbError("vgId:%d file %s is broken at column %d block offset %" PRId64 " column offset %u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tcolId, (int64_t)pBlock->offset, toffset); TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tcolId, (int64_t)pBlock->offset, toffset);
return -1; return -1;
...@@ -609,9 +611,8 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat ...@@ -609,9 +611,8 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat
return 0; return 0;
} }
static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int8_t comp, int numOfRows, static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int32_t bitmapLen, int8_t comp,
int numOfBitmaps, int lenOfBitmaps, int maxPoints, char *buffer, int numOfRows, int numOfBitmaps, int maxPoints, char *buffer, int bufferSize) {
int bufferSize) {
if (!taosCheckChecksumWhole((uint8_t *)content, len)) { if (!taosCheckChecksumWhole((uint8_t *)content, len)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED; terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1; return -1;
...@@ -623,21 +624,41 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32 ...@@ -623,21 +624,41 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32
if (comp) { if (comp) {
// Need to decompress // Need to decompress
int tlen = int tlen =
(*(tDataTypes[pDataCol->type].decompFunc))(content, len - sizeof(TSCKSUM), numOfRows + numOfBitmaps, (*(tDataTypes[pDataCol->type].decompFunc))(content, len - bitmapLen - sizeof(TSCKSUM), numOfRows,
pDataCol->pData, pDataCol->spaceSize, comp, buffer, bufferSize); pDataCol->pData, pDataCol->spaceSize, comp, buffer, bufferSize);
if (tlen <= 0) { if (tlen <= 0) {
tsdbError("Failed to decompress column, file corrupted, len:%d comp:%d numOfRows:%d maxPoints:%d bufferSize:%d", tsdbError(
len, comp, numOfRows, maxPoints, bufferSize); "Failed to decompress column data, file corrupted, len:%d comp:%d numOfRows:%d maxPoints:%d bufferSize:%d",
(int32_t)(len - bitmapLen - sizeof(TSCKSUM)), comp, numOfRows, maxPoints, bufferSize);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED; terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1; return -1;
} }
pDataCol->len = tlen; pDataCol->len = tlen;
if (numOfBitmaps > 0) {
tlen = tsDecompressTinyint(POINTER_SHIFT(content, len - bitmapLen - sizeof(TSCKSUM)), bitmapLen, numOfBitmaps,
pDataCol->pBitmap, pDataCol->spaceSize, comp, buffer, bufferSize);
if (tlen <= 0) {
tsdbError(
"Failed to decompress column bitmap, file corrupted, len:%d comp:%d numOfRows:%d maxPoints:%d "
"bufferSize:%d",
bitmapLen, comp, numOfBitmaps, maxPoints, bufferSize);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
// pDataCol->blen = tlen;
}
} else { } else {
// No need to decompress, just memcpy it // No need to decompress, just memcpy it
pDataCol->len = len - sizeof(TSCKSUM); pDataCol->len = len - bitmapLen - sizeof(TSCKSUM);
memcpy(pDataCol->pData, content, pDataCol->len); memcpy(pDataCol->pData, content, pDataCol->len);
if (numOfBitmaps > 0) {
// pDataCol->blen = bitmapLen;
memcpy(pDataCol->pBitmap, POINTER_SHIFT(content, len - bitmapLen - sizeof(TSCKSUM)), bitmapLen);
}
} }
#if 0
if (lenOfBitmaps > 0) { if (lenOfBitmaps > 0) {
pDataCol->len -= lenOfBitmaps; pDataCol->len -= lenOfBitmaps;
...@@ -653,7 +674,10 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32 ...@@ -653,7 +674,10 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32
} else if (IS_VAR_DATA_TYPE(pDataCol->type)) { } else if (IS_VAR_DATA_TYPE(pDataCol->type)) {
dataColSetOffset(pDataCol, numOfRows); dataColSetOffset(pDataCol, numOfRows);
} }
#endif
if (IS_VAR_DATA_TYPE(pDataCol->type)) {
dataColSetOffset(pDataCol, numOfRows);
}
return 0; return 0;
} }
...@@ -740,14 +764,16 @@ static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols * ...@@ -740,14 +764,16 @@ static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *
static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol) { static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol) {
ASSERT(pDataCol->colId == pBlockCol->colId); ASSERT(pDataCol->colId == pBlockCol->colId);
STsdb * pRepo = TSDB_READ_REPO(pReadh); STsdb *pRepo = TSDB_READ_REPO(pReadh);
STsdbCfg *pCfg = REPO_CFG(pRepo); STsdbCfg *pCfg = REPO_CFG(pRepo);
int nBitmaps = (int)TD_BITMAP_BYTES(pBlock->numOfRows); int nBitmaps = (int)TD_BITMAP_BYTES(pBlock->numOfRows);
int32_t tBitmaps = 0; // int32_t tBitmaps = 0;
int32_t tLenBitmap = 0; int32_t tLenBitmap = 0;
if (!TD_COL_ROWS_NORM(pBlockCol)) { if (!TD_COL_ROWS_NORM(pBlockCol)) {
tLenBitmap = nBitmaps;
#if 0
if (IS_VAR_DATA_TYPE(pDataCol->type)) { if (IS_VAR_DATA_TYPE(pDataCol->type)) {
tBitmaps = nBitmaps; tBitmaps = nBitmaps;
tLenBitmap = tBitmaps; tLenBitmap = tBitmaps;
...@@ -755,9 +781,10 @@ static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBloc ...@@ -755,9 +781,10 @@ static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBloc
tBitmaps = (int32_t)ceil((double)nBitmaps / TYPE_BYTES[pDataCol->type]); tBitmaps = (int32_t)ceil((double)nBitmaps / TYPE_BYTES[pDataCol->type]);
tLenBitmap = tBitmaps * TYPE_BYTES[pDataCol->type]; tLenBitmap = tBitmaps * TYPE_BYTES[pDataCol->type];
} }
#endif
} }
int tsize = pDataCol->bytes * pBlock->numOfRows + tLenBitmap + COMP_OVERFLOW_BYTES; int tsize = pDataCol->bytes * pBlock->numOfRows + tLenBitmap + 2 * COMP_OVERFLOW_BYTES;
if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlockCol->len) < 0) return -1; if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlockCol->len) < 0) return -1;
if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), tsize) < 0) return -1; if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), tsize) < 0) return -1;
...@@ -785,8 +812,8 @@ static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBloc ...@@ -785,8 +812,8 @@ static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBloc
return -1; return -1;
} }
if (tsdbCheckAndDecodeColumnData(pDataCol, pReadh->pBuf, pBlockCol->len, pBlock->algorithm, pBlock->numOfRows, if (tsdbCheckAndDecodeColumnData(pDataCol, pReadh->pBuf, pBlockCol->len, pBlockCol->blen, pBlock->algorithm,
tBitmaps, tLenBitmap, pCfg->maxRowsPerFileBlock, pReadh->pCBuf, pBlock->numOfRows, tLenBitmap, pCfg->maxRowsPerFileBlock, pReadh->pCBuf,
(int32_t)taosTSizeof(pReadh->pCBuf)) < 0) { (int32_t)taosTSizeof(pReadh->pCBuf)) < 0) {
tsdbError("vgId:%d file %s is broken at column %d offset %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), tsdbError("vgId:%d file %s is broken at column %d offset %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile),
pBlockCol->colId, offset); pBlockCol->colId, offset);
......
...@@ -28,11 +28,11 @@ ...@@ -28,11 +28,11 @@
* *
* BOOLEAN Compression Algorithm: * BOOLEAN Compression Algorithm:
* We provide two methods for compress boolean types. Because boolean types in C * We provide two methods for compress boolean types. Because boolean types in C
* code are char bytes with 0 and 1 values only, only one bit can used to discrimenate * code are char bytes with 0 and 1 values only, only one bit can used to discriminate
* the values. * the values.
* 1. The first method is using only 1 bit to represent the boolean value with 1 for * 1. The first method is using only 1 bit to represent the boolean value with 1 for
* true and 0 for false. Then the compression rate is 1/8. * true and 0 for false. Then the compression rate is 1/8.
* 2. The second method is using run length encoding (RLE) methods. This methos works * 2. The second method is using run length encoding (RLE) methods. This method works
* better when there are a lot of consecutive true values or false values. * better when there are a lot of consecutive true values or false values.
* *
* STRING Compression Algorithm: * STRING Compression Algorithm:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册