From 835d9248fd753ef6739bb256955e359870f3c551 Mon Sep 17 00:00:00 2001 From: hzcheng Date: Thu, 30 Apr 2020 14:37:03 +0800 Subject: [PATCH] TD-166 --- src/common/inc/tdataformat.h | 48 ++++++++++++----------------- src/common/src/tdataformat.c | 58 +++++++++++++++++------------------- src/common/src/ttypes.c | 22 +++++++------- src/inc/taosdef.h | 7 +++++ src/tsdb/src/tsdbMeta.c | 4 +-- src/tsdb/src/tsdbRWHelper.c | 13 ++------ src/util/inc/tutil.h | 3 ++ 7 files changed, 73 insertions(+), 82 deletions(-) diff --git a/src/common/inc/tdataformat.h b/src/common/inc/tdataformat.h index 4b8940536f..489635420a 100644 --- a/src/common/inc/tdataformat.h +++ b/src/common/inc/tdataformat.h @@ -67,13 +67,6 @@ int tdGetSchemaEncodeSize(STSchema *pSchema); void * tdEncodeSchema(void *dst, STSchema *pSchema); STSchema *tdDecodeSchema(void **psrc); -// ----------------- For variable data types such as TSDB_DATA_TYPE_BINARY and TSDB_DATA_TYPE_NCHAR -typedef int32_t VarDataOffsetT; -typedef int16_t VarDataLenT; -#define varDataLen(v) ((VarDataLenT *)(v))[0] -#define varDataTLen(v) (sizeof(VarDataLenT) + varDataLen(v)) -#define varDataVal(v) ((void *)((char *)v + sizeof(VarDataLenT))) - // ----------------- Data row structure /* A data row, the format is like below: @@ -90,28 +83,27 @@ typedef void *SDataRow; #define TD_DATA_ROW_HEAD_SIZE sizeof(int32_t) #define dataRowLen(r) (*(int32_t *)(r)) -#define dataRowAt(r, idx) ((char *)(r) + (idx)) -#define dataRowTuple(r) dataRowAt(r, TD_DATA_ROW_HEAD_SIZE) +#define dataRowTuple(r) POINTER_DRIFT(r, TD_DATA_ROW_HEAD_SIZE) #define dataRowKey(r) (*(TSKEY *)(dataRowTuple(r))) #define dataRowSetLen(r, l) (dataRowLen(r) = (l)) #define dataRowCpy(dst, r) memcpy((dst), (r), dataRowLen(r)) -#define dataRowMaxBytesFromSchema(s) ((s)->tlen + TD_DATA_ROW_HEAD_SIZE) +#define dataRowMaxBytesFromSchema(s) (schemaTLen(s) + TD_DATA_ROW_HEAD_SIZE) SDataRow tdNewDataRowFromSchema(STSchema *pSchema); void tdFreeDataRow(SDataRow row); void tdInitDataRow(SDataRow row, STSchema *pSchema); int tdAppendColVal(SDataRow row, void *value, int8_t type, int32_t bytes, int32_t offset); -void tdDataRowReset(SDataRow row, STSchema *pSchema); SDataRow tdDataRowDup(SDataRow row); +// NOTE: offset here including the header size static FORCE_INLINE void *tdGetRowDataOfCol(SDataRow row, int8_t type, int32_t offset) { switch (type) { case TSDB_DATA_TYPE_BINARY: case TSDB_DATA_TYPE_NCHAR: - return dataRowAt(row, *(int32_t *)dataRowAt(row, offset)); + return POINTER_DRIFT(row, *(VarDataOffsetT *)POINTER_DRIFT(row, offset)); break; default: - return dataRowAt(row, offset); + return POINTER_DRIFT(row, offset); break; } } @@ -121,7 +113,7 @@ typedef struct SDataCol { int8_t type; // column type int16_t colId; // column ID int bytes; // column data bytes defined - int offset; // data offset in a SDataRow + int offset; // data offset in a SDataRow (including the header size) int spaceSize; // Total space size for this column int len; // column data length VarDataOffsetT *dataOff; // For binary and nchar data, the offset in the data column @@ -140,28 +132,26 @@ void dataColSetNEleNull(SDataCol *pCol, int nEle, int maxPoints); // Get the data pointer from a column-wised data static FORCE_INLINE void *tdGetColDataOfRow(SDataCol *pCol, int row) { - switch (pCol->type) - { - case TSDB_DATA_TYPE_BINARY: - case TSDB_DATA_TYPE_NCHAR: - return (void *)((char *)(pCol->pData) + pCol->dataOff[row]); - break; - - default: - return (void *)((char *)(pCol->pData) + TYPE_BYTES[pCol->type] * row); - break; + switch (pCol->type) { + case TSDB_DATA_TYPE_BINARY: + case TSDB_DATA_TYPE_NCHAR: + return POINTER_DRIFT(pCol->pData, pCol->dataOff[row]); + break; + + default: + return POINTER_DRIFT(pCol->pData, TYPE_BYTES[pCol->type] * row); + break; } } static FORCE_INLINE int32_t dataColGetNEleLen(SDataCol *pDataCol, int rows) { - void *ptr = NULL; + ASSERT(rows > 0); + switch (pDataCol->type) { case TSDB_DATA_TYPE_BINARY: case TSDB_DATA_TYPE_NCHAR: - ptr = tdGetColDataOfRow(pDataCol, rows - 1); - return ((VarDataOffsetT *)(pDataCol->pData))[rows-1] + varDataTLen(ptr); + return pDataCol->dataOff[rows - 1] + varDataTLen(tdGetColDataOfRow(pDataCol, rows - 1)); break; - default: return TYPE_BYTES[pDataCol->type] * rows; } @@ -182,7 +172,7 @@ typedef struct { } SDataCols; #define keyCol(pCols) (&((pCols)->cols[0])) // Key column -#define dataColsKeyAt(pCols, idx) ((int64_t *)(keyCol(pCols)->pData))[(idx)] +#define dataColsKeyAt(pCols, idx) ((TSKEY *)(keyCol(pCols)->pData))[(idx)] #define dataColsKeyFirst(pCols) dataColsKeyAt(pCols, 0) #define dataColsKeyLast(pCols) dataColsKeyAt(pCols, (pCols)->numOfPoints - 1) diff --git a/src/common/src/tdataformat.c b/src/common/src/tdataformat.c index 3034532d20..7321e1c921 100644 --- a/src/common/src/tdataformat.c +++ b/src/common/src/tdataformat.c @@ -47,17 +47,17 @@ int tdSchemaAddCol(STSchema *pSchema, int8_t type, int16_t colId, int32_t bytes) STColumn *pCol = schemaColAt(pSchema, schemaNCols(pSchema)); colSetType(pCol, type); colSetColId(pCol, colId); - if (pSchema->numOfCols == 0) { + if (schemaNCols(pSchema) == 0) { colSetOffset(pCol, 0); } else { - STColumn *pTCol = pSchema->columns + pSchema->numOfCols - 1; + STColumn *pTCol = schemaColAt(pSchema, schemaNCols(pSchema)-1); colSetOffset(pCol, pTCol->offset + TYPE_BYTES[pTCol->type]); } switch (type) { case TSDB_DATA_TYPE_BINARY: case TSDB_DATA_TYPE_NCHAR: - colSetBytes(pCol, bytes); - pSchema->tlen += (TYPE_BYTES[type] + sizeof(int16_t) + bytes); // TODO: remove int16_t here + colSetBytes(pCol, bytes); // Set as maximum bytes + pSchema->tlen += (TYPE_BYTES[type] + sizeof(VarDataLenT) + bytes); break; default: colSetBytes(pCol, TYPE_BYTES[type]); @@ -167,16 +167,16 @@ void tdFreeDataRow(SDataRow row) { int tdAppendColVal(SDataRow row, void *value, int8_t type, int32_t bytes, int32_t offset) { ASSERT(value != NULL); int32_t toffset = offset + TD_DATA_ROW_HEAD_SIZE; - char * ptr = dataRowAt(row, dataRowLen(row)); + char * ptr = POINTER_DRIFT(row, dataRowLen(row)); switch (type) { case TSDB_DATA_TYPE_BINARY: case TSDB_DATA_TYPE_NCHAR: // set offset - *(int32_t *)dataRowAt(row, toffset) = dataRowLen(row); + *(VarDataOffsetT *)POINTER_DRIFT(row, toffset) = dataRowLen(row); // set length - int16_t slen = 0; + VarDataLenT slen = 0; if (isNull(value, type)) { slen = (type == TSDB_DATA_TYPE_BINARY) ? sizeof(int8_t) : sizeof(int32_t); } else { @@ -188,23 +188,21 @@ int tdAppendColVal(SDataRow row, void *value, int8_t type, int32_t bytes, int32_ } ASSERT(slen <= bytes); - *(int16_t *)ptr = slen; - ptr += sizeof(int16_t); + *(VarDataLenT *)ptr = slen; + ptr = POINTER_DRIFT(ptr, sizeof(VarDataLenT)); memcpy((void *)ptr, value, slen); dataRowLen(row) += (sizeof(int16_t) + slen); break; default: - memcpy(dataRowAt(row, toffset), value, TYPE_BYTES[type]); + memcpy(POINTER_DRIFT(row, toffset), value, TYPE_BYTES[type]); break; } return 0; } -void tdDataRowReset(SDataRow row, STSchema *pSchema) { tdInitDataRow(row, pSchema); } - SDataRow tdDataRowDup(SDataRow row) { SDataRow trow = malloc(dataRowLen(row)); if (trow == NULL) return NULL; @@ -217,20 +215,21 @@ void dataColInit(SDataCol *pDataCol, STColumn *pCol, void **pBuf, int maxPoints) pDataCol->type = colType(pCol); pDataCol->colId = colColId(pCol); pDataCol->bytes = colBytes(pCol); - pDataCol->offset = colOffset(pCol); + pDataCol->offset = colOffset(pCol) + TD_DATA_ROW_HEAD_SIZE; pDataCol->len = 0; if (pDataCol->type == TSDB_DATA_TYPE_BINARY || pDataCol->type == TSDB_DATA_TYPE_NCHAR) { - pDataCol->spaceSize = (sizeof(int32_t) + sizeof(int16_t) + pDataCol->bytes) * maxPoints; + pDataCol->spaceSize = (sizeof(VarDataLenT) + pDataCol->bytes) * maxPoints; pDataCol->dataOff = (VarDataOffsetT *)(*pBuf); - pDataCol->pData = (void *)((char *)(*pBuf) + sizeof(int32_t) * maxPoints); + pDataCol->pData = POINTER_DRIFT(*pBuf, TYPE_BYTES[pDataCol->type] * maxPoints); + *pBuf = POINTER_DRIFT(*pBuf, pDataCol->spaceSize + TYPE_BYTES[pDataCol->type] * maxPoints); } else { pDataCol->spaceSize = pDataCol->bytes * maxPoints; pDataCol->dataOff = NULL; pDataCol->pData = *pBuf; + *pBuf = POINTER_DRIFT(*pBuf, pDataCol->spaceSize); } - *pBuf = (void *)((char *)(*pBuf) + pDataCol->spaceSize); } void dataColAppendVal(SDataCol *pCol, void *value, int numOfPoints, int maxPoints) { @@ -240,15 +239,15 @@ void dataColAppendVal(SDataCol *pCol, void *value, int numOfPoints, int maxPoint case TSDB_DATA_TYPE_BINARY: case TSDB_DATA_TYPE_NCHAR: // set offset - ((int32_t *)(pCol->pData))[numOfPoints] = pCol->len; + pCol->dataOff[numOfPoints] = pCol->len; // Copy data - memcpy((void *)((char *)pCol->pData + pCol->len), value, varDataTLen(value)); + memcpy(POINTER_DRIFT(pCol->pData, pCol->len), value, varDataTLen(value)); // Update the length pCol->len += varDataTLen(value); break; default: ASSERT(pCol->len == TYPE_BYTES[pCol->type] * numOfPoints); - memcpy((void *)((char *)pCol->pData + pCol->len), value, pCol->bytes); + memcpy(POINTER_DRIFT(pCol->pData, pCol->len), value, pCol->bytes); pCol->len += pCol->bytes; break; } @@ -261,26 +260,24 @@ void dataColPopPoints(SDataCol *pCol, int pointsToPop, int numOfPoints) { if (pCol->type == TSDB_DATA_TYPE_BINARY || pCol->type == TSDB_DATA_TYPE_NCHAR) { ASSERT(pCol->len > 0); - VarDataOffsetT toffset = ((VarDataOffsetT *)(pCol->pData))[pointsToPop]; + VarDataOffsetT toffset = pCol->dataOff[pointsToPop]; pCol->len = pCol->len - toffset; ASSERT(pCol->len > 0); - memmove(pCol->pData, (void *)((char *)(pCol->pData) + toffset), pCol->len); + memmove(pCol->pData, POINTER_DRIFT(pCol->pData, toffset), pCol->len); dataColSetOffset(pCol, pointsLeft); } else { ASSERT(pCol->len == TYPE_BYTES[pCol->type] * numOfPoints); pCol->len = TYPE_BYTES[pCol->type] * pointsLeft; - memmove(pCol->pData, (void *)((char *)(pCol->pData) + TYPE_BYTES[pCol->type] * pointsToPop), pCol->len); + memmove(pCol->pData, POINTER_DRIFT(pCol->pData, TYPE_BYTES[pCol->type] * pointsToPop), pCol->len); } } bool isNEleNull(SDataCol *pCol, int nEle) { - void *ptr = NULL; switch (pCol->type) { case TSDB_DATA_TYPE_BINARY: case TSDB_DATA_TYPE_NCHAR: for (int i = 0; i < nEle; i++) { - ptr = tdGetColDataOfRow(pCol, i); - if (!isNull(varDataVal(ptr), pCol->type)) return false; + if (!isNull(varDataVal(tdGetColDataOfRow(pCol, i)), pCol->type)) return false; } return true; default: @@ -316,13 +313,14 @@ void dataColSetNEleNull(SDataCol *pCol, int nEle, int maxPoints) { void dataColSetOffset(SDataCol *pCol, int nEle) { ASSERT(((pCol->type == TSDB_DATA_TYPE_BINARY) || (pCol->type == TSDB_DATA_TYPE_NCHAR))); - char *tptr = (char *)(pCol->pData); + void * tptr = pCol->pData; + // char *tptr = (char *)(pCol->pData); VarDataOffsetT offset = 0; for (int i = 0; i < nEle; i++) { - ((VarDataOffsetT *)(pCol->pData))[i] = offset; + pCol->dataOff[i] = offset; offset += varDataTLen(tptr); - tptr = tptr + varDataTLen(tptr); + tptr = POINTER_DRIFT(tptr, varDataTLen(tptr)); } } @@ -352,7 +350,7 @@ void tdInitDataCols(SDataCols *pCols, STSchema *pSchema) { void *ptr = pCols->buf; for (int i = 0; i < schemaNCols(pSchema); i++) { dataColInit(pCols->cols + i, schemaColAt(pSchema, i), &ptr, pCols->maxPoints); - ASSERT((char *)ptr - (char *)pCols <= pCols->bufSize); + ASSERT((char *)ptr - (char *)(pCols->buf) <= pCols->bufSize); } } @@ -390,7 +388,7 @@ SDataCols *tdDupDataCols(SDataCols *pDataCols, bool keepData) { pRet->cols[i].len = pDataCols->cols[i].len; memcpy(pRet->cols[i].pData, pDataCols->cols[i].pData, pDataCols->cols[i].len); if (pRet->cols[i].type == TSDB_DATA_TYPE_BINARY || pRet->cols[i].type == TSDB_DATA_TYPE_NCHAR) { - memcpy(pRet->cols[i].dataOff, pDataCols->cols[i].dataOff, sizeof(int32_t) * pDataCols->maxPoints); + memcpy(pRet->cols[i].dataOff, pDataCols->cols[i].dataOff, sizeof(VarDataOffsetT) * pDataCols->maxPoints); } } } diff --git a/src/common/src/ttypes.c b/src/common/src/ttypes.c index 9f392bcae5..d99e916c73 100644 --- a/src/common/src/ttypes.c +++ b/src/common/src/ttypes.c @@ -19,17 +19,17 @@ #include "tscompression.h" const int32_t TYPE_BYTES[11] = { - -1, // TSDB_DATA_TYPE_NULL - sizeof(int8_t), // TSDB_DATA_TYPE_BOOL - sizeof(int8_t), // TSDB_DATA_TYPE_TINYINT - sizeof(int16_t), // TSDB_DATA_TYPE_SMALLINT - sizeof(int32_t), // TSDB_DATA_TYPE_INT - sizeof(int64_t), // TSDB_DATA_TYPE_BIGINT - sizeof(float), // TSDB_DATA_TYPE_FLOAT - sizeof(double), // TSDB_DATA_TYPE_DOUBLE - sizeof(int32_t), // TSDB_DATA_TYPE_BINARY - sizeof(TSKEY), // TSDB_DATA_TYPE_TIMESTAMP - sizeof(int32_t) // TSDB_DATA_TYPE_NCHAR + -1, // TSDB_DATA_TYPE_NULL + sizeof(int8_t), // TSDB_DATA_TYPE_BOOL + sizeof(int8_t), // TSDB_DATA_TYPE_TINYINT + sizeof(int16_t), // TSDB_DATA_TYPE_SMALLINT + sizeof(int32_t), // TSDB_DATA_TYPE_INT + sizeof(int64_t), // TSDB_DATA_TYPE_BIGINT + sizeof(float), // TSDB_DATA_TYPE_FLOAT + sizeof(double), // TSDB_DATA_TYPE_DOUBLE + sizeof(VarDataOffsetT), // TSDB_DATA_TYPE_BINARY + sizeof(TSKEY), // TSDB_DATA_TYPE_TIMESTAMP + sizeof(VarDataOffsetT) // TSDB_DATA_TYPE_NCHAR }; tDataTypeDescriptor tDataTypeDesc[11] = { diff --git a/src/inc/taosdef.h b/src/inc/taosdef.h index 57c54efba4..ce0d52d737 100644 --- a/src/inc/taosdef.h +++ b/src/inc/taosdef.h @@ -32,6 +32,13 @@ extern "C" { #define TSKEY int64_t #endif +// ----------------- For variable data types such as TSDB_DATA_TYPE_BINARY and TSDB_DATA_TYPE_NCHAR +typedef int32_t VarDataOffsetT; +typedef int16_t VarDataLenT; +#define varDataLen(v) ((VarDataLenT *)(v))[0] +#define varDataTLen(v) (sizeof(VarDataLenT) + varDataLen(v)) +#define varDataVal(v) ((void *)((char *)v + sizeof(VarDataLenT))) + // this data type is internally used only in 'in' query to hold the values #define TSDB_DATA_TYPE_ARRAY (TSDB_DATA_TYPE_NCHAR + 1) diff --git a/src/tsdb/src/tsdbMeta.c b/src/tsdb/src/tsdbMeta.c index caeff5b0c8..ecd4c0225b 100644 --- a/src/tsdb/src/tsdbMeta.c +++ b/src/tsdb/src/tsdbMeta.c @@ -242,7 +242,7 @@ int32_t tsdbGetTableTagVal(TsdbRepoT* repo, STableId id, int32_t colId, int16_t* assert(pCol != NULL); SDataRow row = (SDataRow)pTable->tagVal; - char* d = dataRowAt(row, TD_DATA_ROW_HEAD_SIZE); + char* d = dataRowTuple(row); *val = d; *type = pCol->type; @@ -523,5 +523,5 @@ static int tsdbEstimateTableEncodeSize(STable *pTable) { char *getTupleKey(const void * data) { SDataRow row = (SDataRow)data; - return dataRowAt(row, TD_DATA_ROW_HEAD_SIZE); + return POINTER_DRIFT(row, TD_DATA_ROW_HEAD_SIZE); } \ No newline at end of file diff --git a/src/tsdb/src/tsdbRWHelper.c b/src/tsdb/src/tsdbRWHelper.c index 61c463801c..ee2f29ea55 100644 --- a/src/tsdb/src/tsdbRWHelper.c +++ b/src/tsdb/src/tsdbRWHelper.c @@ -566,16 +566,9 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, char *content, int32 // Decode the data if (comp) { - // Need to decompress - void *pStart = NULL; - if (pDataCol->type == TSDB_DATA_TYPE_BINARY || pDataCol->type == TSDB_DATA_TYPE_NCHAR) { - pStart = (char *)(pDataCol->pData) + sizeof(int32_t) * maxPoints; - } else { - pStart = pDataCol->pData; - } - // TODO: get rid of INT32_MAX here - pDataCol->len = (*(tDataTypeDesc[pDataCol->type].decompFunc))(content, len - sizeof(TSCKSUM), numOfPoints, pStart, - INT32_MAX, comp, buffer, bufferSize); + // // Need to decompress + pDataCol->len = (*(tDataTypeDesc[pDataCol->type].decompFunc))( + content, len - sizeof(TSCKSUM), numOfPoints, pDataCol->pData, pDataCol->spaceSize, comp, buffer, bufferSize); if (pDataCol->type == TSDB_DATA_TYPE_BINARY || pDataCol->type == TSDB_DATA_TYPE_NCHAR) { pDataCol->len += (sizeof(int32_t) * maxPoints); dataColSetOffset(pDataCol, numOfPoints); diff --git a/src/util/inc/tutil.h b/src/util/inc/tutil.h index 9dcddcfcb7..55f4496755 100644 --- a/src/util/inc/tutil.h +++ b/src/util/inc/tutil.h @@ -44,6 +44,9 @@ extern "C" { #define tclose(x) taosCloseSocket(x) +// Pointer p drift right by b bytes +#define POINTER_DRIFT(p, b) ((void *)((char *)(p) + (b))) + #ifndef NDEBUG #define ASSERT(x) assert(x) #else -- GitLab