tdataformat.h 12.8 KB
Newer Older
H
more  
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
H
hzcheng 已提交
15
#ifndef _TD_DATA_FORMAT_H_
H
more  
Hongze Cheng 已提交
16 17 18
#define _TD_DATA_FORMAT_H_

#include <stdint.h>
H
hzcheng 已提交
19
#include <stdlib.h>
H
hzcheng 已提交
20
#include <string.h>
H
more  
Hongze Cheng 已提交
21

H
Hongze Cheng 已提交
22
#include "talgo.h"
H
hzcheng 已提交
23
#include "taosdef.h"
H
TD-166  
hzcheng 已提交
24
#include "tutil.h"
H
hzcheng 已提交
25

H
more  
hzcheng 已提交
26 27 28
#ifdef __cplusplus
extern "C" {
#endif
H
hzcheng 已提交
29

H
Hongze Cheng 已提交
30 31 32 33
/*
 * Store the NUL-terminated string `str` into the var-length buffer `x`:
 * the length (VarDataLenT, terminator excluded) is written at `x` and the
 * characters are copied to varDataVal(x).
 *
 * Both arguments are evaluated more than once, so they must be free of side
 * effects. The macro deliberately does NOT end with a semicolon, so that
 * `if (c) STR_TO_VARSTR(a, b); else ...` parses as a single statement.
 */
#define STR_TO_VARSTR(x, str)                           \
  do {                                                  \
    VarDataLenT vlen_ = (VarDataLenT)strlen((str));     \
    *(VarDataLenT *)(x) = vlen_;                        \
    memcpy(varDataVal(x), (str), vlen_);                \
  } while (0)

/*
 * Copy `str` into the var-length buffer `x`, writing at most
 * (_maxs - VARSTR_HEADER_SIZE) bytes of payload via stpncpy(), then set the
 * length header from the end pointer stpncpy() returned.
 *
 * `x` takes part in pointer subtraction against a `char *`, so it has to be
 * a character pointer. `x` is evaluated more than once.
 */
#define STR_WITH_MAXSIZE_TO_VARSTR(x, str, _maxs)                         \
  do {                                                                    \
    char *_e = stpncpy(varDataVal(x), (str), (_maxs)-VARSTR_HEADER_SIZE); \
    varDataSetLen(x, (_e - (x)-VARSTR_HEADER_SIZE));                      \
  } while (0)

/*
 * Store `_size` bytes starting at `str` into the var-length buffer `x`:
 * `_size` is written as the VarDataLenT header and the bytes are copied to
 * varDataVal(x). No terminator is looked for or written.
 *
 * `x` and `_size` are evaluated twice. The macro does NOT end with a
 * semicolon so it can be used as a single statement in if/else.
 */
#define STR_WITH_SIZE_TO_VARSTR(x, str, _size) \
  do {                                         \
    *(VarDataLenT *)(x) = (_size);             \
    memcpy(varDataVal(x), (str), (_size));     \
  } while (0)
H
hjxilinx 已提交
48

H
hzcheng 已提交
49 50 51 52 53
// ----------------- TSDB COLUMN DEFINITION
// Description of one column inside a schema.
typedef struct {
  int8_t  type;    // Column type
  int16_t colId;   // column ID
  int32_t bytes;   // column bytes
  int32_t offset;  // point offset in SDataRow after the header part
} STColumn;

// Field accessors for an STColumn pointer; each expands to an lvalue.
#define colType(col)   ((col)->type)
#define colColId(col)  ((col)->colId)
#define colBytes(col)  ((col)->bytes)
#define colOffset(col) ((col)->offset)

// Field setters; each expands to an assignment expression on the field.
#define colSetType(col, t)   ((col)->type = (t))
#define colSetColId(col, id) ((col)->colId = (id))
#define colSetBytes(col, b)  ((col)->bytes = (b))
#define colSetOffset(col, o) ((col)->offset = (o))

// ----------------- TSDB SCHEMA DEFINITION
typedef struct {
H
Hongze Cheng 已提交
69
  int      version;    // version
H
hzcheng 已提交
70
  int      numOfCols;  // Number of columns appended
H
TD-166  
hzcheng 已提交
71
  int      tlen;       // maximum length of a SDataRow without the header part
H
TD-353  
Hongze Cheng 已提交
72 73
  uint16_t flen;       // First part length in a SDataRow after the header part
  uint16_t vlen;       // pure value part length, excluded the overhead
H
hzcheng 已提交
74 75 76 77
  STColumn columns[];
} STSchema;

// Field accessors for an STSchema pointer; each expands to an lvalue.
#define schemaNCols(s) ((s)->numOfCols)
#define schemaVersion(s) ((s)->version)
#define schemaTLen(s) ((s)->tlen)
#define schemaFLen(s) ((s)->flen)
#define schemaVLen(s) ((s)->vlen)
// Pointer to the i-th column. `i` is parenthesized so that an expression
// argument such as `k & 1` is applied as the index, not to the pointer sum.
#define schemaColAt(s, i) ((s)->columns + (i))
// Release a schema through tfree().
#define tdFreeSchema(s) tfree((s))
H
hzcheng 已提交
84 85

// Schema helpers; their definitions are not part of this header.
// NOTE(review): the names suggest duplicate / serialize / deserialize --
// confirm ownership of the returned objects against the implementation.
STSchema *tdDupSchema(STSchema *pSchema);
int       tdEncodeSchema(void **buf, STSchema *pSchema);
void *    tdDecodeSchema(void *buf, STSchema **pRSchema);
H
hzcheng 已提交
88

H
Hongze Cheng 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
/*
 * bsearch() comparator: `key1` points to the int16_t column ID being looked
 * up, `key2` points to an STColumn element of the searched array.
 * Returns 1, -1 or 0 when the key is greater than, less than or equal to
 * the element's colId.
 */
static FORCE_INLINE int comparColId(const void *key1, const void *key2) {
  int16_t wanted = *(int16_t *)key1;
  int16_t current = ((STColumn *)key2)->colId;

  if (wanted == current) return 0;
  return (wanted > current) ? 1 : -1;
}

/*
 * Look up the column with ID `colId` in `pSchema` using bsearch() over
 * pSchema->columns (so the columns must be ordered by colId for the search
 * to be meaningful). Returns the matching STColumn, or NULL when absent.
 */
static FORCE_INLINE STColumn *tdGetColOfID(STSchema *pSchema, int16_t colId) {
  return (STColumn *)bsearch(&colId, (void *)pSchema->columns, schemaNCols(pSchema), sizeof(STColumn), comparColId);
}

H
Hongze Cheng 已提交
105 106 107 108 109
// ----------------- SCHEMA BUILDER DEFINITION
typedef struct {
  int       tCols;
  int       nCols;
  int       tlen;
H
TD-353  
Hongze Cheng 已提交
110 111
  uint16_t  flen;
  uint16_t  vlen;
H
Hongze Cheng 已提交
112 113 114 115 116 117 118 119 120 121
  int       version;
  STColumn *columns;
} STSchemaBuilder;

// Schema builder API; the definitions are not part of this header.
// NOTE(review): the int-returning functions presumably report failure with a
// non-zero value -- confirm against the implementation.
int       tdInitTSchemaBuilder(STSchemaBuilder *pBuilder, int32_t version);
void      tdDestroyTSchemaBuilder(STSchemaBuilder *pBuilder);
void      tdResetTSchemaBuilder(STSchemaBuilder *pBuilder, int32_t version);
int       tdAddColToSchema(STSchemaBuilder *pBuilder, int8_t type, int16_t colId, int32_t bytes);
STSchema *tdGetSchemaFromBuilder(STSchemaBuilder *pBuilder);

H
more  
Hongze Cheng 已提交
122 123
// ----------------- Data row structure

H
hzcheng 已提交
124
/* A data row, the format is like below:
 * |<--------------------+--------------------------- len ---------------------------------->|
 * |<--     Head      -->|<---------   flen -------------->|                                 |
 * +---------------------+---------------------------------+---------------------------------+
 * | uint16_t |  int16_t |                                 |                                 |
 * +----------+----------+---------------------------------+---------------------------------+
 * |   len    | sversion |           First part            |             Second part         |
 * +----------+----------+---------------------------------+---------------------------------+
 */
H
hzcheng 已提交
133 134
// Opaque handle to a serialized data row (header + first part + second part).
typedef void *SDataRow;

// Row header: uint16_t total length followed by int16_t schema version.
#define TD_DATA_ROW_HEAD_SIZE (sizeof(uint16_t) + sizeof(int16_t))

// Total row length stored at the start of the row (lvalue).
#define dataRowLen(r) (*(uint16_t *)(r))
// Schema version stored right after the uint16_t length field (lvalue).
#define dataRowVersion(r) (*(int16_t *)POINTER_SHIFT(r, sizeof(uint16_t)))
// Start of the data that follows the header.
#define dataRowTuple(r) POINTER_SHIFT(r, TD_DATA_ROW_HEAD_SIZE)
// First TSKEY-sized value after the header, read as the row key.
#define dataRowKey(r) (*(TSKEY *)(dataRowTuple(r)))
#define dataRowSetLen(r, l) (dataRowLen(r) = (l))
#define dataRowSetVersion(r, v) (dataRowVersion(r) = (v))
// Copy the whole row (dataRowLen(r) bytes) into dst.
#define dataRowCpy(dst, r) memcpy((dst), (r), dataRowLen(r))
// Largest possible row for schema s: schema tlen plus the row header.
#define dataRowMaxBytesFromSchema(s) (schemaTLen(s) + TD_DATA_ROW_HEAD_SIZE)
H
hzcheng 已提交
145

H
hzcheng 已提交
146
// Data row lifecycle API; the definitions are not part of this header.
// NOTE(review): rows returned by tdNewDataRowFromSchema()/tdDataRowDup() are
// presumably released with tdFreeDataRow() -- confirm against the implementation.
SDataRow tdNewDataRowFromSchema(STSchema *pSchema);
void     tdFreeDataRow(SDataRow row);
void     tdInitDataRow(SDataRow row, STSchema *pSchema);
SDataRow tdDataRowDup(SDataRow row);
H
more  
Hongze Cheng 已提交
150

151
// NOTE: offset here does not include the dataRow header length
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
/*
 * Write one column value into `row`.
 *
 *   row    - destination data row
 *   value  - source value; must not be NULL (asserted)
 *   type   - column data type
 *   bytes  - not used by this function
 *   offset - column offset, not counting the row header
 *
 * BINARY/NCHAR values are appended at the current end of the row: the
 * column slot receives the row length before the append, and the row length
 * then grows by varDataTLen(value). Every other type is copied straight
 * into the column slot using TYPE_BYTES[type]. Always returns 0.
 */
static FORCE_INLINE int tdAppendColVal(SDataRow row, void *value, int8_t type, int32_t bytes, int32_t offset) {
  ASSERT(value != NULL);

  int32_t slotOffset = offset + TD_DATA_ROW_HEAD_SIZE;
  char *  rowTail = (char *)POINTER_SHIFT(row, dataRowLen(row));

  if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
    // The slot only records where the variable-length payload starts.
    *(VarDataOffsetT *)POINTER_SHIFT(row, slotOffset) = dataRowLen(row);
    memcpy(rowTail, value, varDataTLen(value));
    dataRowLen(row) += varDataTLen(value);
  } else {
    memcpy(POINTER_SHIFT(row, slotOffset), value, TYPE_BYTES[type]);
  }

  return 0;
}

H
TD-166  
hzcheng 已提交
172
// NOTE: offset here includes the header size
H
TD-166  
hzcheng 已提交
173 174 175 176
/*
 * Return a pointer to a column's value inside `row`.
 *
 *   type   - column data type
 *   offset - column slot offset from the start of the row
 *
 * For BINARY/NCHAR the slot holds a VarDataOffsetT that is itself an offset
 * from the start of the row to the payload; for every other type the value
 * lives directly in the slot.
 */
static FORCE_INLINE void *tdGetRowDataOfCol(SDataRow row, int8_t type, int32_t offset) {
  switch (type) {
    case TSDB_DATA_TYPE_BINARY:
    case TSDB_DATA_TYPE_NCHAR:
      return POINTER_SHIFT(row, *(VarDataOffsetT *)POINTER_SHIFT(row, offset));
    default:
      return POINTER_SHIFT(row, offset);
  }
}

H
TD-34  
hzcheng 已提交
183 184
// ----------------- Data column structure
typedef struct SDataCol {
H
TD-166  
hzcheng 已提交
185 186 187
  int8_t          type;       // column type
  int16_t         colId;      // column ID
  int             bytes;      // column data bytes defined
H
TD-166  
hzcheng 已提交
188
  int             offset;     // data offset in a SDataRow (including the header size)
H
TD-166  
hzcheng 已提交
189 190 191 192
  int             spaceSize;  // Total space size for this column
  int             len;        // column data length
  VarDataOffsetT *dataOff;    // For binary and nchar data, the offset in the data column
  void *          pData;      // Actual data pointer
H
TD-34  
hzcheng 已提交
193 194
} SDataCol;

H
TD-166  
hzcheng 已提交
195 196 197
// Mark the column as holding no data by zeroing its length; the buffers
// themselves are left untouched.
static FORCE_INLINE void dataColReset(SDataCol *pDataCol) {
  pDataCol->len = 0;
}

// Data column API; the definitions are not part of this header.
void dataColInit(SDataCol *pDataCol, STColumn *pCol, void **pBuf, int maxPoints);
void dataColAppendVal(SDataCol *pCol, void *value, int numOfRows, int maxPoints);
void dataColPopPoints(SDataCol *pCol, int pointsToPop, int numOfRows);
void dataColSetOffset(SDataCol *pCol, int nEle);

bool isNEleNull(SDataCol *pCol, int nEle);
void dataColSetNEleNull(SDataCol *pCol, int nEle, int maxPoints);
H
TD-166  
hzcheng 已提交
204 205 206

// Get the data pointer from a column-wised data
/*
 * Return a pointer to the value stored for row index `row` in `pCol`.
 * BINARY/NCHAR columns are addressed through the per-row offset table
 * pCol->dataOff; every other type is addressed as a fixed-width array with
 * element size TYPE_BYTES[pCol->type]. No bounds check is performed.
 */
static FORCE_INLINE void *tdGetColDataOfRow(SDataCol *pCol, int row) {
  switch (pCol->type) {
    case TSDB_DATA_TYPE_BINARY:
    case TSDB_DATA_TYPE_NCHAR:
      return POINTER_SHIFT(pCol->pData, pCol->dataOff[row]);

    default:
      return POINTER_SHIFT(pCol->pData, TYPE_BYTES[pCol->type] * row);
  }
}

H
TD-166  
hzcheng 已提交
219
/*
 * Return the number of bytes occupied by the first `rows` elements of
 * `pDataCol`; `rows` must be positive (asserted).
 * For BINARY/NCHAR this is the offset of the last of those elements plus
 * its total var-data length; for every other type it is
 * TYPE_BYTES[type] * rows.
 */
static FORCE_INLINE int32_t dataColGetNEleLen(SDataCol *pDataCol, int rows) {
  ASSERT(rows > 0);

  switch (pDataCol->type) {
    case TSDB_DATA_TYPE_BINARY:
    case TSDB_DATA_TYPE_NCHAR:
      return pDataCol->dataOff[rows - 1] + varDataTLen(tdGetColDataOfRow(pDataCol, rows - 1));
    default:
      return TYPE_BYTES[pDataCol->type] * rows;
  }
}

H
TD-34  
hzcheng 已提交
232
typedef struct {
H
Hongze Cheng 已提交
233 234 235 236
  int maxRowSize;
  int maxCols;    // max number of columns
  int maxPoints;  // max number of points
  int bufSize;
H
TD-166  
hzcheng 已提交
237

H
Haojun Liao 已提交
238
  int      numOfRows;
H
TD-34  
hzcheng 已提交
239
  int      numOfCols;  // Total number of cols
H
TD-34  
hzcheng 已提交
240
  int      sversion;   // TODO: set sversion
H
TD-34  
hzcheng 已提交
241 242 243 244
  void *   buf;
  SDataCol cols[];
} SDataCols;

H
TD-34  
hzcheng 已提交
245
#define keyCol(pCols) (&((pCols)->cols[0]))  // Key column
// Key at row index idx, read from the key column's data as a TSKEY (lvalue).
#define dataColsKeyAt(pCols, idx) (((TSKEY *)(keyCol(pCols)->pData))[(idx)])
#define dataColsKeyFirst(pCols) dataColsKeyAt(pCols, 0)
// Key of the last row, or 0 when the block holds no rows. `pCols` is
// parenthesized everywhere so pointer expressions (e.g. `arr + 1`) work.
#define dataColsKeyLast(pCols) (((pCols)->numOfRows == 0) ? 0 : dataColsKeyAt(pCols, (pCols)->numOfRows - 1))
H
TD-34  
hzcheng 已提交
249

H
TD-166  
hzcheng 已提交
250
SDataCols *tdNewDataCols(int maxRowSize, int maxCols, int maxRows);
H
TD-34  
hzcheng 已提交
251
void       tdResetDataCols(SDataCols *pCols);
H
TD-34  
hzcheng 已提交
252
void       tdInitDataCols(SDataCols *pCols, STSchema *pSchema);
H
TD-100  
hzcheng 已提交
253
SDataCols *tdDupDataCols(SDataCols *pCols, bool keepData);
H
TD-34  
hzcheng 已提交
254
void       tdFreeDataCols(SDataCols *pCols);
H
TD-90  
Hongze Cheng 已提交
255
void       tdAppendDataRowToDataCol(SDataRow row, STSchema *pSchema, SDataCols *pCols);
H
Hongze Cheng 已提交
256
void       tdPopDataColsPoints(SDataCols *pCols, int pointsToPop);  //!!!!
H
hzcheng 已提交
257
int        tdMergeDataCols(SDataCols *target, SDataCols *src, int rowsToMerge);
H
TD-521  
Hongze Cheng 已提交
258
void       tdMergeTwoDataCols(SDataCols *target, SDataCols *src1, int *iter1, int limit1, SDataCols *src2, int *iter2, int limit2, int tRows);
H
more  
Hongze Cheng 已提交
259

H
Hongze Cheng 已提交
260 261 262 263 264 265 266 267
// ----------------- K-V data row structure
/*
 * +----------+----------+---------------------------------+---------------------------------+
 * |  int16_t |  int16_t |                                 |                                 |
 * +----------+----------+---------------------------------+---------------------------------+
 * |    len   |   ncols  |           cols index            |             data part           |
 * +----------+----------+---------------------------------+---------------------------------+
 */
H
Hongze Cheng 已提交
268
// Opaque handle to a serialized K-V row (header + column index + data part).
typedef void *SKVRow;

// One entry of a K-V row's column index.
typedef struct {
  int16_t colId;   // column ID
  int16_t offset;  // value offset, relative to the start of the data part
} SColIdx;

B
Bomin Zhang 已提交
275
// K-V row header: int16_t total length followed by int16_t column count.
#define TD_KV_ROW_HEAD_SIZE (2 * sizeof(int16_t))

// Header fields (lvalues).
#define kvRowLen(r) (*(int16_t *)(r))
#define kvRowNCols(r) (*(int16_t *)POINTER_SHIFT(r, sizeof(int16_t)))
// Setters are fully parenthesized so they remain one expression in any context.
#define kvRowSetLen(r, len) (kvRowLen(r) = (len))
#define kvRowSetNCols(r, n) (kvRowNCols(r) = (n))
// Column index array right after the header. The cast is wrapped in
// parentheses so that kvRowColIdx(r)[i] and kvRowColIdx(r)->colId apply to
// the SColIdx pointer and not to the un-cast void pointer.
#define kvRowColIdx(r) ((SColIdx *)POINTER_SHIFT(r, TD_KV_ROW_HEAD_SIZE))
// Start of the data part, i.e. right after kvRowNCols(r) index entries.
#define kvRowValues(r) POINTER_SHIFT(r, TD_KV_ROW_HEAD_SIZE + sizeof(SColIdx) * kvRowNCols(r))
// Copy the whole row (kvRowLen(r) bytes) into dst.
#define kvRowCpy(dst, r) memcpy((dst), (r), kvRowLen(r))
// Value addressed by an index entry (offset is relative to the data part).
#define kvRowColVal(r, colIdx) POINTER_SHIFT(kvRowValues(r), (colIdx)->offset)
#define kvRowColIdxAt(r, i) (kvRowColIdx(r) + (i))
#define kvRowFree(r) tfree(r)
// One past the last byte of the row.
#define kvRowEnd(r) POINTER_SHIFT(r, kvRowLen(r))
H
Hongze Cheng 已提交
288

H
Hongze Cheng 已提交
289
// K-V row API; the definitions are not part of this header.
// NOTE(review): tdSetKVRowDataOfCol() takes SKVRow* and so may replace the
// row it is given -- confirm against the implementation.
SKVRow tdKVRowDup(SKVRow row);
int    tdSetKVRowDataOfCol(SKVRow *orow, int16_t colId, int8_t type, void *value);
int    tdEncodeKVRow(void **buf, SKVRow row);
void * tdDecodeKVRow(void *buf, SKVRow *row);
H
Hongze Cheng 已提交
293 294 295 296 297 298 299 300 301 302 303

/*
 * Search comparator for K-V row indexes: `key1` points to the int16_t
 * column ID being looked up, `key2` points to an SColIdx entry.
 * Returns 1, -1 or 0 when the key is greater than, less than or equal to
 * the entry's colId.
 */
static FORCE_INLINE int comparTagId(const void *key1, const void *key2) {
  int16_t wanted = *(int16_t *)key1;
  int16_t current = ((SColIdx *)key2)->colId;

  if (wanted == current) return 0;
  return (wanted > current) ? 1 : -1;
}

H
Hongze Cheng 已提交
304
/*
 * Find the value stored for column `colId` in a K-V row.
 * The row's column index is searched with taosbsearch() in TD_EQ mode using
 * comparTagId. Returns a pointer into the row's data part, or NULL when the
 * search finds no entry for `colId`.
 */
static FORCE_INLINE void *tdGetKVRowValOfCol(SKVRow row, int16_t colId) {
  void *ret = taosbsearch(&colId, kvRowColIdx(row), kvRowNCols(row), sizeof(SColIdx), comparTagId, TD_EQ);
  if (ret == NULL) return NULL;
  return kvRowColVal(row, (SColIdx *)ret);
}

H
Hongze Cheng 已提交
310 311 312 313 314 315 316 317
// ----------------- K-V data row builder
// Working state used to assemble a K-V row column by column
// (see tdAddColToKVRow for how the fields are used).
typedef struct {
  int16_t  tCols;    // capacity of pColIdx, in entries
  int16_t  nCols;    // entries of pColIdx in use
  SColIdx *pColIdx;  // column index being built
  int16_t  alloc;    // capacity of buf, in bytes
  int16_t  size;     // bytes of buf in use
  void *   buf;      // value data being built
} SKVRowBuilder;
H
Hongze Cheng 已提交
319

H
Hongze Cheng 已提交
320 321 322 323
// K-V row builder API; the definitions are not part of this header.
// NOTE(review): tdInitKVRowBuilder() presumably allocates pColIdx/buf and
// tdDestroyKVRowBuilder() releases them -- confirm against the implementation.
int    tdInitKVRowBuilder(SKVRowBuilder *pBuilder);
void   tdDestroyKVRowBuilder(SKVRowBuilder *pBuilder);
void   tdResetKVRowBuilder(SKVRowBuilder *pBuilder);
SKVRow tdGetKVRowFromBuilder(SKVRowBuilder *pBuilder);
H
Hongze Cheng 已提交
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352

/*
 * Append one column value to the K-V row being built.
 *
 *   pBuilder - builder state
 *   colId    - column ID; must be greater than the last ID added (asserted)
 *   type     - column data type; decides how the value length is obtained
 *   value    - value bytes (var-data header included for var-length types)
 *
 * Returns 0 on success and -1 on failure (out of memory, or the row would
 * no longer fit the int16_t size/offset fields). On failure the builder is
 * left unchanged apart from a possibly enlarged index array: no entry is
 * added and both buffers stay valid, so the builder can still be destroyed
 * or reused.
 */
static FORCE_INLINE int tdAddColToKVRow(SKVRowBuilder *pBuilder, int16_t colId, int8_t type, void *value) {
  ASSERT(pBuilder->nCols == 0 || colId > pBuilder->pColIdx[pBuilder->nCols - 1].colId);

  int tlen = IS_VAR_DATA_TYPE(type) ? varDataTLen(value) : TYPE_BYTES[type];

  // size, alloc and SColIdx.offset are all int16_t: refuse anything that
  // cannot be represented instead of wrapping around.
  int32_t needed = (int32_t)pBuilder->size + tlen;
  if (tlen < 0 || needed > INT16_MAX) return -1;

  // Make room for one more index entry. The capacity is computed in 32 bits
  // and realloc() goes through a temporary so a failure keeps the old array.
  if (pBuilder->nCols >= pBuilder->tCols) {
    int32_t newCols = (pBuilder->tCols > 0) ? (int32_t)pBuilder->tCols * 2 : 1;
    if (newCols > INT16_MAX) newCols = INT16_MAX;
    if (newCols <= pBuilder->nCols) return -1;

    void *newIdx = realloc((void *)(pBuilder->pColIdx), sizeof(SColIdx) * newCols);
    if (newIdx == NULL) return -1;

    pBuilder->pColIdx = (SColIdx *)newIdx;
    pBuilder->tCols = (int16_t)newCols;
  }

  // Make room for the value bytes, doubling the capacity as needed.
  if (needed > pBuilder->alloc) {
    int32_t newAlloc = (pBuilder->alloc > 0) ? pBuilder->alloc : 1;
    while (newAlloc < needed) {
      newAlloc *= 2;
    }
    if (newAlloc > INT16_MAX) newAlloc = INT16_MAX;

    void *newBuf = realloc(pBuilder->buf, newAlloc);
    if (newBuf == NULL) return -1;

    pBuilder->buf = newBuf;
    pBuilder->alloc = (int16_t)newAlloc;
  }

  // Both buffers are large enough now: commit the new column.
  pBuilder->pColIdx[pBuilder->nCols].colId = colId;
  pBuilder->pColIdx[pBuilder->nCols].offset = pBuilder->size;
  pBuilder->nCols++;

  memcpy(POINTER_SHIFT(pBuilder->buf, pBuilder->size), value, tlen);
  pBuilder->size += tlen;

  return 0;
}
H
Hongze Cheng 已提交
353

H
more  
hzcheng 已提交
354 355 356 357
#ifdef __cplusplus
}
#endif

H
hzcheng 已提交
358
#endif  // _TD_DATA_FORMAT_H_