/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TDENGINE_TAOSUDF_H
#define TDENGINE_TAOSUDF_H

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <taos.h>
#include <taoserror.h>

#ifdef __cplusplus
extern "C" {
#endif

// FORCE_INLINE: when __GNUC__ is defined it expands to
// `inline __attribute__((always_inline))`; with any other compiler it expands
// to nothing, so a `static FORCE_INLINE` function is then a plain static function.
#if defined(__GNUC__)
#define FORCE_INLINE inline __attribute__((always_inline))
#else
#define FORCE_INLINE
#endif
// Type description of one UDF column.
typedef struct SUdfColumnMeta {
  // Data type code; this header compares it against TSDB_DATA_TYPE_* values.
  int16_t type;
  // Width of one row in bytes; used as the row stride of fixLenCol.data.
  int32_t bytes;
  // Not read in this header -- presumably numeric/time precision; TODO confirm.
  uint8_t precision;
  // Not read in this header -- presumably decimal scale; TODO confirm.
  uint8_t scale;
} SUdfColumnMeta;

/*
 * Row storage of one column.
 *
 * numOfRows is raised by udfColDataSet() to (highest row written + 1);
 * rowsAlloc is the row capacity maintained by udfColEnsureCapacity().
 *
 * The anonymous union holds one of two layouts, selected by the column type
 * (see IS_VAR_DATA_TYPE):
 *   - fixLenCol: a null bitmap (one bit per row) plus a data buffer indexed
 *     by row * SUdfColumnMeta.bytes;
 *   - varLenCol: one int32_t offset per row into `payload` (-1 marks a null
 *     row), with payloadLen bytes used out of payloadAllocLen allocated.
 */
typedef struct SUdfColumnData {
  int32_t numOfRows;
  int32_t rowsAlloc;
  union {
    struct {
      int32_t nullBitmapLen;  // size of nullBitmap in bytes
      char   *nullBitmap;
      int32_t dataLen;        // size of data in bytes
      char   *data;
    } fixLenCol;

    struct {
      int32_t  varOffsetsLen;  // size of varOffsets in bytes
      int32_t *varOffsets;
      int32_t  payloadLen;     // bytes of payload currently in use
      char    *payload;
      int32_t  payloadAllocLen;  // bytes allocated for payload
    } varLenCol;
  };
} SUdfColumnData;

// One column handed to / produced by a UDF: its type description plus its row storage.
typedef struct SUdfColumn {
  SUdfColumnMeta colMeta;
  // Set to true by udfColDataSetNull() whenever a row is marked null.
  bool           hasNull;
  SUdfColumnData colData;
} SUdfColumn;

typedef struct SUdfDataBlock {
H
Hongze Cheng 已提交
71 72
  int32_t      numOfRows;
  int32_t      numOfCols;
73 74 75
  SUdfColumn **udfCols;
} SUdfDataBlock;

// TODO: deprecate SUdfInterBuf.numOfResult
// Buffer exchanged with the aggregate callbacks (start / process / merge / finish).
typedef struct SUdfInterBuf {
  // NOTE(review): presumably the number of bytes available in buf -- confirm against the caller.
  int32_t bufLen;
  char   *buf;
  int8_t  numOfResult;  // zero or one
} SUdfInterBuf;
// Opaque handle type; nothing in this header dereferences it.
typedef void *UdfcFuncHandle;

// dynamic lib init and destroy
// Both are declared without parameters and return an int32_t status code.
typedef int32_t (*TUdfInitFunc)();
typedef int32_t (*TUdfDestroyFunc)();

// Factor by which udfColEnsureCapacity() / udfColDataSet() enlarge a buffer
// each time it is found too small.
#define UDF_MEMORY_EXP_GROWTH 1.5

// Null bitmap geometry: one bit per row, (1 << NBIT) == 8 rows per byte.
//   BitPos(n)      index of row n inside its byte (0..7)
//   BMCharPos(b,r) the bitmap byte (an lvalue) that holds row r
//   BitmapLen(n)   number of bytes needed for n rows
#define NBIT                  (3u)
#define BitPos(_n)            ((_n) & ((1 << NBIT) - 1))
#define BMCharPos(bm_, r_)    ((bm_)[(r_) >> NBIT])
#define BitmapLen(_n)         (((_n) + ((1 << NBIT) - 1)) >> NBIT)

// Variable-length columns: a row is null when its offset is -1.
#define udfColDataIsNull_var(pColumn, row) (((pColumn)->colData.varLenCol.varOffsets)[row] == -1)

// Fixed-length columns: row r uses bit (7 - BitPos(r)) of its bitmap byte,
// i.e. the first row of each byte is the most significant bit; 1 means null.
// `pColumn` is parenthesized so any pointer expression (e.g. `&col`) is accepted.
#define udfColDataIsNull_f(pColumn, row) \
  ((BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, row) & (1u << (7u - BitPos(row)))) == (1u << (7u - BitPos(row))))
#define udfColDataSetNull_f(pColumn, row)                                                  \
  do {                                                                                     \
    BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, row) |= (1u << (7u - BitPos(row))); \
  } while (0)

#define udfColDataSetNotNull_f(pColumn, r_)                                               \
  do {                                                                                    \
    BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, r_) &= ~(1u << (7u - BitPos(r_))); \
  } while (0)
#define udfColDataSetNull_var(pColumn, row) (((pColumn)->colData.varLenCol.varOffsets)[row] = -1)

// Variable-length values are stored as a VarDataLenT length header
// immediately followed by that many bytes of content.
typedef uint16_t VarDataLenT;  // maxVarDataLen: 65535
// `v` points at the header in every macro below except varDataLenByData,
// where it points at the content that follows the header.
//   varDataLen      content length stored in the header (lvalue)
//   varDataVal      pointer to the content
//   varDataTLen     total size: header + content
//   varDataCopy     copies header + content to dst
//   varDataSetLen   stores a new content length in the header
#define VARSTR_HEADER_SIZE     sizeof(VarDataLenT)
#define varDataLen(v)          ((VarDataLenT *)(v))[0]
#define varDataVal(v)          ((char *)(v) + VARSTR_HEADER_SIZE)
#define varDataTLen(v)         (sizeof(VarDataLenT) + varDataLen(v))
#define varDataCopy(dst, v)    memcpy((dst), (void *)(v), varDataTLen(v))
#define varDataLenByData(v)    (*(VarDataLenT *)(((char *)(v)) - VARSTR_HEADER_SIZE))
#define varDataSetLen(v, _len) (((VarDataLenT *)(v))[0] = (VarDataLenT)(_len))
// Column types stored with the variable-length layout (offsets + payload).
#define IS_VAR_DATA_TYPE(t) \
  (((t) == TSDB_DATA_TYPE_VARCHAR) || ((t) == TSDB_DATA_TYPE_NCHAR) || ((t) == TSDB_DATA_TYPE_JSON))
#define IS_STR_DATA_TYPE(t) (((t) == TSDB_DATA_TYPE_VARCHAR) || ((t) == TSDB_DATA_TYPE_NCHAR))

H
Hongze Cheng 已提交
120
static FORCE_INLINE char *udfColDataGetData(const SUdfColumn *pColumn, int32_t row) {
121 122 123 124 125 126 127
  if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) {
    return pColumn->colData.varLenCol.payload + pColumn->colData.varLenCol.varOffsets[row];
  } else {
    return pColumn->colData.fixLenCol.data + pColumn->colMeta.bytes * row;
  }
}

H
Hongze Cheng 已提交
128
static FORCE_INLINE bool udfColDataIsNull(const SUdfColumn *pColumn, int32_t row) {
129 130 131 132 133
  if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) {
    if (pColumn->colMeta.type == TSDB_DATA_TYPE_JSON) {
      if (udfColDataIsNull_var(pColumn, row)) {
        return true;
      }
H
Hongze Cheng 已提交
134
      char *data = udfColDataGetData(pColumn, row);
135 136 137 138 139 140 141 142 143
      return (*data == TSDB_DATA_TYPE_NULL);
    } else {
      return udfColDataIsNull_var(pColumn, row);
    }
  } else {
    return udfColDataIsNull_f(pColumn, row);
  }
}

H
Hongze Cheng 已提交
144
static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn *pColumn, int32_t newCapacity) {
145 146 147
  SUdfColumnMeta *meta = &pColumn->colMeta;
  SUdfColumnData *data = &pColumn->colData;

H
Hongze Cheng 已提交
148
  if (newCapacity == 0 || newCapacity <= data->rowsAlloc) {
149 150 151
    return TSDB_CODE_SUCCESS;
  }

H
Hongze Cheng 已提交
152
  int allocCapacity = (data->rowsAlloc < 8) ? 8 : data->rowsAlloc;
153 154 155 156 157
  while (allocCapacity < newCapacity) {
    allocCapacity *= UDF_MEMORY_EXP_GROWTH;
  }

  if (IS_VAR_DATA_TYPE(meta->type)) {
H
Hongze Cheng 已提交
158
    char *tmp = (char *)realloc(data->varLenCol.varOffsets, sizeof(int32_t) * allocCapacity);
159 160 161
    if (tmp == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
H
Hongze Cheng 已提交
162
    data->varLenCol.varOffsets = (int32_t *)tmp;
163 164 165
    data->varLenCol.varOffsetsLen = sizeof(int32_t) * allocCapacity;
    // for payload, add data in udfColDataAppend
  } else {
H
Hongze Cheng 已提交
166
    char *tmp = (char *)realloc(data->fixLenCol.nullBitmap, BitmapLen(allocCapacity));
167 168 169 170 171 172 173 174 175
    if (tmp == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
    data->fixLenCol.nullBitmap = tmp;
    data->fixLenCol.nullBitmapLen = BitmapLen(allocCapacity);
    if (meta->type == TSDB_DATA_TYPE_NULL) {
      return TSDB_CODE_SUCCESS;
    }

H
Hongze Cheng 已提交
176
    tmp = (char *)realloc(data->fixLenCol.data, allocCapacity * meta->bytes);
177 178 179 180 181
    if (tmp == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    data->fixLenCol.data = tmp;
H
Hongze Cheng 已提交
182
    data->fixLenCol.dataLen = allocCapacity * meta->bytes;
183 184 185 186 187 188 189
  }

  data->rowsAlloc = allocCapacity;

  return TSDB_CODE_SUCCESS;
}

H
Hongze Cheng 已提交
190 191
static FORCE_INLINE void udfColDataSetNull(SUdfColumn *pColumn, int32_t row) {
  udfColEnsureCapacity(pColumn, row + 1);
192 193 194 195 196 197 198 199
  if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) {
    udfColDataSetNull_var(pColumn, row);
  } else {
    udfColDataSetNull_f(pColumn, row);
  }
  pColumn->hasNull = true;
}

H
Hongze Cheng 已提交
200
static FORCE_INLINE int32_t udfColDataSet(SUdfColumn *pColumn, uint32_t currentRow, const char *pData, bool isNull) {
201 202
  SUdfColumnMeta *meta = &pColumn->colMeta;
  SUdfColumnData *data = &pColumn->colData;
H
Hongze Cheng 已提交
203
  udfColEnsureCapacity(pColumn, currentRow + 1);
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
  bool isVarCol = IS_VAR_DATA_TYPE(meta->type);
  if (isNull) {
    udfColDataSetNull(pColumn, currentRow);
  } else {
    if (!isVarCol) {
      udfColDataSetNotNull_f(pColumn, currentRow);
      memcpy(data->fixLenCol.data + meta->bytes * currentRow, pData, meta->bytes);
    } else {
      int32_t dataLen = varDataTLen(pData);
      if (meta->type == TSDB_DATA_TYPE_JSON) {
        if (*pData == TSDB_DATA_TYPE_NULL) {
          dataLen = 0;
        } else if (*pData == TSDB_DATA_TYPE_NCHAR) {
          dataLen = varDataTLen(pData + sizeof(char));
        } else if (*pData == TSDB_DATA_TYPE_BIGINT || *pData == TSDB_DATA_TYPE_DOUBLE) {
          dataLen = sizeof(int64_t);
        } else if (*pData == TSDB_DATA_TYPE_BOOL) {
          dataLen = sizeof(char);
        }
        dataLen += sizeof(char);
      }

      if (data->varLenCol.payloadAllocLen < data->varLenCol.payloadLen + dataLen) {
        uint32_t newSize = data->varLenCol.payloadAllocLen;
        if (newSize <= 1) {
          newSize = 8;
        }

232
        while (newSize < (uint32_t)(data->varLenCol.payloadLen + dataLen)) {
233 234 235
          newSize = newSize * UDF_MEMORY_EXP_GROWTH;
        }

H
Hongze Cheng 已提交
236
        char *buf = (char *)realloc(data->varLenCol.payload, newSize);
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
        if (buf == NULL) {
          return TSDB_CODE_OUT_OF_MEMORY;
        }

        data->varLenCol.payload = buf;
        data->varLenCol.payloadAllocLen = newSize;
      }

      uint32_t len = data->varLenCol.payloadLen;
      data->varLenCol.varOffsets[currentRow] = len;

      memcpy(data->varLenCol.payload + len, pData, dataLen);
      data->varLenCol.payloadLen += dataLen;
    }
  }
252
  data->numOfRows = ((int32_t)(currentRow + 1) > data->numOfRows) ? (int32_t)(currentRow + 1) : data->numOfRows;
253 254 255
  return 0;
}

H
Hongze Cheng 已提交
256
typedef int32_t (*TUdfScalarProcFunc)(SUdfDataBlock *block, SUdfColumn *resultCol);
257 258

typedef int32_t (*TUdfAggStartFunc)(SUdfInterBuf *buf);
S
slzhou 已提交
259 260 261
typedef int32_t (*TUdfAggProcessFunc)(SUdfDataBlock *block, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf);
typedef int32_t (*TUdfAggMergeFunc)(SUdfInterBuf *inputBuf1, SUdfInterBuf *inputBuf2, SUdfInterBuf *outputBuf);
typedef int32_t (*TUdfAggFinishFunc)(SUdfInterBuf *buf, SUdfInterBuf *resultData);
262

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// One name/value string pair; TScriptOpenFunc receives an array of these.
typedef struct SScriptUdfEnvItem {
  char *name;
  char *value;
} SScriptUdfEnvItem;

// Kind of function a script UDF implements (stored in SScriptUdfInfo.funcType).
typedef enum EUdfFuncType {
  UDF_FUNC_TYPE_SCALAR = 1,
  UDF_FUNC_TYPE_AGG = 2
} EUdfFuncType;

typedef struct SScriptUdfInfo {
S
slzhou 已提交
275
  char *name;
276

S
slzhou 已提交
277
  EUdfFuncType  funcType;
278 279 280 281 282
  int8_t  scriptType;
  int8_t  outputType;
  int32_t outputLen;
  int32_t bufSize;

S
slzhou 已提交
283
  char *path;
S
slzhou 已提交
284
} SScriptUdfInfo;
285 286 287 288 289 290 291 292 293

typedef int32_t (*TScriptUdfScalarProcFunc)(SUdfDataBlock *block, SUdfColumn *resultCol, void *udfCtx);

typedef int32_t (*TScriptUdfAggStartFunc)(SUdfInterBuf *buf, void *udfCtx);
typedef int32_t (*TScriptUdfAggProcessFunc)(SUdfDataBlock *block, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf,
                                            void *udfCtx);
typedef int32_t (*TScriptUdfAggMergeFunc)(SUdfInterBuf *inputBuf1, SUdfInterBuf *inputBuf2, SUdfInterBuf *outputBuf,
                                          void *udfCtx);
typedef int32_t (*TScriptUdfAggFinishFunc)(SUdfInterBuf *buf, SUdfInterBuf *resultData, void *udfCtx);
S
slzhou 已提交
294
typedef int32_t (*TScriptUdfInitFunc)(SScriptUdfInfo *info, void **pUdfCtx);
295 296 297
typedef int32_t (*TScriptUdfDestoryFunc)(void *udfCtx);

// the following function is for open/close script plugin.
298 299
typedef int32_t (*TScriptOpenFunc)(SScriptUdfEnvItem* items, int numItems);
typedef int32_t (*TScriptCloseFunc)();
300

301 302 303 304 305
#ifdef __cplusplus
}
#endif

#endif  // TDENGINE_TAOSUDF_H