/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TDENGINE_TAOSUDF_H
#define TDENGINE_TAOSUDF_H

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <taos.h>
#include <taoserror.h>

#ifdef __cplusplus
extern "C" {
#endif

// FORCE_INLINE asks GNU-compatible compilers to always inline the function it
// decorates; on every other compiler it expands to nothing.
#ifndef __GNUC__
#define FORCE_INLINE
#else
#define FORCE_INLINE inline __attribute__((always_inline))
#endif
// Describes the element type of one UDF column.
typedef struct SUdfColumnMeta {
  int16_t type;       // compared against TSDB_DATA_TYPE_* codes by the helpers in this header
  int32_t bytes;      // per-row width in bytes; used as the row stride of fixed-length columns
  uint8_t precision;  // NOTE(review): not read anywhere in this header; meaning to be confirmed
  uint8_t scale;      // NOTE(review): not read anywhere in this header; meaning to be confirmed
} SUdfColumnMeta;

// Row storage of one column. The helpers in this header use `varLenCol` when
// IS_VAR_DATA_TYPE(colMeta.type) is true and `fixLenCol` otherwise.
typedef struct SUdfColumnData {
  int32_t numOfRows;  // udfColDataSet keeps this at (highest row written) + 1
  int32_t rowsAlloc;  // number of rows the per-row arrays can hold
  union {
    struct {
      int32_t nullBitmapLen;  // bytes allocated for nullBitmap
      char   *nullBitmap;     // one bit per row, most significant bit first; a set bit means NULL
      int32_t dataLen;        // bytes allocated for data
      char   *data;           // row cells, colMeta.bytes bytes each
    } fixLenCol;

    struct {
      int32_t  varOffsetsLen;    // bytes allocated for varOffsets
      int32_t *varOffsets;       // per-row offset into payload; -1 marks a NULL row
      int32_t  payloadLen;       // bytes of payload currently in use
      char    *payload;          // values appended back to back
      int32_t  payloadAllocLen;  // bytes allocated for payload
    } varLenCol;
  };
} SUdfColumnData;

// One column handed to or produced by a UDF: its type description plus its rows.
typedef struct SUdfColumn {
  SUdfColumnMeta colMeta;  // element type and width
  bool           hasNull;  // set to true by udfColDataSetNull; nothing in this header clears it
  SUdfColumnData colData;  // row storage
} SUdfColumn;

typedef struct SUdfDataBlock {
H
Hongze Cheng 已提交
71 72
  int32_t      numOfRows;
  int32_t      numOfCols;
73 74 75 76 77
  SUdfColumn **udfCols;
} SUdfDataBlock;

// Buffer exchanged with the aggregate UDF callbacks (start/process/merge/finish).
typedef struct SUdfInterBuf {
  int32_t bufLen;       // length of buf in bytes
  char   *buf;          // buffer contents
  int8_t  numOfResult;  // zero or one
} SUdfInterBuf;
// Opaque handle type.
typedef void *UdfcFuncHandle;

// dynamic lib init and destroy
// Both callbacks take no arguments; (void) makes that an actual prototype
// instead of an unspecified parameter list.
typedef int32_t (*TUdfInitFunc)(void);
typedef int32_t (*TUdfDestroyFunc)(void);

// Growth factor applied each time a column buffer has to be enlarged.
#define UDF_MEMORY_EXP_GROWTH 1.5

// Null-bitmap geometry: (1 << NBIT) == 8 rows share one byte.
#define NBIT                  (3u)
// Index of row _n inside its bitmap byte.
#define BitPos(_n)            ((_n) & ((1 << NBIT) - 1))
// The bitmap byte (an lvalue) that holds the flag of row r_.
#define BMCharPos(bm_, r_)    ((bm_)[(r_) >> NBIT])
// Number of bitmap bytes needed for _n rows.
#define BitmapLen(_n)         (((_n) + ((1 << NBIT) - 1)) >> NBIT)

// Variable-length columns mark a NULL row with offset -1.
#define udfColDataIsNull_var(pColumn, row) (((pColumn)->colData.varLenCol.varOffsets)[row] == -1)
// Fixed-length columns keep one flag bit per row, most significant bit first; a set bit means NULL.
#define udfColDataIsNull_f(pColumn, row) \
  ((BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, row) & (1u << (7u - BitPos(row)))) == (1u << (7u - BitPos(row))))
#define udfColDataSetNull_f(pColumn, row)                                                  \
  do {                                                                                     \
    BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, row) |= (1u << (7u - BitPos(row))); \
  } while (0)

#define udfColDataSetNotNull_f(pColumn, r_)                                               \
  do {                                                                                    \
    BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, r_) &= ~(1u << (7u - BitPos(r_))); \
  } while (0)
#define udfColDataSetNull_var(pColumn, row) (((pColumn)->colData.varLenCol.varOffsets)[row] = -1)

typedef uint16_t VarDataLenT;  // maxVarDataLen: 65535

// A variable-length value is a VarDataLenT length header immediately followed by its bytes.
// In the macros below `v` points at the header, except varDataLenByData, whose argument
// points at the first data byte.
#define VARSTR_HEADER_SIZE     sizeof(VarDataLenT)
#define varDataLen(v)          (((VarDataLenT *)(v))[0])
#define varDataVal(v)          ((char *)(v) + VARSTR_HEADER_SIZE)
#define varDataTLen(v)         (sizeof(VarDataLenT) + varDataLen(v))
#define varDataCopy(dst, v)    memcpy((dst), (void *)(v), varDataTLen(v))
#define varDataLenByData(v)    (*(VarDataLenT *)(((char *)(v)) - VARSTR_HEADER_SIZE))
#define varDataSetLen(v, _len) (((VarDataLenT *)(v))[0] = (VarDataLenT)(_len))
// Types stored through the variable-length column layout.
#define IS_VAR_DATA_TYPE(t) \
  (((t) == TSDB_DATA_TYPE_VARCHAR) || ((t) == TSDB_DATA_TYPE_NCHAR) || ((t) == TSDB_DATA_TYPE_JSON))
#define IS_STR_DATA_TYPE(t) (((t) == TSDB_DATA_TYPE_VARCHAR) || ((t) == TSDB_DATA_TYPE_NCHAR))

H
Hongze Cheng 已提交
119
static FORCE_INLINE char *udfColDataGetData(const SUdfColumn *pColumn, int32_t row) {
120 121 122 123 124 125 126
  if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) {
    return pColumn->colData.varLenCol.payload + pColumn->colData.varLenCol.varOffsets[row];
  } else {
    return pColumn->colData.fixLenCol.data + pColumn->colMeta.bytes * row;
  }
}

H
Hongze Cheng 已提交
127
static FORCE_INLINE bool udfColDataIsNull(const SUdfColumn *pColumn, int32_t row) {
128 129 130 131 132
  if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) {
    if (pColumn->colMeta.type == TSDB_DATA_TYPE_JSON) {
      if (udfColDataIsNull_var(pColumn, row)) {
        return true;
      }
H
Hongze Cheng 已提交
133
      char *data = udfColDataGetData(pColumn, row);
134 135 136 137 138 139 140 141 142
      return (*data == TSDB_DATA_TYPE_NULL);
    } else {
      return udfColDataIsNull_var(pColumn, row);
    }
  } else {
    return udfColDataIsNull_f(pColumn, row);
  }
}

H
Hongze Cheng 已提交
143
static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn *pColumn, int32_t newCapacity) {
144 145 146
  SUdfColumnMeta *meta = &pColumn->colMeta;
  SUdfColumnData *data = &pColumn->colData;

H
Hongze Cheng 已提交
147
  if (newCapacity == 0 || newCapacity <= data->rowsAlloc) {
148 149 150
    return TSDB_CODE_SUCCESS;
  }

H
Hongze Cheng 已提交
151
  int allocCapacity = (data->rowsAlloc < 8) ? 8 : data->rowsAlloc;
152 153 154 155 156
  while (allocCapacity < newCapacity) {
    allocCapacity *= UDF_MEMORY_EXP_GROWTH;
  }

  if (IS_VAR_DATA_TYPE(meta->type)) {
H
Hongze Cheng 已提交
157
    char *tmp = (char *)realloc(data->varLenCol.varOffsets, sizeof(int32_t) * allocCapacity);
158 159 160
    if (tmp == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
H
Hongze Cheng 已提交
161
    data->varLenCol.varOffsets = (int32_t *)tmp;
162 163 164
    data->varLenCol.varOffsetsLen = sizeof(int32_t) * allocCapacity;
    // for payload, add data in udfColDataAppend
  } else {
H
Hongze Cheng 已提交
165
    char *tmp = (char *)realloc(data->fixLenCol.nullBitmap, BitmapLen(allocCapacity));
166 167 168 169 170 171 172 173 174
    if (tmp == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
    data->fixLenCol.nullBitmap = tmp;
    data->fixLenCol.nullBitmapLen = BitmapLen(allocCapacity);
    if (meta->type == TSDB_DATA_TYPE_NULL) {
      return TSDB_CODE_SUCCESS;
    }

H
Hongze Cheng 已提交
175
    tmp = (char *)realloc(data->fixLenCol.data, allocCapacity * meta->bytes);
176 177 178 179 180
    if (tmp == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    data->fixLenCol.data = tmp;
H
Hongze Cheng 已提交
181
    data->fixLenCol.dataLen = allocCapacity * meta->bytes;
182 183 184 185 186 187 188
  }

  data->rowsAlloc = allocCapacity;

  return TSDB_CODE_SUCCESS;
}

H
Hongze Cheng 已提交
189 190
static FORCE_INLINE void udfColDataSetNull(SUdfColumn *pColumn, int32_t row) {
  udfColEnsureCapacity(pColumn, row + 1);
191 192 193 194 195 196 197 198
  if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) {
    udfColDataSetNull_var(pColumn, row);
  } else {
    udfColDataSetNull_f(pColumn, row);
  }
  pColumn->hasNull = true;
}

H
Hongze Cheng 已提交
199
static FORCE_INLINE int32_t udfColDataSet(SUdfColumn *pColumn, uint32_t currentRow, const char *pData, bool isNull) {
200 201
  SUdfColumnMeta *meta = &pColumn->colMeta;
  SUdfColumnData *data = &pColumn->colData;
H
Hongze Cheng 已提交
202
  udfColEnsureCapacity(pColumn, currentRow + 1);
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
  bool isVarCol = IS_VAR_DATA_TYPE(meta->type);
  if (isNull) {
    udfColDataSetNull(pColumn, currentRow);
  } else {
    if (!isVarCol) {
      udfColDataSetNotNull_f(pColumn, currentRow);
      memcpy(data->fixLenCol.data + meta->bytes * currentRow, pData, meta->bytes);
    } else {
      int32_t dataLen = varDataTLen(pData);
      if (meta->type == TSDB_DATA_TYPE_JSON) {
        if (*pData == TSDB_DATA_TYPE_NULL) {
          dataLen = 0;
        } else if (*pData == TSDB_DATA_TYPE_NCHAR) {
          dataLen = varDataTLen(pData + sizeof(char));
        } else if (*pData == TSDB_DATA_TYPE_BIGINT || *pData == TSDB_DATA_TYPE_DOUBLE) {
          dataLen = sizeof(int64_t);
        } else if (*pData == TSDB_DATA_TYPE_BOOL) {
          dataLen = sizeof(char);
        }
        dataLen += sizeof(char);
      }

      if (data->varLenCol.payloadAllocLen < data->varLenCol.payloadLen + dataLen) {
        uint32_t newSize = data->varLenCol.payloadAllocLen;
        if (newSize <= 1) {
          newSize = 8;
        }

231
        while (newSize < (uint32_t)(data->varLenCol.payloadLen + dataLen)) {
232 233 234
          newSize = newSize * UDF_MEMORY_EXP_GROWTH;
        }

H
Hongze Cheng 已提交
235
        char *buf = (char *)realloc(data->varLenCol.payload, newSize);
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
        if (buf == NULL) {
          return TSDB_CODE_OUT_OF_MEMORY;
        }

        data->varLenCol.payload = buf;
        data->varLenCol.payloadAllocLen = newSize;
      }

      uint32_t len = data->varLenCol.payloadLen;
      data->varLenCol.varOffsets[currentRow] = len;

      memcpy(data->varLenCol.payload + len, pData, dataLen);
      data->varLenCol.payloadLen += dataLen;
    }
  }
251
  data->numOfRows = ((int32_t)(currentRow + 1) > data->numOfRows) ? (int32_t)(currentRow + 1) : data->numOfRows;
252 253 254
  return 0;
}

H
Hongze Cheng 已提交
255
typedef int32_t (*TUdfScalarProcFunc)(SUdfDataBlock *block, SUdfColumn *resultCol);
256 257

typedef int32_t (*TUdfAggStartFunc)(SUdfInterBuf *buf);
S
slzhou 已提交
258 259 260
typedef int32_t (*TUdfAggProcessFunc)(SUdfDataBlock *block, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf);
typedef int32_t (*TUdfAggMergeFunc)(SUdfInterBuf *inputBuf1, SUdfInterBuf *inputBuf2, SUdfInterBuf *outputBuf);
typedef int32_t (*TUdfAggFinishFunc)(SUdfInterBuf *buf, SUdfInterBuf *resultData);
261

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// One name/value pair; an array of these is the argument of TScriptOpenFunc.
typedef struct SScriptUdfEnvItem {
  char *name;
  char *value;
} SScriptUdfEnvItem;

// Kind of function a UDF implements.
typedef enum EUdfFuncType {
  UDF_FUNC_TYPE_SCALAR = 1,
  UDF_FUNC_TYPE_AGG = 2
} EUdfFuncType;

typedef struct SScriptUdfInfo {
S
slzhou 已提交
274
  char *name;
275

S
slzhou 已提交
276
  EUdfFuncType  funcType;
277 278 279 280 281
  int8_t  scriptType;
  int8_t  outputType;
  int32_t outputLen;
  int32_t bufSize;

S
slzhou 已提交
282
  char *path;
S
slzhou 已提交
283
} SScriptUdfInfo;
284 285 286 287 288 289 290 291 292

typedef int32_t (*TScriptUdfScalarProcFunc)(SUdfDataBlock *block, SUdfColumn *resultCol, void *udfCtx);

typedef int32_t (*TScriptUdfAggStartFunc)(SUdfInterBuf *buf, void *udfCtx);
typedef int32_t (*TScriptUdfAggProcessFunc)(SUdfDataBlock *block, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf,
                                            void *udfCtx);
typedef int32_t (*TScriptUdfAggMergeFunc)(SUdfInterBuf *inputBuf1, SUdfInterBuf *inputBuf2, SUdfInterBuf *outputBuf,
                                          void *udfCtx);
typedef int32_t (*TScriptUdfAggFinishFunc)(SUdfInterBuf *buf, SUdfInterBuf *resultData, void *udfCtx);
S
slzhou 已提交
293
typedef int32_t (*TScriptUdfInitFunc)(SScriptUdfInfo *info, void **pUdfCtx);
294 295 296
typedef int32_t (*TScriptUdfDestoryFunc)(void *udfCtx);

// the following function is for open/close script plugin.
297 298
typedef int32_t (*TScriptOpenFunc)(SScriptUdfEnvItem* items, int numItems);
typedef int32_t (*TScriptCloseFunc)();
299

#ifdef __cplusplus
}
#endif

#endif  // TDENGINE_TAOSUDF_H