未验证 提交 b27e39be 编写于 作者: D dapan1121 提交者: GitHub

Merge pull request #20265 from taosdata/szhou/python-udf

feature: python udf
......@@ -80,10 +80,6 @@ typedef struct SUdfInterBuf {
} SUdfInterBuf;
typedef void *UdfcFuncHandle;
// dynamic lib init and destroy
typedef int32_t (*TUdfInitFunc)();
typedef int32_t (*TUdfDestroyFunc)();
#define UDF_MEMORY_EXP_GROWTH 1.5
#define NBIT (3u)
#define BitPos(_n) ((_n) & ((1 << NBIT) - 1))
......@@ -153,6 +149,8 @@ static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn *pColumn, int32_t ne
allocCapacity *= UDF_MEMORY_EXP_GROWTH;
}
int32_t existedRows = data->numOfRows;
if (IS_VAR_DATA_TYPE(meta->type)) {
char *tmp = (char *)realloc(data->varLenCol.varOffsets, sizeof(int32_t) * allocCapacity);
if (tmp == NULL) {
......@@ -160,6 +158,7 @@ static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn *pColumn, int32_t ne
}
data->varLenCol.varOffsets = (int32_t *)tmp;
data->varLenCol.varOffsetsLen = sizeof(int32_t) * allocCapacity;
memset(&data->varLenCol.varOffsets[existedRows], 0, sizeof(int32_t) * (allocCapacity - existedRows));
// for payload, add data in udfColDataAppend
} else {
char *tmp = (char *)realloc(data->fixLenCol.nullBitmap, BitmapLen(allocCapacity));
......@@ -168,6 +167,9 @@ static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn *pColumn, int32_t ne
}
data->fixLenCol.nullBitmap = tmp;
data->fixLenCol.nullBitmapLen = BitmapLen(allocCapacity);
int32_t oldLen = BitmapLen(existedRows);
memset(&data->fixLenCol.nullBitmap[oldLen], 0, BitmapLen(allocCapacity) - oldLen);
if (meta->type == TSDB_DATA_TYPE_NULL) {
return TSDB_CODE_SUCCESS;
}
......@@ -194,6 +196,7 @@ static FORCE_INLINE void udfColDataSetNull(SUdfColumn *pColumn, int32_t row) {
udfColDataSetNull_f(pColumn, row);
}
pColumn->hasNull = true;
pColumn->colData.numOfRows = ((int32_t)(row + 1) > pColumn->colData.numOfRows) ? (int32_t)(row + 1) : pColumn->colData.numOfRows;
}
static FORCE_INLINE int32_t udfColDataSet(SUdfColumn *pColumn, uint32_t currentRow, const char *pData, bool isNull) {
......@@ -252,6 +255,10 @@ static FORCE_INLINE int32_t udfColDataSet(SUdfColumn *pColumn, uint32_t currentR
return 0;
}
// dynamic lib init and destroy for C UDF
typedef int32_t (*TUdfInitFunc)();
typedef int32_t (*TUdfDestroyFunc)();
typedef int32_t (*TUdfScalarProcFunc)(SUdfDataBlock *block, SUdfColumn *resultCol);
typedef int32_t (*TUdfAggStartFunc)(SUdfInterBuf *buf);
......@@ -259,6 +266,41 @@ typedef int32_t (*TUdfAggProcessFunc)(SUdfDataBlock *block, SUdfInterBuf *interB
typedef int32_t (*TUdfAggMergeFunc)(SUdfInterBuf *inputBuf1, SUdfInterBuf *inputBuf2, SUdfInterBuf *outputBuf);
typedef int32_t (*TUdfAggFinishFunc)(SUdfInterBuf *buf, SUdfInterBuf *resultData);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
typedef struct SScriptUdfEnvItem {
const char *name;
const char *value;
} SScriptUdfEnvItem;
typedef enum EUdfFuncType { UDF_FUNC_TYPE_SCALAR = 1, UDF_FUNC_TYPE_AGG = 2 } EUdfFuncType;
typedef struct SScriptUdfInfo {
const char *name;
EUdfFuncType funcType;
int8_t scriptType;
int8_t outputType;
int32_t outputLen;
int32_t bufSize;
const char *path;
} SScriptUdfInfo;
typedef int32_t (*TScriptUdfScalarProcFunc)(SUdfDataBlock *block, SUdfColumn *resultCol, void *udfCtx);
typedef int32_t (*TScriptUdfAggStartFunc)(SUdfInterBuf *buf, void *udfCtx);
typedef int32_t (*TScriptUdfAggProcessFunc)(SUdfDataBlock *block, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf,
void *udfCtx);
typedef int32_t (*TScriptUdfAggMergeFunc)(SUdfInterBuf *inputBuf1, SUdfInterBuf *inputBuf2, SUdfInterBuf *outputBuf,
void *udfCtx);
typedef int32_t (*TScriptUdfAggFinishFunc)(SUdfInterBuf *buf, SUdfInterBuf *resultData, void *udfCtx);
typedef int32_t (*TScriptUdfInitFunc)(SScriptUdfInfo *info, void **pUdfCtx);
typedef int32_t (*TScriptUdfDestoryFunc)(void *udfCtx);
// the following function is for open/close script plugin.
typedef int32_t (*TScriptOpenFunc)(SScriptUdfEnvItem *items, int numItems);
typedef int32_t (*TScriptCloseFunc)();
#ifdef __cplusplus
}
#endif
......
......@@ -713,6 +713,8 @@ int32_t* taosGetErrno();
#define TSDB_CODE_UDF_NO_FUNC_HANDLE TAOS_DEF_ERROR_CODE(0, 0x2908)
#define TSDB_CODE_UDF_INVALID_BUFSIZE TAOS_DEF_ERROR_CODE(0, 0x2909)
#define TSDB_CODE_UDF_INVALID_OUTPUT_TYPE TAOS_DEF_ERROR_CODE(0, 0x290A)
#define TSDB_CODE_UDF_SCRIPT_NOT_SUPPORTED TAOS_DEF_ERROR_CODE(0, 0x290B)
#define TSDB_CODE_UDF_FUNC_EXEC_FAILURE TAOS_DEF_ERROR_CODE(0, 0x290C)
// sml
#define TSDB_CODE_SML_INVALID_PROTOCOL_TYPE TAOS_DEF_ERROR_CODE(0, 0x3000)
......
......@@ -202,7 +202,7 @@ typedef enum ELogicConditionType {
#define TSDB_FUNC_NAME_LEN 65
#define TSDB_FUNC_COMMENT_LEN 1024 * 1024
#define TSDB_FUNC_CODE_LEN 10 * 1024 * 1024
#define TSDB_FUNC_BUF_SIZE 512
#define TSDB_FUNC_BUF_SIZE 4096 * 64
#define TSDB_FUNC_TYPE_SCALAR 1
#define TSDB_FUNC_TYPE_AGGREGATE 2
#define TSDB_FUNC_SCRIPT_BIN_LIB 0
......
......@@ -41,7 +41,7 @@ typedef struct SUdfSetupRequest {
typedef struct SUdfSetupResponse {
int64_t udfHandle;
int8_t outputType;
int32_t outputLen;
int32_t bytes;
int32_t bufSize;
} SUdfSetupResponse;
......
......@@ -376,7 +376,7 @@ typedef struct SUdfcUvSession {
uv_pipe_t *udfUvPipe;
int8_t outputType;
int32_t outputLen;
int32_t bytes;
int32_t bufSize;
char udfName[TSDB_FUNC_NAME_LEN + 1];
......@@ -614,7 +614,7 @@ int32_t encodeUdfSetupResponse(void **buf, const SUdfSetupResponse *setupRsp) {
int32_t len = 0;
len += taosEncodeFixedI64(buf, setupRsp->udfHandle);
len += taosEncodeFixedI8(buf, setupRsp->outputType);
len += taosEncodeFixedI32(buf, setupRsp->outputLen);
len += taosEncodeFixedI32(buf, setupRsp->bytes);
len += taosEncodeFixedI32(buf, setupRsp->bufSize);
return len;
}
......@@ -622,7 +622,7 @@ int32_t encodeUdfSetupResponse(void **buf, const SUdfSetupResponse *setupRsp) {
void *decodeUdfSetupResponse(const void *buf, SUdfSetupResponse *setupRsp) {
buf = taosDecodeFixedI64(buf, &setupRsp->udfHandle);
buf = taosDecodeFixedI8(buf, &setupRsp->outputType);
buf = taosDecodeFixedI32(buf, &setupRsp->outputLen);
buf = taosDecodeFixedI32(buf, &setupRsp->bytes);
buf = taosDecodeFixedI32(buf, &setupRsp->bufSize);
return (void *)buf;
}
......@@ -808,6 +808,26 @@ int32_t convertDataBlockToUdfDataBlock(SSDataBlock *block, SUdfDataBlock *udfBlo
}
int32_t convertUdfColumnToDataBlock(SUdfColumn *udfCol, SSDataBlock *block) {
SUdfColumnMeta* meta = &udfCol->colMeta;
SColumnInfoData colInfoData = createColumnInfoData(meta->type, meta->bytes, 1);
blockDataAppendColInfo(block, &colInfoData);
blockDataEnsureCapacity(block, udfCol->colData.numOfRows);
SColumnInfoData *col = bdGetColumnInfoData(block, 0);
for (int i = 0; i < udfCol->colData.numOfRows; ++i) {
if (udfColDataIsNull(udfCol, i)) {
colDataSetNULL(col, i);
} else {
char* data = udfColDataGetData(udfCol, i);
colDataSetVal(col, i, data, false);
}
}
block->info.rows = udfCol->colData.numOfRows;
return 0;
}
int32_t convertUdfColumnToDataBlock2(SUdfColumn *udfCol, SSDataBlock *block) {
block->info.rows = udfCol->colData.numOfRows;
block->info.hasVarCol = IS_VAR_DATA_TYPE(udfCol->colMeta.type);
......@@ -899,9 +919,11 @@ int32_t convertDataBlockToScalarParm(SSDataBlock *input, SScalarParam *output) {
typedef struct SUdfAggRes {
int8_t finalResNum;
int8_t interResNum;
int32_t interResBufLen;
char *finalResBuf;
char *interResBuf;
} SUdfAggRes;
void onUdfcPipeClose(uv_handle_t *handle);
int32_t udfcGetUdfTaskResultFromUvTask(SClientUdfTask *task, SClientUvTaskNode *uvTask);
void udfcAllocateBuffer(uv_handle_t *handle, size_t suggestedSize, uv_buf_t *buf);
......@@ -1054,9 +1076,9 @@ int32_t callUdfScalarFunc(char *udfName, SScalarParam *input, int32_t numOfCols,
fnError("udfc scalar function calculate error. no column data");
code = TSDB_CODE_UDF_INVALID_OUTPUT_TYPE;
} else {
if (session->outputType != output->columnData->info.type || session->outputLen != output->columnData->info.bytes) {
if (session->outputType != output->columnData->info.type || session->bytes != output->columnData->info.bytes) {
fnError("udfc scalar function calculate error. type mismatch. session type: %d(%d), output type: %d(%d)",
session->outputType, session->outputLen, output->columnData->info.type, output->columnData->info.bytes);
session->outputType, session->bytes, output->columnData->info.type, output->columnData->info.bytes);
code = TSDB_CODE_UDF_INVALID_OUTPUT_TYPE;
}
}
......@@ -1084,11 +1106,11 @@ bool udfAggInit(struct SqlFunctionCtx *pCtx, struct SResultRowEntryInfo *pResult
}
SUdfcUvSession *session = (SUdfcUvSession *)handle;
SUdfAggRes *udfRes = (SUdfAggRes *)GET_ROWCELL_INTERBUF(pResultCellInfo);
int32_t envSize = sizeof(SUdfAggRes) + session->outputLen + session->bufSize;
int32_t envSize = sizeof(SUdfAggRes) + session->bytes + session->bufSize;
memset(udfRes, 0, envSize);
udfRes->finalResBuf = (char *)udfRes + sizeof(SUdfAggRes);
udfRes->interResBuf = (char *)udfRes + sizeof(SUdfAggRes) + session->outputLen;
udfRes->interResBuf = (char *)udfRes + sizeof(SUdfAggRes) + session->bytes;
SUdfInterBuf buf = {0};
if ((udfCode = doCallUdfAggInit(handle, &buf)) != 0) {
......@@ -1096,9 +1118,10 @@ bool udfAggInit(struct SqlFunctionCtx *pCtx, struct SResultRowEntryInfo *pResult
releaseUdfFuncHandle(pCtx->udfName);
return false;
}
udfRes->interResNum = buf.numOfResult;
if (buf.bufLen <= session->bufSize) {
memcpy(udfRes->interResBuf, buf.buf, buf.bufLen);
udfRes->interResBufLen = buf.bufLen;
udfRes->interResNum = buf.numOfResult;
} else {
fnError("udfc inter buf size %d is greater than function bufSize %d", buf.bufLen, session->bufSize);
releaseUdfFuncHandle(pCtx->udfName);
......@@ -1120,7 +1143,7 @@ int32_t udfAggProcess(struct SqlFunctionCtx *pCtx) {
SUdfcUvSession *session = handle;
SUdfAggRes *udfRes = (SUdfAggRes *)GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
udfRes->finalResBuf = (char *)udfRes + sizeof(SUdfAggRes);
udfRes->interResBuf = (char *)udfRes + sizeof(SUdfAggRes) + session->outputLen;
udfRes->interResBuf = (char *)udfRes + sizeof(SUdfAggRes) + session->bytes;
SInputColumnInfoData *pInput = &pCtx->input;
int32_t numOfCols = pInput->numOfInputCols;
......@@ -1136,7 +1159,7 @@ int32_t udfAggProcess(struct SqlFunctionCtx *pCtx) {
SSDataBlock *inputBlock = blockDataExtractBlock(pTempBlock, start, numOfRows);
SUdfInterBuf state = {.buf = udfRes->interResBuf, .bufLen = session->bufSize, .numOfResult = udfRes->interResNum};
SUdfInterBuf state = {.buf = udfRes->interResBuf, .bufLen = udfRes->interResBufLen, .numOfResult = udfRes->interResNum};
SUdfInterBuf newState = {0};
udfCode = doCallUdfAggProcess(session, inputBlock, &state, &newState);
......@@ -1144,17 +1167,17 @@ int32_t udfAggProcess(struct SqlFunctionCtx *pCtx) {
fnError("udfAggProcess error. code: %d", udfCode);
newState.numOfResult = 0;
} else {
udfRes->interResNum = newState.numOfResult;
if (newState.bufLen <= session->bufSize) {
memcpy(udfRes->interResBuf, newState.buf, newState.bufLen);
udfRes->interResBufLen = newState.bufLen;
udfRes->interResNum = newState.numOfResult;
} else {
fnError("udfc inter buf size %d is greater than function bufSize %d", newState.bufLen, session->bufSize);
udfCode = TSDB_CODE_UDF_INVALID_BUFSIZE;
}
}
if (newState.numOfResult == 1 || state.numOfResult == 1) {
GET_RES_INFO(pCtx)->numOfRes = 1;
}
GET_RES_INFO(pCtx)->numOfRes = udfRes->interResNum;
blockDataDestroy(inputBlock);
......@@ -1177,22 +1200,22 @@ int32_t udfAggFinalize(struct SqlFunctionCtx *pCtx, SSDataBlock *pBlock) {
SUdfcUvSession *session = handle;
SUdfAggRes *udfRes = (SUdfAggRes *)GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
udfRes->finalResBuf = (char *)udfRes + sizeof(SUdfAggRes);
udfRes->interResBuf = (char *)udfRes + sizeof(SUdfAggRes) + session->outputLen;
udfRes->interResBuf = (char *)udfRes + sizeof(SUdfAggRes) + session->bytes;
SUdfInterBuf resultBuf = {0};
SUdfInterBuf state = {.buf = udfRes->interResBuf, .bufLen = session->bufSize, .numOfResult = udfRes->interResNum};
SUdfInterBuf state = {.buf = udfRes->interResBuf, .bufLen = udfRes->interResBufLen, .numOfResult = udfRes->interResNum};
int32_t udfCallCode = 0;
udfCallCode = doCallUdfAggFinalize(session, &state, &resultBuf);
if (udfCallCode != 0) {
fnError("udfAggFinalize error. doCallUdfAggFinalize step. udf code:%d", udfCallCode);
GET_RES_INFO(pCtx)->numOfRes = 0;
} else {
if (resultBuf.bufLen <= session->outputLen) {
memcpy(udfRes->finalResBuf, resultBuf.buf, session->outputLen);
if (resultBuf.bufLen <= session->bytes) {
memcpy(udfRes->finalResBuf, resultBuf.buf, resultBuf.bufLen);
udfRes->finalResNum = resultBuf.numOfResult;
GET_RES_INFO(pCtx)->numOfRes = udfRes->finalResNum;
} else {
fnError("udfc inter buf size %d is greater than function output size %d", resultBuf.bufLen, session->outputLen);
fnError("udfc inter buf size %d is greater than function output size %d", resultBuf.bufLen, session->bytes);
GET_RES_INFO(pCtx)->numOfRes = 0;
udfCallCode = TSDB_CODE_UDF_INVALID_OUTPUT_TYPE;
}
......@@ -1696,13 +1719,13 @@ int32_t doSetupUdf(char udfName[], UdfcFuncHandle *funcHandle) {
SUdfSetupResponse *rsp = &task->_setup.rsp;
task->session->severHandle = rsp->udfHandle;
task->session->outputType = rsp->outputType;
task->session->outputLen = rsp->outputLen;
task->session->bytes = rsp->bytes;
task->session->bufSize = rsp->bufSize;
strncpy(task->session->udfName, udfName, TSDB_FUNC_NAME_LEN);
if (task->errCode != 0) {
fnError("failed to setup udf. udfname: %s, err: %d", udfName, task->errCode)
} else {
fnInfo("sucessfully setup udf func handle. udfName: %s, handle: %p", udfName, task->session);
fnInfo("successfully setup udf func handle. udfName: %s, handle: %p", udfName, task->session);
*funcHandle = task->session;
}
int32_t err = task->errCode;
......
此差异已折叠。
......@@ -14,15 +14,8 @@ DLL_EXPORT int32_t udf1_init() { return 0; }
DLL_EXPORT int32_t udf1_destroy() { return 0; }
DLL_EXPORT int32_t udf1(SUdfDataBlock *block, SUdfColumn *resultCol) {
SUdfColumnMeta *meta = &resultCol->colMeta;
meta->bytes = 4;
meta->type = TSDB_DATA_TYPE_INT;
meta->scale = 0;
meta->precision = 0;
SUdfColumnData *resultData = &resultCol->colData;
resultData->numOfRows = block->numOfRows;
for (int32_t i = 0; i < resultData->numOfRows; ++i) {
for (int32_t i = 0; i < block->numOfRows; ++i) {
int j = 0;
for (; j < block->numOfCols; ++j) {
if (udfColDataIsNull(block->udfCols[j], i)) {
......@@ -42,5 +35,6 @@ DLL_EXPORT int32_t udf1(SUdfDataBlock *block, SUdfColumn *resultCol) {
#ifdef WINDOWS
Sleep(1);
#endif
resultData->numOfRows = block->numOfRows;
return 0;
}
......@@ -12,12 +12,15 @@ DLL_EXPORT int32_t udf2_destroy() { return 0; }
DLL_EXPORT int32_t udf2_start(SUdfInterBuf* buf) {
*(int64_t*)(buf->buf) = 0;
buf->bufLen = sizeof(double);
buf->numOfResult = 0;
buf->numOfResult = 1;
return 0;
}
DLL_EXPORT int32_t udf2(SUdfDataBlock* block, SUdfInterBuf* interBuf, SUdfInterBuf* newInterBuf) {
double sumSquares = *(double*)interBuf->buf;
double sumSquares = 0;
if (interBuf->numOfResult == 1) {
sumSquares = *(double*)interBuf->buf;
}
int8_t numNotNull = 0;
for (int32_t i = 0; i < block->numOfCols; ++i) {
SUdfColumn* col = block->udfCols[i];
......
......@@ -6399,6 +6399,7 @@ static int32_t translateCreateFunction(STranslateContext* pCxt, SCreateFunctionS
req.funcType = pStmt->isAgg ? TSDB_FUNC_TYPE_AGGREGATE : TSDB_FUNC_TYPE_SCALAR;
req.scriptType = pStmt->language;
req.outputType = pStmt->outputDt.type;
pStmt->outputDt.bytes = calcTypeBytes(pStmt->outputDt);
req.outputLen = pStmt->outputDt.bytes;
req.bufSize = pStmt->bufSize;
int32_t code = readFromFile(pStmt->libraryPath, &req.codeLen, &req.pCode);
......
......@@ -584,6 +584,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_INPUT, "udf invalid functio
TAOS_DEFINE_ERROR(TSDB_CODE_UDF_NO_FUNC_HANDLE, "udf no function handle")
TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_BUFSIZE, "udf invalid bufsize")
TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_OUTPUT_TYPE, "udf invalid output type")
TAOS_DEFINE_ERROR(TSDB_CODE_UDF_SCRIPT_NOT_SUPPORTED, "udf program language not supported")
TAOS_DEFINE_ERROR(TSDB_CODE_UDF_FUNC_EXEC_FAILURE, "udf function execution failure")
//schemaless
TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PROTOCOL_TYPE, "Invalid line protocol type")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册