提交 d24254fb 编写于 作者: G Ganlin Zhao

feat(query): add sample function

上级 1e503061
......@@ -101,6 +101,11 @@ bool getMavgFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
bool mavgFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
int32_t mavgFunction(SqlFunctionCtx* pCtx);
/* SAMPLE function hooks.
 * getSampleFuncEnv   - reports the size of the intermediate buffer.
 * sampleFunctionSetup - initializes that buffer for a new result row.
 * sampleFunction      - feeds the input rows of one block into the sampler.
 * sampleFinalize      - copies the collected samples into the output block. */
bool getSampleFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
bool sampleFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
int32_t sampleFunction(SqlFunctionCtx* pCtx);
int32_t sampleFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
bool getSelectivityFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv);
#ifdef __cplusplus
......
......@@ -360,6 +360,32 @@ static int32_t translateMavg(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
return TSDB_CODE_SUCCESS;
}
/**
 * Validate the arguments of SAMPLE(col, k) and derive its result type.
 *
 * Exactly two parameters are required: the first must be a column node and
 * the second must have an integer type. The result type mirrors the column
 * type; variable-length columns keep the column's declared byte width, all
 * other types use the fixed width from tDataTypes.
 *
 * @param pFunc   function node being translated; its resType is filled in
 * @param pErrBuf buffer that receives the error message on failure
 * @param len     capacity of pErrBuf
 * @return TSDB_CODE_SUCCESS, or the code produced by the error-message helper
 */
static int32_t translateSample(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
  if (LIST_LENGTH(pFunc->pParameterList) != 2) {
    return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName);
  }

  // First argument: the sampled expression, which has to be a plain column.
  SNode* pSource = nodesListGetNode(pFunc->pParameterList, 0);
  if (nodeType(pSource) != QUERY_NODE_COLUMN) {
    return buildFuncErrMsg(pErrBuf, len, TSDB_CODE_FUNC_FUNTION_ERROR,
                           "The input parameter of SAMPLE function can only be column");
  }

  // Second argument: the number of samples, any integer type is accepted.
  SExprNode* pCount = (SExprNode*)nodesListGetNode(pFunc->pParameterList, 1);
  uint8_t    countType = pCount->resType.type;
  if (!IS_INTEGER_TYPE(countType)) {
    return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
  }

  SExprNode* pColExpr = (SExprNode*)pSource;
  uint8_t    colType = pColExpr->resType.type;
  pFunc->node.resType = (SDataType){
      .bytes = IS_VAR_DATA_TYPE(colType) ? pColExpr->resType.bytes : tDataTypes[colType].bytes,
      .type = colType};
  return TSDB_CODE_SUCCESS;
}
static int32_t translateLastRow(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
// todo
return TSDB_CODE_SUCCESS;
......@@ -814,6 +840,16 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.processFunc = mavgFunction,
.finalizeFunc = NULL
},
// SAMPLE builtin: translateSample checks the arguments, getSampleFuncEnv
// sizes the intermediate buffer, and the setup/process/finalize hooks
// collect the samples and emit them.
{
.name = "sample",
.type = FUNCTION_TYPE_SAMPLE,
.classification = FUNC_MGT_NONSTANDARD_SQL_FUNC | FUNC_MGT_TIMELINE_FUNC,
.translateFunc = translateSample,
.getEnvFunc = getSampleFuncEnv,
.initFunc = sampleFunctionSetup,
.processFunc = sampleFunction,
.finalizeFunc = sampleFinalize
},
{
.name = "abs",
.type = FUNCTION_TYPE_ABS,
......
......@@ -23,6 +23,7 @@
#define HISTOGRAM_MAX_BINS_NUM 1000
#define MAVG_MAX_POINTS_NUM 1000
// Largest sample count accepted by sampleFunctionSetup (valid range is 1..this value).
#define SAMPLE_MAX_POINTS_NUM 1000
typedef struct SSumRes {
union {
......@@ -150,6 +151,15 @@ typedef struct SMavgInfo {
double points[];
} SMavgInfo;
// Intermediate state of the SAMPLE function. The struct is the header of the
// per-result buffer sized by getSampleFuncEnv; data and timestamp point into
// the bytes that follow it.
typedef struct SSampleInfo {
int32_t samples;      // requested number of samples (second function argument)
int32_t totalPoints;  // number of non-NULL input values seen so far
int32_t numSampled;   // number of sample slots currently filled
int16_t colBytes;     // width in bytes of one sampled value
char *data;           // sample values, colBytes bytes per slot
int64_t *timestamp;   // timestamp of each sampled value, one per slot
} SSampleInfo;
#define SET_VAL(_info, numOfElem, res) \
do { \
if ((numOfElem) <= 0) { \
......@@ -3033,3 +3043,95 @@ int32_t mavgFunction(SqlFunctionCtx* pCtx) {
return numOfElems;
}
/**
 * Report the size of the intermediate buffer needed by SAMPLE(col, k).
 *
 * The buffer holds one SSampleInfo header followed by k sample slots, each
 * made of one column value and one int64_t timestamp.
 *
 * @param pFunc function node; parameter 0 is the column, parameter 1 the count
 * @param pEnv  receives the buffer size in calcMemSize
 * @return always true
 */
bool getSampleFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) {
  SColumnNode* pCol = (SColumnNode*)nodesListGetNode(pFunc->pParameterList, 0);
  SValueNode*  pVal = (SValueNode*)nodesListGetNode(pFunc->pParameterList, 1);

  // The count is only range-checked later, in sampleFunctionSetup, after that
  // function has already written the SSampleInfo header. A negative count must
  // therefore never shrink the buffer below sizeof(SSampleInfo).
  int32_t numOfSamples = pVal->datum.i;
  if (numOfSamples < 0) {
    numOfSamples = 0;
  }

  pEnv->calcMemSize = sizeof(SSampleInfo) + numOfSamples * (pCol->node.resType.bytes + sizeof(int64_t));
  return true;
}
/**
 * Initialize the SAMPLE state stored in the result row's intermediate buffer.
 *
 * Seeds the random generator, resets the counters, reads the requested sample
 * count from the second function parameter and lays out the two arrays that
 * follow the SSampleInfo header.
 *
 * @param pCtx        function context (parameters and output column)
 * @param pResultInfo result entry whose intermediate buffer holds SSampleInfo
 * @return false if the generic setup fails or the sample count is outside
 *         1..SAMPLE_MAX_POINTS_NUM, true otherwise
 */
bool sampleFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResultInfo) {
  if (!functionSetup(pCtx, pResultInfo)) {
    return false;
  }

  taosSeedRand(taosSafeRand());

  SSampleInfo* pInfo = GET_ROWCELL_INTERBUF(pResultInfo);
  pInfo->samples = pCtx->param[1].param.i;
  pInfo->totalPoints = 0;
  pInfo->numSampled = 0;
  pInfo->colBytes = ((SColumnInfoData*)pCtx->pOutput)->info.bytes;
  if (pInfo->samples < 1 || pInfo->samples > SAMPLE_MAX_POINTS_NUM) {
    return false;
  }

  // Place the int64_t timestamps directly behind the header and the raw
  // values after them. With the values first, the timestamp array would start
  // at an offset of samples * colBytes, which is misaligned for int64_t
  // whenever that product is not a multiple of 8. The total size is the same,
  // so it still matches what getSampleFuncEnv reserves.
  pInfo->timestamp = (int64_t*)((char*)pInfo + sizeof(SSampleInfo));
  pInfo->data = (char*)(pInfo->timestamp + pInfo->samples);
  return true;
}
/**
 * Store one value and its timestamp in sample slot `index`.
 *
 * @param pOutput output column; supplies the value width and type for the copy
 * @param pInfo   sample state owning the data and timestamp arrays
 * @param data    pointer to the value to copy
 * @param ts      timestamp that belongs to the value
 * @param index   slot to overwrite
 */
static void sampleAssignResult(SColumnInfoData* pOutput, SSampleInfo* pInfo, char* data, TSKEY ts, int32_t index) {
  char* slot = pInfo->data + index * pInfo->colBytes;
  assignVal(slot, data, pOutput->info.bytes, pOutput->info.type);
  pInfo->timestamp[index] = ts;
}
/**
 * Offer one input value to the sampler.
 *
 * While fewer than `samples` values have been kept, the value is appended to
 * the next free slot. Afterwards a random position in [0, totalPoints) is
 * drawn and the value replaces the slot at that position only when the
 * position falls inside the sample array.
 *
 * @param pOutput output column, forwarded to sampleAssignResult
 * @param pInfo   sample state; totalPoints is incremented on every call
 * @param data    pointer to the candidate value
 * @param ts      timestamp of the candidate value
 * @param index   row index of the candidate in the input block (not used)
 */
static void doReservoirSample(SColumnInfoData* pOutput, SSampleInfo* pInfo, char* data, TSKEY ts, int32_t index) {
  pInfo->totalPoints += 1;

  // Fill phase: every value is kept until all slots are taken.
  if (pInfo->numSampled < pInfo->samples) {
    sampleAssignResult(pOutput, pInfo, data, ts, pInfo->numSampled);
    pInfo->numSampled += 1;
    return;
  }

  // Replacement phase: keep the new value only if the drawn position is a slot.
  int32_t victim = taosRand() % (pInfo->totalPoints);
  if (victim >= pInfo->samples) {
    return;
  }
  sampleAssignResult(pOutput, pInfo, data, ts, victim);
}
/**
 * Process one input block for SAMPLE: offer every non-NULL value of the
 * first input column, together with its timestamp, to the sampler.
 *
 * @param pCtx function context holding the input block and the sample state
 * @return number of sample slots filled so far
 */
int32_t sampleFunction(SqlFunctionCtx* pCtx) {
  SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
  SSampleInfo*         pInfo = GET_ROWCELL_INTERBUF(pResInfo);

  SInputColumnInfoData* pInput = &pCtx->input;
  TSKEY*                tsList = (int64_t*)pInput->pPTS->pData;
  SColumnInfoData*      pInputCol = pInput->pData[0];
  SColumnInfoData*      pOutput = (SColumnInfoData*)pCtx->pOutput;

  for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; i += 1) {
    // NULL rows are never candidates for the sample.
    // NOTE(review): translateSample also admits variable-length columns;
    // confirm that a nullbitmap test is valid for those.
    if (colDataIsNull_f(pInputCol->nullbitmap, i)) {
      continue;
    }

    char* data = colDataGetData(pInputCol, i);
    doReservoirSample(pOutput, pInfo, data, tsList[i], i);
  }

  return pInfo->numSampled;
}
/**
 * Emit the collected samples into the output block.
 *
 * The samples are appended after the rows the block already holds
 * (pBlock->info.rows) instead of always starting at row 0, so finalizing
 * into a block that already has rows does not overwrite them.
 * NOTE(review): assumes the caller has reserved capacity for
 * info.rows + numSampled rows in the target column - confirm.
 *
 * @param pCtx   function context holding the sample state and result slot id
 * @param pBlock output block that receives the sampled values
 * @return number of rows produced (the number of samples collected)
 */
int32_t sampleFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
  SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
  SSampleInfo*         pInfo = GET_ROWCELL_INTERBUF(pResInfo);

  int32_t          slotId = pCtx->pExpr->base.resSchema.slotId;
  SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId);

  // First free row of the block; samples go to currentRow .. currentRow + n - 1.
  int32_t currentRow = pBlock->info.rows;

  pResInfo->numOfRes = pInfo->numSampled;
  for (int32_t i = 0; i < pInfo->numSampled; ++i) {
    colDataAppend(pCol, currentRow + i, pInfo->data + i * pInfo->colBytes, false);
    // TODO: handle ts output
  }

  return pResInfo->numOfRes;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册