提交 b7917aa4 编写于 作者: H Haojun Liao

[td-10564] Add codes for query parsing.

上级 132e3b45
......@@ -36,4 +36,32 @@
// int16_t bytes;
//} SSchema;
typedef struct SColumnDataAgg {
int16_t colId;
int64_t sum;
int64_t max;
int64_t min;
int16_t maxIndex;
int16_t minIndex;
int16_t numOfNull;
} SColumnDataAgg;
typedef struct SDataBlockInfo {
STimeWindow window;
int32_t rows;
int32_t numOfCols;
int64_t uid;
} SDataBlockInfo;
typedef struct SSDataBlock {
SColumnDataAgg *pBlockAgg;
SArray *pDataBlock; // SArray<SColumnInfoData>
SDataBlockInfo info;
} SSDataBlock;
typedef struct SColumnInfoData {
SColumnInfo info; // TODO filter info needs to be removed
char *pData; // the corresponding block data in memory
} SColumnInfoData;
#endif // TDENGINE_COMMON_H
......@@ -184,16 +184,6 @@ void** qReleaseQInfo(void* pMgmt, void* pQInfo);
*/
void** qDeregisterQInfo(void* pMgmt, void* pQInfo);
//======================================================================================================================
// built-in sql functions
/**
* If the given name is a valid built-in sql function, the value of true will be returned.
* @param name
* @param len
* @return
*/
bool isBuiltinFunction(const char* name, int32_t len);
#ifdef __cplusplus
}
#endif
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TFUNCTION_H
#define TDENGINE_TFUNCTION_H
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
#define FUNCTION_SCALAR 1
#define FUNCTION_AGG 2
#define FUNCTIONS_NAME_MAX_LENGTH 16
#define FUNCTION_INVALID_ID -1
#define FUNCTION_COUNT 0
#define FUNCTION_SUM 1
#define FUNCTION_AVG 2
#define FUNCTION_MIN 3
#define FUNCTION_MAX 4
#define FUNCTION_STDDEV 5
#define FUNCTION_PERCT 6
#define FUNCTION_APERCT 7
#define FUNCTION_FIRST 8
#define FUNCTION_LAST 9
#define FUNCTION_LAST_ROW 10
#define FUNCTION_TOP 11
#define FUNCTION_BOTTOM 12
#define FUNCTION_SPREAD 13
#define FUNCTION_TWA 14
#define FUNCTION_LEASTSQR 15
#define FUNCTION_TS 16
#define FUNCTION_TS_DUMMY 17
#define FUNCTION_TAG_DUMMY 18
#define FUNCTION_TS_COMP 19
#define FUNCTION_TAG 20
#define FUNCTION_PRJ 21
#define FUNCTION_TAGPRJ 22
#define FUNCTION_ARITHM 23
#define FUNCTION_DIFF 24
#define FUNCTION_FIRST_DST 25
#define FUNCTION_LAST_DST 26
#define FUNCTION_STDDEV_DST 27
#define FUNCTION_INTERP 28
#define FUNCTION_RATE 29
#define FUNCTION_IRATE 30
#define FUNCTION_TID_TAG 31
#define FUNCTION_DERIVATIVE 32
#define FUNCTION_BLKINFO 33
#define FUNCTION_HISTOGRAM 34
#define FUNCTION_HLL 35
#define FUNCTION_MODE 36
#define FUNCTION_SAMPLE 37
typedef struct SPoint1 {
int64_t key;
union{double val; char* ptr;};
} SPoint1;
struct SQLFunctionCtx;
struct SResultRowCellInfo;
//for selectivity query, the corresponding tag value is assigned if the data is qualified
typedef struct SExtTagsInfo {
int16_t tagsLen; // keep the tags data for top/bottom query result
int16_t numOfTagCols;
struct SQLFunctionCtx **pTagCtxList;
} SExtTagsInfo;
// sql function runtime context
typedef struct SQLFunctionCtx {
int32_t size; // number of rows
void * pInput; // input data buffer
uint32_t order; // asc|desc
int16_t inputType;
int16_t inputBytes;
int16_t outputType;
int16_t outputBytes; // size of results, determined by function and input column data type
int32_t interBufBytes; // internal buffer size
bool hasNull; // null value exist in current block
bool requireNull; // require null in some function
bool stableQuery;
int16_t functionId; // function id
char * pOutput; // final result output buffer, point to sdata->data
uint8_t currentStage; // record current running step, default: 0
int64_t startTs; // timestamp range of current query when function is executed on a specific data block
int32_t numOfParams;
SVariant param[4]; // input parameter, e.g., top(k, 20), the number of results for top query is kept in param
int64_t *ptsList; // corresponding timestamp array list
void *ptsOutputBuf; // corresponding output buffer for timestamp of each result, e.g., top/bottom*/
SVariant tag;
bool isSmaSet;
SColumnDataAgg sma;
struct SResultRowCellInfo *resultInfo;
SExtTagsInfo tagInfo;
SPoint1 start;
SPoint1 end;
} SQLFunctionCtx;
typedef struct SAggFunctionInfo {
char name[FUNCTIONS_NAME_MAX_LENGTH];
int8_t type; // Scalar function or aggregation function
uint8_t functionId; // Function Id
int8_t sFunctionId; // Transfer function for super table query
uint16_t status;
bool (*init)(SQLFunctionCtx *pCtx, struct SResultRowCellInfo* pResultCellInfo); // setup the execute environment
void (*exec)(SQLFunctionCtx *pCtx);
// finalizer must be called after all exec has been executed to generated final result.
void (*xFinalize)(SQLFunctionCtx *pCtx);
void (*mergeFunc)(SQLFunctionCtx *pCtx);
int32_t (*dataReqFunc)(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId);
} SAggFunctionInfo;
typedef struct SScalarFunctionInfo {
char name[FUNCTIONS_NAME_MAX_LENGTH];
int8_t type; // scalar function or aggregation function
uint8_t functionId; // index of scalar function
bool (*init)(SQLFunctionCtx *pCtx, struct SResultRowCellInfo* pResultCellInfo); // setup the execute environment
void (*exec)(SQLFunctionCtx *pCtx);
} SScalarFunctionInfo;
int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionId, int32_t param, int16_t *type,
int16_t *len, int32_t *interBytes, int16_t extLength, bool isSuperTable/*, SUdfInfo* pUdfInfo*/);
/**
* If the given name is a valid built-in sql function, the value of true will be returned.
* @param name
* @param len
* @return
*/
int32_t qIsBuiltinFunction(const char* name, int32_t len);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TFUNCTION_H
......@@ -95,10 +95,7 @@ typedef struct STagCond {
typedef struct STableMetaInfo {
STableMeta *pTableMeta; // table meta, cached in client side and acquired by name
uint32_t tableMetaSize;
// size_t tableMetaCapacity;
SVgroupsInfo *vgroupList;
SArray *pVgroupTables; // SArray<SVgroupTableInfo>
/*
* 1. keep the vgroup index during the multi-vnode super table projection query
......@@ -110,6 +107,20 @@ typedef struct STableMetaInfo {
SArray *tagColList; // SArray<SColumn*>, involved tag columns
} STableMetaInfo;
typedef struct SQueryType {
bool stableQuery;
bool groupbyColumn;
bool simpleAgg;
bool arithmeticOnAgg;
bool projectionQuery;
bool hasFilter;
bool onlyTagQuery;
bool orderProjectQuery;
bool stateWindow;
bool globalMerge;
bool multigroupResult;
} SQueryType;
typedef struct SQueryStmtInfo {
int16_t command; // the command may be different for each subclause, so keep it seperately.
uint32_t type; // query/insert type
......@@ -152,17 +163,6 @@ typedef struct SQueryStmtInfo {
SArray *pUpstream; // SArray<struct SQueryStmtInfo>
struct SQueryStmtInfo *pDownstream;
int32_t havingFieldNum;
bool stableQuery;
bool groupbyColumn;
bool simpleAgg;
bool arithmeticOnAgg;
bool projectionQuery;
bool hasFilter;
bool onlyTagQuery;
bool orderProjectQuery;
bool stateWindow;
bool globalMerge;
bool multigroupResult;
} SQueryStmtInfo;
struct SInsertStmtInfo;
......
......@@ -44,6 +44,7 @@ extern "C" {
#include <errno.h>
#include <float.h>
#include <math.h>
#include <sys/stat.h>
#include "osAtomic.h"
#include "osDef.h"
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TPAGEDFILE_H
#define TDENGINE_TPAGEDFILE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "tlist.h"
#include "thash.h"
#include "os.h"
#include "tlockfree.h"
typedef struct SArray* SIDList;
typedef struct SPageDiskInfo {
int32_t offset;
int32_t length;
} SPageDiskInfo;
typedef struct SPageInfo {
SListNode* pn; // point to list node
int32_t pageId;
SPageDiskInfo info;
void* pData;
bool used; // set current page is in used
} SPageInfo;
typedef struct SFreeListItem {
int32_t offset;
int32_t len;
} SFreeListItem;
typedef struct SResultBufStatis {
int32_t flushBytes;
int32_t loadBytes;
int32_t getPages;
int32_t releasePages;
int32_t flushPages;
} SResultBufStatis;
typedef struct SDiskbasedResultBuf {
int32_t numOfPages;
int64_t totalBufSize;
int64_t fileSize; // disk file size
FILE* file;
int32_t allocateId; // allocated page id
char* path; // file path
int32_t pageSize; // current used page size
int32_t inMemPages; // numOfPages that are allocated in memory
SHashObj* groupSet; // id hash table
SHashObj* all;
SList* lruList;
void* emptyDummyIdList; // dummy id list
void* assistBuf; // assistant buffer for compress/decompress data
SArray* pFree; // free area in file
bool comp; // compressed before flushed to disk
int32_t nextPos; // next page flush position
uint64_t qId; // for debug purpose
SResultBufStatis statis;
} SDiskbasedResultBuf;
#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) // in bytes
#define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1}
#define DEFAULT_PAGE_SIZE (16384L)
typedef struct SFilePage {
int64_t num;
char data[];
} SFilePage;
/**
* create disk-based result buffer
* @param pResultBuf
* @param rowSize
* @param pagesize
* @param inMemPages
* @param handle
* @return
*/
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId);
/**
*
* @param pResultBuf
* @param groupId
* @param pageId
* @return
*/
SFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId);
/**
*
* @param pResultBuf
* @param groupId
* @return
*/
SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId);
/**
* get the specified buffer page by id
* @param pResultBuf
* @param id
* @return
*/
tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id);
/**
* release the referenced buf pages
* @param pResultBuf
* @param page
*/
void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page);
/**
*
* @param pResultBuf
* @param pi
*/
void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi);
/**
* get the total buffer size in the format of disk file
* @param pResultBuf
* @return
*/
size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf);
/**
* get the number of groups in the result buffer
* @param pResultBuf
* @return
*/
size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf);
/**
* destroy result buffer
* @param pResultBuf
*/
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf);
/**
*
* @param pList
* @return
*/
SPageInfo* getLastPageInfo(SIDList pList);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TPAGEDFILE_H
......@@ -8,4 +8,5 @@ add_subdirectory(scheduler)
add_subdirectory(lru)
add_subdirectory(catalog)
add_subdirectory(executor)
add_subdirectory(planner)
\ No newline at end of file
add_subdirectory(planner)
add_subdirectory(function)
\ No newline at end of file
aux_source_directory(src FUNCTION_SRC)
add_library(function ${FUNCTION_SRC})
target_include_directories(
function
PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/function"
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)
target_link_libraries(
function
PRIVATE os util common
)
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TAGGFUNCTION_H
#define TDENGINE_TAGGFUNCTION_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "tname.h"
#include "taosdef.h"
#include "tvariant.h"
#include "function.h"
#include "tudf.h"
extern SAggFunctionInfo aggFunc[34];
typedef struct SResultRowCellInfo {
int8_t hasResult; // result generated, not NULL value
bool initialized; // output buffer has been initialized
bool complete; // query has completed
uint32_t numOfRes; // num of output result in current buffer
} SResultRowCellInfo;
#define FUNCSTATE_SO 0x0u
#define FUNCSTATE_MO 0x1u // dynamic number of output, not multinumber of output e.g., TOP/BOTTOM
#define FUNCSTATE_STREAM 0x2u // function avail for stream
#define FUNCSTATE_STABLE 0x4u // function avail for super table
#define FUNCSTATE_NEED_TS 0x8u // timestamp is required during query processing
#define FUNCSTATE_SELECTIVITY 0x10u // selectivity functions, can exists along with tag columns
#define BASIC_FUNC_SO FUNCSTATE_SO | FUNCSTATE_STREAM | FUNCSTATE_STABLE
#define BASIC_FUNC_MO FUNCSTATE_MO | FUNCSTATE_STREAM | FUNCSTATE_STABLE
#define AVG_FUNCTION_INTER_BUFFER_SIZE 50
#define DATA_SET_FLAG ',' // to denote the output area has data, not null value
#define DATA_SET_FLAG_SIZE sizeof(DATA_SET_FLAG)
#define QUERY_ASC_FORWARD_STEP 1
#define QUERY_DESC_FORWARD_STEP -1
#define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP)
#define MAX_INTERVAL_TIME_WINDOW 1000000 // maximum allowed time windows in final results
#define TOP_BOTTOM_QUERY_LIMIT 100
enum {
MASTER_SCAN = 0x0u,
REVERSE_SCAN = 0x1u,
REPEAT_SCAN = 0x2u, //repeat scan belongs to the master scan
MERGE_STAGE = 0x20u,
};
#define QUERY_IS_STABLE_QUERY(type) (((type)&TSDB_QUERY_TYPE_STABLE_QUERY) != 0)
#define QUERY_IS_JOIN_QUERY(type) (TSDB_QUERY_HAS_TYPE(type, TSDB_QUERY_TYPE_JOIN_QUERY))
#define QUERY_IS_PROJECTION_QUERY(type) (((type)&TSDB_QUERY_TYPE_PROJECTION_QUERY) != 0)
#define QUERY_IS_FREE_RESOURCE(type) (((type)&TSDB_QUERY_TYPE_FREE_RESOURCE) != 0)
typedef struct SArithmeticSupport {
SExprInfo *pExprInfo;
int32_t numOfCols;
SColumnInfo *colList;
void *exprList; // client side used
int32_t offset;
char** data;
} SArithmeticSupport;
typedef struct SInterpInfoDetail {
TSKEY ts; // interp specified timestamp
int8_t type;
int8_t primaryCol;
} SInterpInfoDetail;
#define GET_ROWCELL_INTERBUF(_c) ((void*) ((char*)(_c) + sizeof(SResultRowCellInfo)))
#define GET_RES_INFO(ctx) ((ctx)->resultInfo)
#define IS_STREAM_QUERY_VALID(x) (((x)&TSDB_FUNCSTATE_STREAM) != 0)
#define IS_MULTIOUTPUT(x) (((x)&TSDB_FUNCSTATE_MO) != 0)
// determine the real data need to calculated the result
enum {
BLK_DATA_NO_NEEDED = 0x0,
BLK_DATA_STATIS_NEEDED = 0x1,
BLK_DATA_ALL_NEEDED = 0x3,
BLK_DATA_DISCARD = 0x4, // discard current data block since it is not qualified for filter
};
typedef struct STwaInfo {
int8_t hasResult; // flag to denote has value
double dOutput;
SPoint1 p;
STimeWindow win;
} STwaInfo;
extern int32_t functionCompatList[]; // compatible check array list
bool topbot_datablock_filter(SQLFunctionCtx *pCtx, const char *minval, const char *maxval);
/**
* the numOfRes should be kept, since it may be used later
* and allow the ResultInfo to be re initialized
*/
#define RESET_RESULT_INFO(_r) \
do { \
(_r)->initialized = false; \
} while (0)
static FORCE_INLINE void initResultInfo(SResultRowCellInfo *pResInfo, int32_t bufLen) {
pResInfo->initialized = true; // the this struct has been initialized flag
pResInfo->complete = false;
pResInfo->hasResult = false;
pResInfo->numOfRes = 0;
memset(GET_ROWCELL_INTERBUF(pResInfo), 0, bufLen);
}
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TAGGFUNCTION_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TFILL_H
#define TDENGINE_TFILL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "taosdef.h"
struct SSDataBlock;
typedef struct {
STColumn col; // column info
int16_t functionId; // sql function id
int16_t flag; // column flag: TAG COLUMN|NORMAL COLUMN
int16_t tagIndex; // index of current tag in SFillTagColInfo array list
union {int64_t i; double d;} fillVal;
} SFillColInfo;
typedef struct {
SSchema col;
char* tagVal;
} SFillTagColInfo;
typedef struct SFillInfo {
TSKEY start; // start timestamp
TSKEY end; // endKey for fill
TSKEY currentKey; // current active timestamp, the value may be changed during the fill procedure.
int32_t order; // order [TSDB_ORDER_ASC|TSDB_ORDER_DESC]
int32_t type; // fill type
int32_t numOfRows; // number of rows in the input data block
int32_t index; // active row index
int32_t numOfTotal; // number of filled rows in one round
int32_t numOfCurrent; // number of filled rows in current results
int32_t numOfTags; // number of tags
int32_t numOfCols; // number of columns, including the tags columns
int32_t rowSize; // size of each row
SInterval interval;
char * prevValues; // previous row of data, to generate the interpolation results
char * nextValues; // next row of data
char** pData; // original result data block involved in filling data
int32_t alloc; // data buffer size in rows
int8_t precision; // time resoluation
SFillColInfo* pFillCol; // column info for fill operations
SFillTagColInfo* pTags; // tags value for filling gap
void* handle; // for debug purpose
} SFillInfo;
typedef struct SPoint {
int64_t key;
void * val;
} SPoint;
SFillInfo* taosCreateFillInfo(int32_t order, TSKEY skey, int32_t numOfTags, int32_t capacity, int32_t numOfCols,
int64_t slidingTime, int8_t slidingUnit, int8_t precision, int32_t fillType,
SFillColInfo* pFillCol, void* handle);
void taosResetFillInfo(SFillInfo* pFillInfo, TSKEY startTimestamp);
void* taosDestroyFillInfo(SFillInfo *pFillInfo);
void taosFillSetStartInfo(SFillInfo* pFillInfo, int32_t numOfRows, TSKEY endKey);
void taosFillSetInputDataBlock(SFillInfo* pFillInfo, const struct SSDataBlock* pInput);
bool taosFillHasMoreResults(SFillInfo* pFillInfo);
int64_t getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, int64_t ekey, int32_t maxNumOfRows);
int32_t taosGetLinearInterpolationVal(SPoint* point, int32_t outputType, SPoint* point1, SPoint* point2, int32_t inputType);
int64_t taosFillResultDataBlock(SFillInfo* pFillInfo, void** output, int32_t capacity);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TFILL_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_HISTOGRAM_H
#define TDENGINE_HISTOGRAM_H
#ifdef __cplusplus
extern "C" {
#endif
#define USE_ARRAYLIST
#define MAX_HISTOGRAM_BIN 500
typedef struct SHistBin {
double val;
int64_t num;
#if !defined(USE_ARRAYLIST)
double delta;
int32_t index; // index in min-heap list
#endif
} SHistBin;
typedef struct SHeapEntry {
void* pData;
double val;
} SHeapEntry;
typedef struct SHistogramInfo {
int64_t numOfElems;
int32_t numOfEntries;
int32_t maxEntries;
double min;
double max;
#if defined(USE_ARRAYLIST)
SHistBin* elems;
#else
tSkipList* pList;
SLoserTreeInfo* pLoserTree;
int32_t maxIndex;
bool ordered;
#endif
} SHistogramInfo;
SHistogramInfo* tHistogramCreate(int32_t numOfBins);
SHistogramInfo* tHistogramCreateFrom(void* pBuf, int32_t numOfBins);
int32_t tHistogramAdd(SHistogramInfo** pHisto, double val);
int64_t tHistogramSum(SHistogramInfo* pHisto, double v);
double* tHistogramUniform(SHistogramInfo* pHisto, double* ratio, int32_t num);
SHistogramInfo* tHistogramMerge(SHistogramInfo* pHisto1, SHistogramInfo* pHisto2, int32_t numOfEntries);
void tHistogramDestroy(SHistogramInfo** pHisto);
void tHistogramPrint(SHistogramInfo* pHisto);
int32_t histoBinarySearch(SHistBin* pEntry, int32_t len, double val);
SHeapEntry* tHeapCreate(int32_t numOfEntries);
void tHeapSort(SHeapEntry* pEntry, int32_t len);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_HISTOGRAM_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TPERCENTILE_H
#define TDENGINE_TPERCENTILE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "qExtbuffer.h"
#include "qResultbuf.h"
#include "ttszip.h"
typedef struct MinMaxEntry {
union {
double dMinVal;
int64_t i64MinVal;
uint64_t u64MinVal;
};
union {
double dMaxVal;
int64_t i64MaxVal;
int64_t u64MaxVal;
};
} MinMaxEntry;
typedef struct {
int32_t size;
int32_t pageId;
tFilePage *data;
} SSlotInfo;
typedef struct tMemBucketSlot {
SSlotInfo info;
MinMaxEntry range;
} tMemBucketSlot;
struct tMemBucket;
typedef int32_t (*__perc_hash_func_t)(struct tMemBucket *pBucket, const void *value);
typedef struct tMemBucket {
int16_t numOfSlots;
int16_t type;
int16_t bytes;
int32_t total;
int32_t elemPerPage; // number of elements for each object
int32_t maxCapacity; // maximum allowed number of elements that can be sort directly to get the result
int32_t bufPageSize; // disk page size
MinMaxEntry range; // value range
int32_t times; // count that has been checked for deciding the correct data value buckets.
__compar_fn_t comparFn;
tMemBucketSlot * pSlots;
SDiskbasedResultBuf *pBuffer;
__perc_hash_func_t hashFunc;
} tMemBucket;
tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval);
void tMemBucketDestroy(tMemBucket *pBucket);
int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size);
double getPercentile(tMemBucket *pMemBucket, double percent);
#endif // TDENGINE_TPERCENTILE_H
#ifdef __cplusplus
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TSCALARFUNCTION_H
#define TDENGINE_TSCALARFUNCTION_H
#ifdef __cplusplus
extern "C" {
#endif
#include "function.h"
extern struct SScalarFunctionInfo scalarFunc[1];
#define FUNCTION_CEIL 38
#define FUNCTION_FLOOR 39
#define FUNCTION_ROUND 40
#define FUNCTION_MAVG 41
#define FUNCTION_CSUM 42
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TSCALARFUNCTION_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TTSZIP_H
#define TDENGINE_TTSZIP_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "taosdef.h"
#include "tvariant.h"
#define MEM_BUF_SIZE (1 << 20)
#define TS_COMP_FILE_MAGIC 0x87F5EC4C
#define TS_COMP_FILE_GROUP_MAX 512
typedef struct STSList {
char* rawBuf;
int32_t allocSize;
int32_t threshold;
int32_t len;
} STSList;
typedef struct STSElem {
TSKEY ts;
tVariant* tag;
int32_t id;
} STSElem;
typedef struct STSCursor {
int32_t vgroupIndex;
int32_t blockIndex;
int32_t tsIndex;
uint32_t order;
} STSCursor;
typedef struct STSBlock {
tVariant tag; // tag value
int32_t numOfElem; // number of elements
int32_t compLen; // size after compressed
int32_t padding; // 0xFFFFFFFF by default, after the payload
char* payload; // actual data that is compressed
} STSBlock;
/*
* The size of buffer file should not be greater than 2G,
* and the offset of int32_t type is enough
*/
typedef struct STSGroupBlockInfo {
int32_t id; // group id
int32_t offset; // offset set value in file
int32_t numOfBlocks; // number of total blocks
int32_t compLen; // compressed size
} STSGroupBlockInfo;
typedef struct STSGroupBlockInfoEx {
STSGroupBlockInfo info;
int32_t len; // length before compress
} STSGroupBlockInfoEx;
typedef struct STSBuf {
FILE* f;
char path[PATH_MAX];
uint32_t fileSize;
// todo use array
STSGroupBlockInfoEx* pData;
uint32_t numOfAlloc;
uint32_t numOfGroups;
char* assistBuf;
int32_t bufSize;
STSBlock block;
STSList tsData; // uncompressed raw ts data
uint64_t numOfTotal;
bool autoDelete;
bool remainOpen;
int32_t tsOrder; // order of timestamp in ts comp buffer
STSCursor cur;
} STSBuf;
typedef struct STSBufFileHeader {
uint32_t magic; // file magic number
uint32_t numOfGroup; // number of group stored in current file
int32_t tsOrder; // timestamp order in current file
} STSBufFileHeader;
STSBuf* tsBufCreate(bool autoDelete, int32_t order);
STSBuf* tsBufCreateFromFile(const char* path, bool autoDelete);
STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t tsOrder, int32_t id);
void* tsBufDestroy(STSBuf* pTSBuf);
void tsBufAppend(STSBuf* pTSBuf, int32_t id, tVariant* tag, const char* pData, int32_t len);
int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf);
STSBuf* tsBufClone(STSBuf* pTSBuf);
STSGroupBlockInfo* tsBufGetGroupBlockInfo(STSBuf* pTSBuf, int32_t id);
void tsBufFlush(STSBuf* pTSBuf);
void tsBufResetPos(STSBuf* pTSBuf);
bool tsBufNextPos(STSBuf* pTSBuf);
STSElem tsBufGetElem(STSBuf* pTSBuf);
STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t id, tVariant* tag);
STSCursor tsBufGetCursor(STSBuf* pTSBuf);
void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order);
void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur);
/**
* display all data in comp block file, for debug purpose only
* @param pTSBuf
*/
void tsBufDisplay(STSBuf* pTSBuf);
int32_t tsBufGetNumOfGroup(STSBuf* pTSBuf);
void tsBufGetGroupIdList(STSBuf* pTSBuf, int32_t* num, int32_t** id);
int32_t dumpFileBlockByGroupId(STSBuf* pTSBuf, int32_t id, void* buf, int32_t* len, int32_t* numOfBlocks);
STSElem tsBufFindElemStartPosByTag(STSBuf* pTSBuf, tVariant* pTag);
bool tsBufIsValidElem(STSElem* pElem);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TTSZIP_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TUDF_H
#define TDENGINE_TUDF_H
enum {
TSDB_UDF_FUNC_NORMAL = 0,
TSDB_UDF_FUNC_INIT,
TSDB_UDF_FUNC_FINALIZE,
TSDB_UDF_FUNC_MERGE,
TSDB_UDF_FUNC_DESTROY,
TSDB_UDF_FUNC_MAX_NUM
};
typedef struct SUdfInit {
int32_t maybe_null; /* 1 if function can return NULL */
uint32_t decimals; /* for real functions */
uint64_t length; /* For string functions */
char* ptr; /* free pointer for function data */
int32_t const_item; /* 0 if result is independent of arguments */
// script like lua/javascript
void* script_ctx;
void (*destroyCtxFunc)(void* script_ctx);
} SUdfInit;
typedef struct SUdfInfo {
int32_t functionId; // system assigned function id
int32_t funcType; // scalar function or aggregate function
int8_t resType; // result type
int16_t resBytes; // result byte
int32_t contLen; // content length
int32_t bufSize; // interbuf size
char* name; // function name
void* handle; // handle loaded in mem
void* funcs[TSDB_UDF_FUNC_MAX_NUM]; // function ptr
// for script like lua/javascript only
int isScript;
void* pScriptCtx;
SUdfInit init;
char* content;
char* path;
} SUdfInfo;
// script
typedef int32_t (*scriptInitFunc)(void* pCtx);
typedef void (*scriptNormalFunc)(void* pCtx, char* data, int16_t iType, int16_t iBytes, int32_t numOfRows,
int64_t* ptList, int64_t key, char* dataOutput, char* tsOutput, int32_t* numOfOutput,
int16_t oType, int16_t oBytes);
typedef void (*scriptFinalizeFunc)(void* pCtx, int64_t key, char* dataOutput, int32_t* numOfOutput);
typedef void (*scriptMergeFunc)(void* pCtx, char* data, int32_t numOfRows, char* dataOutput, int32_t* numOfOutput);
typedef void (*scriptDestroyFunc)(void* pCtx);
// dynamic lib
typedef void (*udfNormalFunc)(char* data, int16_t itype, int16_t iBytes, int32_t numOfRows, int64_t* ts,
char* dataOutput, char* interBuf, char* tsOutput, int32_t* numOfOutput, int16_t oType,
int16_t oBytes, SUdfInit* buf);
typedef int32_t (*udfInitFunc)(SUdfInit* data);
typedef void (*udfFinalizeFunc)(char* dataOutput, char* interBuf, int32_t* numOfOutput, SUdfInit* buf);
typedef void (*udfMergeFunc)(char* data, int32_t numOfRows, char* dataOutput, int32_t* numOfOutput, SUdfInit* buf);
typedef void (*udfDestroyFunc)(SUdfInit* buf);
#endif // TDENGINE_TUDF_H
此差异已折叠。
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "taosdef.h"
#include "taosmsg.h"
#include "ttypes.h"
#include "tfill.h"
#include "thash.h"
#include "function.h"
#include "common.h"
#include "ttime.h"
#define FILL_IS_ASC_FILL(_f) ((_f)->order == TSDB_ORDER_ASC)
#define DO_INTERPOLATION(_v1, _v2, _k1, _k2, _k) ((_v1) + ((_v2) - (_v1)) * (((double)(_k)) - ((double)(_k1))) / (((double)(_k2)) - ((double)(_k1))))
#define GET_FORWARD_DIRECTION_FACTOR(_ord) (((_ord) == TSDB_ORDER_ASC)? 1:-1)
static void setTagsValue(SFillInfo* pFillInfo, void** data, int32_t genRows) {
for(int32_t j = 0; j < pFillInfo->numOfCols; ++j) {
SFillColInfo* pCol = &pFillInfo->pFillCol[j];
if (TSDB_COL_IS_NORMAL_COL(pCol->flag) || TSDB_COL_IS_UD_COL(pCol->flag)) {
continue;
}
char* val1 = elePtrAt(data[j], pCol->col.bytes, genRows);
assert(pCol->tagIndex >= 0 && pCol->tagIndex < pFillInfo->numOfTags);
SFillTagColInfo* pTag = &pFillInfo->pTags[pCol->tagIndex];
assert (pTag->col.colId == pCol->col.colId);
assignVal(val1, pTag->tagVal, pCol->col.bytes, pCol->col.type);
}
}
static void setNullValueForRow(SFillInfo* pFillInfo, void** data, int32_t numOfCol, int32_t rowIndex) {
// the first are always the timestamp column, so start from the second column.
for (int32_t i = 1; i < numOfCol; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
char* output = elePtrAt(data[i], pCol->col.bytes, rowIndex);
setNull(output, pCol->col.type, pCol->col.bytes);
}
}
static void doFillOneRowResult(SFillInfo* pFillInfo, void** data, char** srcData, int64_t ts, bool outOfBound) {
char* prev = pFillInfo->prevValues;
char* next = pFillInfo->nextValues;
SPoint point1, point2, point;
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pFillInfo->order);
// set the primary timestamp column value
int32_t index = pFillInfo->numOfCurrent;
char* val = elePtrAt(data[0], TSDB_KEYSIZE, index);
*(TSKEY*) val = pFillInfo->currentKey;
// set the other values
if (pFillInfo->type == TSDB_FILL_PREV) {
char* p = FILL_IS_ASC_FILL(pFillInfo) ? prev : next;
if (p != NULL) {
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)) {
continue;
}
char* output = elePtrAt(data[i], pCol->col.bytes, index);
assignVal(output, p + pCol->col.offset, pCol->col.bytes, pCol->col.type);
}
} else { // no prev value yet, set the value for NULL
setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index);
}
} else if (pFillInfo->type == TSDB_FILL_NEXT) {
char* p = FILL_IS_ASC_FILL(pFillInfo)? next : prev;
if (p != NULL) {
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)) {
continue;
}
char* output = elePtrAt(data[i], pCol->col.bytes, index);
assignVal(output, p + pCol->col.offset, pCol->col.bytes, pCol->col.type);
}
} else { // no prev value yet, set the value for NULL
setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index);
}
} else if (pFillInfo->type == TSDB_FILL_LINEAR) {
// TODO : linear interpolation supports NULL value
if (prev != NULL && !outOfBound) {
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)) {
continue;
}
int16_t type = pCol->col.type;
int16_t bytes = pCol->col.bytes;
char *val1 = elePtrAt(data[i], pCol->col.bytes, index);
if (type == TSDB_DATA_TYPE_BINARY|| type == TSDB_DATA_TYPE_NCHAR || type == TSDB_DATA_TYPE_BOOL) {
setNull(val1, pCol->col.type, bytes);
continue;
}
point1 = (SPoint){.key = *(TSKEY*)(prev), .val = prev + pCol->col.offset};
point2 = (SPoint){.key = ts, .val = srcData[i] + pFillInfo->index * bytes};
point = (SPoint){.key = pFillInfo->currentKey, .val = val1};
taosGetLinearInterpolationVal(&point, type, &point1, &point2, type);
}
} else {
setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index);
}
} else { // fill the default value */
for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)/* || IS_VAR_DATA_TYPE(pCol->col.type)*/) {
continue;
}
char* val1 = elePtrAt(data[i], pCol->col.bytes, index);
assignVal(val1, (char*)&pCol->fillVal.i, pCol->col.bytes, pCol->col.type);
}
}
setTagsValue(pFillInfo, data, index);
pFillInfo->currentKey = taosTimeAdd(pFillInfo->currentKey, pFillInfo->interval.sliding * step, pFillInfo->interval.slidingUnit, pFillInfo->precision);
pFillInfo->numOfCurrent++;
}
static void initBeforeAfterDataBuf(SFillInfo* pFillInfo, char** next) {
if (*next != NULL) {
return;
}
*next = calloc(1, pFillInfo->rowSize);
for (int i = 1; i < pFillInfo->numOfCols; i++) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
setNull(*next + pCol->col.offset, pCol->col.type, pCol->col.bytes);
}
}
static void copyCurrentRowIntoBuf(SFillInfo* pFillInfo, char** srcData, char* buf) {
int32_t rowIndex = pFillInfo->index;
for (int32_t i = 0; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
memcpy(buf + pCol->col.offset, srcData[i] + rowIndex * pCol->col.bytes, pCol->col.bytes);
}
}
static int32_t fillResultImpl(SFillInfo* pFillInfo, void** data, int32_t outputRows) {
pFillInfo->numOfCurrent = 0;
char** srcData = pFillInfo->pData;
char** prev = &pFillInfo->prevValues;
char** next = &pFillInfo->nextValues;
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pFillInfo->order);
if (FILL_IS_ASC_FILL(pFillInfo)) {
assert(pFillInfo->currentKey >= pFillInfo->start);
} else {
assert(pFillInfo->currentKey <= pFillInfo->start);
}
while (pFillInfo->numOfCurrent < outputRows) {
int64_t ts = ((int64_t*)pFillInfo->pData[0])[pFillInfo->index];
// set the next value for interpolation
if ((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) ||
(pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) {
initBeforeAfterDataBuf(pFillInfo, next);
copyCurrentRowIntoBuf(pFillInfo, srcData, *next);
}
if (((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) || (pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) &&
pFillInfo->numOfCurrent < outputRows) {
// fill the gap between two actual input rows
while (((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) ||
(pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) &&
pFillInfo->numOfCurrent < outputRows) {
doFillOneRowResult(pFillInfo, data, srcData, ts, false);
}
// output buffer is full, abort
if (pFillInfo->numOfCurrent == outputRows) {
pFillInfo->numOfTotal += pFillInfo->numOfCurrent;
return outputRows;
}
} else {
assert(pFillInfo->currentKey == ts);
initBeforeAfterDataBuf(pFillInfo, prev);
if (pFillInfo->type == TSDB_FILL_NEXT && (pFillInfo->index + 1) < pFillInfo->numOfRows) {
initBeforeAfterDataBuf(pFillInfo, next);
++pFillInfo->index;
copyCurrentRowIntoBuf(pFillInfo, srcData, *next);
--pFillInfo->index;
}
// assign rows to dst buffer
for (int32_t i = 0; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
if (TSDB_COL_IS_TAG(pCol->flag)/* || IS_VAR_DATA_TYPE(pCol->col.type)*/) {
continue;
}
char* output = elePtrAt(data[i], pCol->col.bytes, pFillInfo->numOfCurrent);
char* src = elePtrAt(srcData[i], pCol->col.bytes, pFillInfo->index);
if (i == 0 || (pCol->functionId != FUNCTION_COUNT && !isNull(src, pCol->col.type)) ||
(pCol->functionId == FUNCTION_COUNT && GET_INT64_VAL(src) != 0)) {
assignVal(output, src, pCol->col.bytes, pCol->col.type);
memcpy(*prev + pCol->col.offset, src, pCol->col.bytes);
} else { // i > 0 and data is null , do interpolation
if (pFillInfo->type == TSDB_FILL_PREV) {
assignVal(output, *prev + pCol->col.offset, pCol->col.bytes, pCol->col.type);
} else if (pFillInfo->type == TSDB_FILL_LINEAR) {
assignVal(output, src, pCol->col.bytes, pCol->col.type);
memcpy(*prev + pCol->col.offset, src, pCol->col.bytes);
} else if (pFillInfo->type == TSDB_FILL_NEXT) {
if (*next) {
assignVal(output, *next + pCol->col.offset, pCol->col.bytes, pCol->col.type);
} else {
setNull(output, pCol->col.type, pCol->col.bytes);
}
} else {
assignVal(output, (char*)&pCol->fillVal.i, pCol->col.bytes, pCol->col.type);
}
}
}
// set the tag value for final result
setTagsValue(pFillInfo, data, pFillInfo->numOfCurrent);
pFillInfo->currentKey = taosTimeAdd(pFillInfo->currentKey, pFillInfo->interval.sliding * step,
pFillInfo->interval.slidingUnit, pFillInfo->precision);
pFillInfo->index += 1;
pFillInfo->numOfCurrent += 1;
}
if (pFillInfo->index >= pFillInfo->numOfRows || pFillInfo->numOfCurrent >= outputRows) {
/* the raw data block is exhausted, next value does not exists */
if (pFillInfo->index >= pFillInfo->numOfRows) {
tfree(*next);
}
pFillInfo->numOfTotal += pFillInfo->numOfCurrent;
return pFillInfo->numOfCurrent;
}
}
return pFillInfo->numOfCurrent;
}
static int64_t appendFilledResult(SFillInfo* pFillInfo, void** output, int64_t resultCapacity) {
/*
* These data are generated according to fill strategy, since the current timestamp is out of the time window of
* real result set. Note that we need to keep the direct previous result rows, to generated the filled data.
*/
pFillInfo->numOfCurrent = 0;
while (pFillInfo->numOfCurrent < resultCapacity) {
doFillOneRowResult(pFillInfo, output, pFillInfo->pData, pFillInfo->start, true);
}
pFillInfo->numOfTotal += pFillInfo->numOfCurrent;
assert(pFillInfo->numOfCurrent == resultCapacity);
return resultCapacity;
}
// there are no duplicated tags in the SFillTagColInfo list
static int32_t setTagColumnInfo(SFillInfo* pFillInfo, int32_t numOfCols, int32_t capacity) {
int32_t rowsize = 0;
int32_t numOfTags = 0;
int32_t k = 0;
for (int32_t i = 0; i < numOfCols; ++i) {
SFillColInfo* pColInfo = &pFillInfo->pFillCol[i];
pFillInfo->pData[i] = NULL;
if (TSDB_COL_IS_TAG(pColInfo->flag) || pColInfo->col.type == TSDB_DATA_TYPE_BINARY) {
numOfTags += 1;
bool exists = false;
int32_t index = -1;
for (int32_t j = 0; j < k; ++j) {
if (pFillInfo->pTags[j].col.colId == pColInfo->col.colId) {
exists = true;
index = j;
break;
}
}
if (!exists) {
SSchema* pSchema = &pFillInfo->pTags[k].col;
pSchema->colId = pColInfo->col.colId;
pSchema->type = pColInfo->col.type;
pSchema->bytes = pColInfo->col.bytes;
pFillInfo->pTags[k].tagVal = calloc(1, pColInfo->col.bytes);
pColInfo->tagIndex = k;
k += 1;
} else {
pColInfo->tagIndex = index;
}
}
rowsize += pColInfo->col.bytes;
}
pFillInfo->numOfTags = numOfTags;
assert(k <= pFillInfo->numOfTags);
return rowsize;
}
static int32_t taosNumOfRemainRows(SFillInfo* pFillInfo) {
if (pFillInfo->numOfRows == 0 || (pFillInfo->numOfRows > 0 && pFillInfo->index >= pFillInfo->numOfRows)) {
return 0;
}
return pFillInfo->numOfRows - pFillInfo->index;
}
SFillInfo* taosCreateFillInfo(int32_t order, TSKEY skey, int32_t numOfTags, int32_t capacity, int32_t numOfCols,
int64_t slidingTime, int8_t slidingUnit, int8_t precision, int32_t fillType,
SFillColInfo* pCol, void* handle) {
if (fillType == TSDB_FILL_NONE) {
return NULL;
}
SFillInfo* pFillInfo = calloc(1, sizeof(SFillInfo));
taosResetFillInfo(pFillInfo, skey);
pFillInfo->order = order;
pFillInfo->type = fillType;
pFillInfo->pFillCol = pCol;
pFillInfo->numOfTags = numOfTags;
pFillInfo->numOfCols = numOfCols;
pFillInfo->precision = precision;
pFillInfo->alloc = capacity;
pFillInfo->handle = handle;
pFillInfo->interval.interval = slidingTime;
pFillInfo->interval.intervalUnit = slidingUnit;
pFillInfo->interval.sliding = slidingTime;
pFillInfo->interval.slidingUnit = slidingUnit;
pFillInfo->pData = malloc(POINTER_BYTES * numOfCols);
// if (numOfTags > 0) {
pFillInfo->pTags = calloc(numOfCols, sizeof(SFillTagColInfo));
for (int32_t i = 0; i < numOfCols; ++i) {
pFillInfo->pTags[i].col.colId = -2; // TODO
}
// }
pFillInfo->rowSize = setTagColumnInfo(pFillInfo, pFillInfo->numOfCols, pFillInfo->alloc);
assert(pFillInfo->rowSize > 0);
return pFillInfo;
}
void taosResetFillInfo(SFillInfo* pFillInfo, TSKEY startTimestamp) {
pFillInfo->start = startTimestamp;
pFillInfo->currentKey = startTimestamp;
pFillInfo->end = startTimestamp;
pFillInfo->index = -1;
pFillInfo->numOfRows = 0;
pFillInfo->numOfCurrent = 0;
pFillInfo->numOfTotal = 0;
}
void* taosDestroyFillInfo(SFillInfo* pFillInfo) {
if (pFillInfo == NULL) {
return NULL;
}
tfree(pFillInfo->prevValues);
tfree(pFillInfo->nextValues);
for(int32_t i = 0; i < pFillInfo->numOfTags; ++i) {
tfree(pFillInfo->pTags[i].tagVal);
}
tfree(pFillInfo->pTags);
tfree(pFillInfo->pData);
tfree(pFillInfo->pFillCol);
tfree(pFillInfo);
return NULL;
}
void taosFillSetStartInfo(SFillInfo* pFillInfo, int32_t numOfRows, TSKEY endKey) {
if (pFillInfo->type == TSDB_FILL_NONE) {
return;
}
pFillInfo->end = endKey;
if (!FILL_IS_ASC_FILL(pFillInfo)) {
pFillInfo->end = taosTimeTruncate(endKey, &pFillInfo->interval, pFillInfo->precision);
}
pFillInfo->index = 0;
pFillInfo->numOfRows = numOfRows;
}
void taosFillSetInputDataBlock(SFillInfo* pFillInfo, const SSDataBlock* pInput) {
for (int32_t i = 0; i < pFillInfo->numOfCols; ++i) {
SFillColInfo* pCol = &pFillInfo->pFillCol[i];
SColumnInfoData* pColData = taosArrayGet(pInput->pDataBlock, i);
pFillInfo->pData[i] = pColData->pData;
if (TSDB_COL_IS_TAG(pCol->flag)) { // copy the tag value to tag value buffer
SFillTagColInfo* pTag = &pFillInfo->pTags[pCol->tagIndex];
assert (pTag->col.colId == pCol->col.colId);
memcpy(pTag->tagVal, pColData->pData, pCol->col.bytes); // TODO not memcpy??
}
}
}
bool taosFillHasMoreResults(SFillInfo* pFillInfo) {
int32_t remain = taosNumOfRemainRows(pFillInfo);
if (remain > 0) {
return true;
}
if (pFillInfo->numOfTotal > 0 && (((pFillInfo->end > pFillInfo->start) && FILL_IS_ASC_FILL(pFillInfo)) ||
(pFillInfo->end < pFillInfo->start && !FILL_IS_ASC_FILL(pFillInfo)))) {
return getNumOfResultsAfterFillGap(pFillInfo, pFillInfo->end, 4096) > 0;
}
return false;
}
int64_t getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, TSKEY ekey, int32_t maxNumOfRows) {
int64_t* tsList = (int64_t*) pFillInfo->pData[0];
int32_t numOfRows = taosNumOfRemainRows(pFillInfo);
TSKEY ekey1 = ekey;
if (!FILL_IS_ASC_FILL(pFillInfo)) {
pFillInfo->end = taosTimeTruncate(ekey, &pFillInfo->interval, pFillInfo->precision);
}
int64_t numOfRes = -1;
if (numOfRows > 0) { // still fill gap within current data block, not generating data after the result set.
TSKEY lastKey = tsList[pFillInfo->numOfRows - 1];
numOfRes = taosTimeCountInterval(
lastKey,
pFillInfo->currentKey,
pFillInfo->interval.sliding,
pFillInfo->interval.slidingUnit,
pFillInfo->precision);
numOfRes += 1;
assert(numOfRes >= numOfRows);
} else { // reach the end of data
if ((ekey1 < pFillInfo->currentKey && FILL_IS_ASC_FILL(pFillInfo)) ||
(ekey1 > pFillInfo->currentKey && !FILL_IS_ASC_FILL(pFillInfo))) {
return 0;
}
numOfRes = taosTimeCountInterval(
ekey1,
pFillInfo->currentKey,
pFillInfo->interval.sliding,
pFillInfo->interval.slidingUnit,
pFillInfo->precision);
numOfRes += 1;
}
return (numOfRes > maxNumOfRows) ? maxNumOfRows : numOfRes;
}
int32_t taosGetLinearInterpolationVal(SPoint* point, int32_t outputType, SPoint* point1, SPoint* point2, int32_t inputType) {
double v1 = -1, v2 = -1;
GET_TYPED_DATA(v1, double, inputType, point1->val);
GET_TYPED_DATA(v2, double, inputType, point2->val);
double r = DO_INTERPOLATION(v1, v2, point1->key, point2->key, point->key);
SET_TYPED_DATA(point->val, outputType, r);
return TSDB_CODE_SUCCESS;
}
int64_t taosFillResultDataBlock(SFillInfo* pFillInfo, void** output, int32_t capacity) {
int32_t remain = taosNumOfRemainRows(pFillInfo);
int64_t numOfRes = getNumOfResultsAfterFillGap(pFillInfo, pFillInfo->end, capacity);
assert(numOfRes <= capacity);
// no data existed for fill operation now, append result according to the fill strategy
if (remain == 0) {
appendFilledResult(pFillInfo, output, numOfRes);
} else {
fillResultImpl(pFillInfo, output, (int32_t) numOfRes);
assert(numOfRes == pFillInfo->numOfCurrent);
}
// qDebug("fill:%p, generated fill result, src block:%d, index:%d, brange:%"PRId64"-%"PRId64", currentKey:%"PRId64", current:%d, total:%d, %p",
// pFillInfo, pFillInfo->numOfRows, pFillInfo->index, pFillInfo->start, pFillInfo->end, pFillInfo->currentKey, pFillInfo->numOfCurrent,
// pFillInfo->numOfTotal, pFillInfo->handle);
return numOfRes;
}
#include "os.h"
#include "tarray.h"
#include "function.h"
#include "thash.h"
#include "taggfunction.h"
#include "tscalarfunction.h"
static SHashObj* functionHashTable = NULL;
static void doInitFunctionHashTable() {
int numOfEntries = tListLen(aggFunc);
functionHashTable = taosHashInit(numOfEntries, MurmurHash3_32, false, false);
for (int32_t i = 0; i < numOfEntries; i++) {
int32_t len = (uint32_t)strlen(aggFunc[i].name);
taosHashPut(functionHashTable, aggFunc[i].name, len, (void*)&aggFunc[i], POINTER_BYTES);
}
numOfEntries = tListLen(scalarFunc);
for(int32_t i = 0; i < numOfEntries; ++i) {
int32_t len = (int32_t) strlen(scalarFunc[i].name);
taosHashPut(functionHashTable, scalarFunc[i].name, len, (void*)&scalarFunc[i], POINTER_BYTES);
}
}
static pthread_once_t functionHashTableInit = PTHREAD_ONCE_INIT;
int32_t qIsBuiltinFunction(const char* name, int32_t len) {
pthread_once(&functionHashTableInit, doInitFunctionHashTable);
SAggFunctionInfo** pInfo = taosHashGet(functionHashTable, name, len);
if (pInfo != NULL) {
return (*pInfo)->functionId;
} else {
return -1;
}
}
bool isTagsQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int16_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
// "select count(tbname)" query
// if (functId == FUNCTION_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
// continue;
// }
if (f != FUNCTION_TAGPRJ && f != FUNCTION_TID_TAG) {
return false;
}
}
return true;
}
//bool tscMultiRoundQuery(SArray* pFunctionIdList, int32_t index) {
// if (!UTIL_TABLE_IS_SUPER_TABLE(pQueryInfo->pTableMetaInfo[index])) {
// return false;
// }
//
// size_t numOfExprs = (int32_t) getNumOfExprs(pQueryInfo);
// for(int32_t i = 0; i < numOfExprs; ++i) {
// SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
// if (pExpr->base.functionId == FUNCTION_STDDEV_DST) {
// return true;
// }
// }
//
// return false;
//}
bool isBlockInfoQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_BLKINFO) {
return true;
}
}
return false;
}
bool isProjectionQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TS_DUMMY) {
continue;
}
if (f != FUNCTION_PRJ && f != FUNCTION_TAGPRJ && f != FUNCTION_TAG &&
f != FUNCTION_TS && f != FUNCTION_ARITHM && f != FUNCTION_DIFF &&
f != FUNCTION_DERIVATIVE) {
return false;
}
}
return true;
}
bool isDiffDerivQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TS_DUMMY) {
continue;
}
if (f == FUNCTION_DIFF || f == FUNCTION_DERIVATIVE) {
return true;
}
}
return false;
}
bool isPointInterpQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TAG || f == FUNCTION_TS) {
continue;
}
if (f != FUNCTION_INTERP) {
return false;
}
}
return true;
}
bool isArithmeticQueryOnAggResult(SArray* pFunctionIdList) {
if (isProjectionQuery(pFunctionIdList)) {
return false;
}
assert(0);
// size_t numOfOutput = getNumOfFields(pQueryInfo);
// for(int32_t i = 0; i < numOfOutput; ++i) {
// SExprInfo* pExprInfo = tscFieldInfoGetInternalField(&pQueryInfo->fieldsInfo, i)->pExpr;
// if (pExprInfo->pExpr != NULL) {
// return true;
// }
// }
return false;
}
bool isGroupbyColumn(SArray* pFunctionIdList) {
// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
// int32_t numOfCols = getNumOfColumns(pTableMetaInfo->pTableMeta);
//
// SGroupbyExpr* pGroupbyExpr = &pQueryInfo->groupbyExpr;
// for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
// SColIndex* pIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);
// if (!TSDB_COL_IS_TAG(pIndex->flag) && pIndex->colIndex < numOfCols) { // group by normal columns
// return true;
// }
// }
return false;
}
bool isTopBotQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TS) {
continue;
}
if (f == FUNCTION_TOP || f == FUNCTION_BOTTOM) {
return true;
}
}
return false;
}
bool isTsCompQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
if (num != 1) {
return false;
}
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, 0);
return f == FUNCTION_TS_COMP;
}
bool isTWAQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TWA) {
return true;
}
}
return false;
}
bool isIrateQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_IRATE) {
return true;
}
}
return false;
}
bool isStabledev(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_STDDEV_DST) {
return true;
}
}
return false;
}
bool needReverseScan(SArray* pFunctionIdList) {
assert(0);
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TS || f == FUNCTION_TS_DUMMY || f == FUNCTION_TAG) {
continue;
}
// if ((f == FUNCTION_FIRST || f == FUNCTION_FIRST_DST) && pQueryInfo->order.order == TSDB_ORDER_DESC) {
// return true;
// }
if (f == FUNCTION_LAST || f == FUNCTION_LAST_DST) {
// the scan order to acquire the last result of the specified column
// int32_t order = (int32_t)pExpr->base.param[0].i64;
// if (order != pQueryInfo->order.order) {
// return true;
// }
}
}
return false;
}
bool isSimpleAggregateRv(SArray* pFunctionIdList) {
assert(0);
// if (pQueryInfo->interval.interval > 0 || pQueryInfo->sessionWindow.gap > 0) {
// return false;
// }
//
// if (tscIsDiffDerivQuery(pQueryInfo)) {
// return false;
// }
//
// size_t numOfExprs = getNumOfExprs(pQueryInfo);
// for (int32_t i = 0; i < numOfExprs; ++i) {
// SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
// if (pExpr == NULL) {
// continue;
// }
//
// int32_t functionId = pExpr->base.functionId;
// if (functionId < 0) {
// SUdfInfo* pUdfInfo = taosArrayGet(pQueryInfo->pUdfInfo, -1 * functionId - 1);
// if (pUdfInfo->funcType == TSDB_UDF_TYPE_AGGREGATE) {
// return true;
// }
//
// continue;
// }
//
// if (functionId == FUNCTION_TS || functionId == FUNCTION_TS_DUMMY) {
// continue;
// }
//
// if ((!IS_MULTIOUTPUT(aAggs[functionId].status)) ||
// (functionId == FUNCTION_TOP || functionId == FUNCTION_BOTTOM || functionId == FUNCTION_TS_COMP)) {
// return true;
// }
// }
return false;
}
bool isBlockDistQuery(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, 0);
return (num == 1 && f == FUNCTION_BLKINFO);
}
bool isTwoStageSTableQuery(SArray* pFunctionIdList, int32_t tableIndex) {
// if (pQueryInfo == NULL) {
// return false;
// }
//
// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex);
// if (pTableMetaInfo == NULL) {
// return false;
// }
//
// if ((pQueryInfo->type & TSDB_QUERY_TYPE_FREE_RESOURCE) == TSDB_QUERY_TYPE_FREE_RESOURCE) {
// return false;
// }
//
// // for ordered projection query, iterate all qualified vnodes sequentially
// if (tscNonOrderedProjectionQueryOnSTable(pQueryInfo, tableIndex)) {
// return false;
// }
//
// if (!TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_STABLE_SUBQUERY) && pQueryInfo->command == TSDB_SQL_SELECT) {
// return UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo);
// }
return false;
}
bool isProjectionQueryOnSTable(SArray* pFunctionIdList, int32_t tableIndex) {
// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex);
//
// /*
// * In following cases, return false for non ordered project query on super table
// * 1. failed to get tableMeta from server; 2. not a super table; 3. limitation is 0;
// * 4. show queries, instead of a select query
// */
// size_t numOfExprs = getNumOfExprs(pQueryInfo);
// if (pTableMetaInfo == NULL || !UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo) ||
// pQueryInfo->command == TSDB_SQL_RETRIEVE_EMPTY_RESULT || numOfExprs == 0) {
// return false;
// }
//
// for (int32_t i = 0; i < numOfExprs; ++i) {
// int32_t functionId = getExprInfo(pQueryInfo, i)->base.functionId;
//
// if (functionId < 0) {
// SUdfInfo* pUdfInfo = taosArrayGet(pQueryInfo->pUdfInfo, -1 * functionId - 1);
// if (pUdfInfo->funcType == TSDB_UDF_TYPE_AGGREGATE) {
// return false;
// }
//
// continue;
// }
//
// if (functionId != FUNCTION_PRJ &&
// functionId != FUNCTION_TAGPRJ &&
// functionId != FUNCTION_TAG &&
// functionId != FUNCTION_TS &&
// functionId != FUNCTION_ARITHM &&
// functionId != FUNCTION_TS_COMP &&
// functionId != FUNCTION_DIFF &&
// functionId != FUNCTION_DERIVATIVE &&
// functionId != FUNCTION_TS_DUMMY &&
// functionId != FUNCTION_TID_TAG) {
// return false;
// }
// }
return true;
}
bool hasTagValOutput(SArray* pFunctionIdList) {
// size_t numOfExprs = getNumOfExprs(pQueryInfo);
// SExprInfo* pExpr1 = getExprInfo(pQueryInfo, 0);
//
// if (numOfExprs == 1 && pExpr1->base.functionId == FUNCTION_TS_COMP) {
// return true;
// }
//
// for (int32_t i = 0; i < numOfExprs; ++i) {
// SExprInfo* pExpr = getExprInfo(pQueryInfo, i);
// if (pExpr == NULL) {
// continue;
// }
//
// // ts_comp column required the tag value for join filter
// if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
// return true;
// }
// }
return false;
}
bool timeWindowInterpoRequired(SArray* pFunctionIdList) {
int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList);
for (int32_t i = 0; i < num; ++i) {
int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i);
if (f == FUNCTION_TWA || f == FUNCTION_INTERP) {
return true;
}
}
return false;
}
//SQueryType setQueryType(SArray* pFunctionIdList) {
// assert(pFunctionIdList != NULL);
//
//
//}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "thistogram.h"
#include "taosdef.h"
#include "taosmsg.h"
#include "tlosertree.h"
/**
*
* implement the histogram and percentile_approx based on the paper:
* Yael Ben-Haim, Elad Tom-Tov. A Streaming Parallel Decision Tree Algorithm,
* The Journal of Machine Learning Research.Volume 11, 3/1/2010 pp.849-872
* https://dl.acm.org/citation.cfm?id=1756034
*
* @data 2018-12-14
* @version 0.1
*
*/
static int32_t histogramCreateBin(SHistogramInfo* pHisto, int32_t index, double val);
SHistogramInfo* tHistogramCreate(int32_t numOfEntries) {
/* need one redundant slot */
SHistogramInfo* pHisto = malloc(sizeof(SHistogramInfo) + sizeof(SHistBin) * (numOfEntries + 1));
#if !defined(USE_ARRAYLIST)
pHisto->pList = SSkipListCreate(MAX_SKIP_LIST_LEVEL, TSDB_DATA_TYPE_DOUBLE, sizeof(double));
SInsertSupporter* pss = malloc(sizeof(SInsertSupporter));
pss->numOfEntries = pHisto->maxEntries;
pss->pSkipList = pHisto->pList;
int32_t ret = tLoserTreeCreate1(&pHisto->pLoserTree, numOfEntries, pss, compare);
pss->pTree = pHisto->pLoserTree;
#endif
return tHistogramCreateFrom(pHisto, numOfEntries);
}
SHistogramInfo* tHistogramCreateFrom(void* pBuf, int32_t numOfBins) {
memset(pBuf, 0, sizeof(SHistogramInfo) + sizeof(SHistBin) * (numOfBins + 1));
SHistogramInfo* pHisto = (SHistogramInfo*)pBuf;
pHisto->elems = (SHistBin*)((char*)pBuf + sizeof(SHistogramInfo));
for(int32_t i = 0; i < numOfBins; ++i) {
pHisto->elems[i].val = -DBL_MAX;
}
pHisto->maxEntries = numOfBins;
pHisto->min = DBL_MAX;
pHisto->max = -DBL_MAX;
return pBuf;
}
int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) {
if (*pHisto == NULL) {
*pHisto = tHistogramCreate(MAX_HISTOGRAM_BIN);
}
#if defined(USE_ARRAYLIST)
int32_t idx = histoBinarySearch((*pHisto)->elems, (*pHisto)->numOfEntries, val);
assert(idx >= 0 && idx <= (*pHisto)->maxEntries && (*pHisto)->elems != NULL);
if ((*pHisto)->elems[idx].val == val && idx >= 0) {
(*pHisto)->elems[idx].num += 1;
if ((*pHisto)->numOfEntries == 0) {
(*pHisto)->numOfEntries += 1;
}
} else { /* insert a new slot */
if ((*pHisto)->numOfElems >= 1 && idx < (*pHisto)->numOfEntries) {
if (idx > 0) {
assert((*pHisto)->elems[idx - 1].val <= val);
} else {
assert((*pHisto)->elems[idx].val > val);
}
} else if ((*pHisto)->numOfElems > 0) {
assert((*pHisto)->elems[(*pHisto)->numOfEntries].val <= val);
}
histogramCreateBin(*pHisto, idx, val);
}
#else
tSkipListKey key = tSkipListCreateKey(TSDB_DATA_TYPE_DOUBLE, &val, tDataTypes[TSDB_DATA_TYPE_DOUBLE].nSize);
SHistBin* entry = calloc(1, sizeof(SHistBin));
entry->val = val;
tSkipListNode* pResNode = SSkipListPut((*pHisto)->pList, entry, &key, 0);
SHistBin* pEntry1 = (SHistBin*)pResNode->pData;
pEntry1->index = -1;
tSkipListNode* pLast = NULL;
if (pEntry1->num == 0) { /* it is a new node */
(*pHisto)->numOfEntries += 1;
pEntry1->num += 1;
/* number of entries reaches the upper limitation */
if (pResNode->pForward[0] != NULL) {
/* we need to update the last updated slot in loser tree*/
pEntry1->delta = ((SHistBin*)pResNode->pForward[0]->pData)->val - val;
if ((*pHisto)->ordered) {
int32_t lastIndex = (*pHisto)->maxIndex;
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
(*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].pData = pResNode;
pEntry1->index = (*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].index;
// update the loser tree
if ((*pHisto)->ordered) {
tLoserTreeAdjust(pTree, pEntry1->index + pTree->numOfEntries);
}
tSkipListKey kx =
tSkipListCreateKey(TSDB_DATA_TYPE_DOUBLE, &(*pHisto)->max, tDataTypes[TSDB_DATA_TYPE_DOUBLE].nSize);
pLast = tSkipListGetOne((*pHisto)->pList, &kx);
}
} else {
/* this node located at the last position of the skiplist, we do not
* update the loser-tree */
pEntry1->delta = DBL_MAX;
pLast = pResNode;
}
if (pResNode->pBackward[0] != &(*pHisto)->pList->pHead) {
SHistBin* pPrevEntry = (SHistBin*)pResNode->pBackward[0]->pData;
pPrevEntry->delta = val - pPrevEntry->val;
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
if ((*pHisto)->ordered) {
tLoserTreeAdjust(pTree, pPrevEntry->index + pTree->numOfEntries);
tLoserTreeDisplay(pTree);
}
}
if ((*pHisto)->numOfEntries >= (*pHisto)->maxEntries + 1) {
// set the right value for loser-tree
assert((*pHisto)->pLoserTree != NULL);
if (!(*pHisto)->ordered) {
SSkipListPrint((*pHisto)->pList, 1);
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
tSkipListNode* pHead = (*pHisto)->pList->pHead.pForward[0];
tSkipListNode* p1 = pHead;
printf("\n");
while (p1 != NULL) {
printf("%f\t", ((SHistBin*)(p1->pData))->delta);
p1 = p1->pForward[0];
}
printf("\n");
/* last one in skiplist is ignored */
for (int32_t i = pTree->numOfEntries; i < pTree->totalEntries; ++i) {
pTree->pNode[i].pData = pHead;
pTree->pNode[i].index = i - pTree->numOfEntries;
SHistBin* pBin = (SHistBin*)pHead->pData;
pBin->index = pTree->pNode[i].index;
pHead = pHead->pForward[0];
}
pLast = pHead;
for (int32_t i = 0; i < pTree->numOfEntries; ++i) {
pTree->pNode[i].index = -1;
}
tLoserTreeDisplay(pTree);
for (int32_t i = pTree->totalEntries - 1; i >= pTree->numOfEntries; i--) {
tLoserTreeAdjust(pTree, i);
}
tLoserTreeDisplay(pTree);
(*pHisto)->ordered = true;
}
printf("delta is:%lf\n", pEntry1->delta);
SSkipListPrint((*pHisto)->pList, 1);
/* the chosen node */
tSkipListNode* pNode = (*pHisto)->pLoserTree->pNode[0].pData;
SHistBin* pEntry = (SHistBin*)pNode->pData;
tSkipListNode* pNext = pNode->pForward[0];
SHistBin* pNextEntry = (SHistBin*)pNext->pData;
assert(pNextEntry->val - pEntry->val == pEntry->delta);
double newVal = (pEntry->val * pEntry->num + pNextEntry->val * pNextEntry->num) / (pEntry->num + pNextEntry->num);
pEntry->val = newVal;
pNode->key.dKey = newVal;
pEntry->num = pEntry->num + pNextEntry->num;
// update delta value in current node
pEntry->delta = (pNextEntry->delta + pNextEntry->val) - pEntry->val;
// reset delta value in the previous node
SHistBin* pPrevEntry = (SHistBin*)pNode->pBackward[0]->pData;
if (pPrevEntry) {
pPrevEntry->delta = pEntry->val - pPrevEntry->val;
}
SLoserTreeInfo* pTree = (*pHisto)->pLoserTree;
if (pNextEntry->index != -1) {
(*pHisto)->maxIndex = pNextEntry->index;
// set the last element in skiplist, of which delta is FLT_MAX;
pTree->pNode[pNextEntry->index + pTree->numOfEntries].pData = pLast;
((SHistBin*)pLast->pData)->index = pNextEntry->index;
int32_t f = pTree->pNode[pNextEntry->index + pTree->numOfEntries].index;
printf("disappear index is:%d\n", f);
}
tLoserTreeAdjust(pTree, pEntry->index + pTree->numOfEntries);
// remove the next node in skiplist
tSkipListRemoveNode((*pHisto)->pList, pNext);
SSkipListPrint((*pHisto)->pList, 1);
tLoserTreeDisplay((*pHisto)->pLoserTree);
} else { // add to heap
if (pResNode->pForward[0] != NULL) {
pEntry1->delta = ((SHistBin*)pResNode->pForward[0]->pData)->val - val;
} else {
pEntry1->delta = DBL_MAX;
}
if (pResNode->pBackward[0] != &(*pHisto)->pList->pHead) {
SHistBin* pPrevEntry = (SHistBin*)pResNode->pBackward[0]->pData;
pEntry1->delta = val - pPrevEntry->val;
}
printf("delta is:%9lf\n", pEntry1->delta);
}
} else {
SHistBin* pEntry = (SHistBin*)pResNode->pData;
assert(pEntry->val == val);
pEntry->num += 1;
}
#endif
if (val > (*pHisto)->max) {
(*pHisto)->max = val;
}
if (val < (*pHisto)->min) {
(*pHisto)->min = val;
}
(*pHisto)->numOfElems += 1;
return 0;
}
int32_t histoBinarySearch(SHistBin* pEntry, int32_t len, double val) {
int32_t end = len - 1;
int32_t start = 0;
while (start <= end) {
int32_t mid = (end - start) / 2 + start;
if (pEntry[mid].val == val) {
return mid;
}
if (pEntry[mid].val < val) {
start = mid + 1;
} else {
end = mid - 1;
}
}
int32_t ret = start > end ? start : end;
if (ret < 0) {
return 0;
} else {
return ret;
}
}
static void histogramMergeImpl(SHistBin* pHistBin, int32_t* size) {
#if defined(USE_ARRAYLIST)
int32_t oldSize = *size;
double delta = DBL_MAX;
int32_t index = -1;
for (int32_t i = 1; i < oldSize; ++i) {
double d = pHistBin[i].val - pHistBin[i - 1].val;
if (d < delta) {
delta = d;
index = i - 1;
}
}
SHistBin* s1 = &pHistBin[index];
SHistBin* s2 = &pHistBin[index + 1];
double newVal = (s1->val * s1->num + s2->val * s2->num) / (s1->num + s2->num);
s1->val = newVal;
s1->num = s1->num + s2->num;
memmove(&pHistBin[index + 1], &pHistBin[index + 2], (oldSize - index - 2) * sizeof(SHistBin));
(*size) -= 1;
#endif
}
/* optimize this procedure */
int32_t histogramCreateBin(SHistogramInfo* pHisto, int32_t index, double val) {
#if defined(USE_ARRAYLIST)
int32_t remain = pHisto->numOfEntries - index;
if (remain > 0) {
memmove(&pHisto->elems[index + 1], &pHisto->elems[index], sizeof(SHistBin) * remain);
}
assert(index >= 0 && index <= pHisto->maxEntries);
pHisto->elems[index].num = 1;
pHisto->elems[index].val = val;
pHisto->numOfEntries += 1;
/* we need to merge the slot */
if (pHisto->numOfEntries == pHisto->maxEntries + 1) {
histogramMergeImpl(pHisto->elems, &pHisto->numOfEntries);
pHisto->elems[pHisto->maxEntries].val = 0;
pHisto->elems[pHisto->maxEntries].num = 0;
}
#endif
assert(pHisto->numOfEntries <= pHisto->maxEntries);
return 0;
}
void tHistogramDestroy(SHistogramInfo** pHisto) {
if (*pHisto == NULL) {
return;
}
free(*pHisto);
*pHisto = NULL;
}
void tHistogramPrint(SHistogramInfo* pHisto) {
printf("total entries: %d, elements: %"PRId64 "\n", pHisto->numOfEntries, pHisto->numOfElems);
#if defined(USE_ARRAYLIST)
for (int32_t i = 0; i < pHisto->numOfEntries; ++i) {
printf("%d: (%f, %" PRId64 ")\n", i + 1, pHisto->elems[i].val, pHisto->elems[i].num);
}
#else
tSkipListNode* pNode = pHisto->pList->pHead.pForward[0];
for (int32_t i = 0; i < pHisto->numOfEntries; ++i) {
SHistBin* pEntry = (SHistBin*)pNode->pData;
printf("%d: (%f, %" PRId64 ")\n", i + 1, pEntry->val, pEntry->num);
pNode = pNode->pForward[0];
}
#endif
}
/**
* Estimated number of points in the interval (−inf,b].
* @param pHisto
* @param v
*/
int64_t tHistogramSum(SHistogramInfo* pHisto, double v) {
#if defined(USE_ARRAYLIST)
int32_t slotIdx = histoBinarySearch(pHisto->elems, pHisto->numOfEntries, v);
if (pHisto->elems[slotIdx].val != v) {
slotIdx -= 1;
if (slotIdx < 0) {
slotIdx = 0;
assert(v <= pHisto->elems[slotIdx].val);
} else {
assert(v >= pHisto->elems[slotIdx].val);
if (slotIdx + 1 < pHisto->numOfEntries) {
assert(v < pHisto->elems[slotIdx + 1].val);
}
}
}
double m1 = (double)pHisto->elems[slotIdx].num;
double v1 = pHisto->elems[slotIdx].val;
double m2 = (double)pHisto->elems[slotIdx + 1].num;
double v2 = pHisto->elems[slotIdx + 1].val;
double estNum = m1 + (m2 - m1) * (v - v1) / (v2 - v1);
double s1 = (m1 + estNum) * (v - v1) / (2 * (v2 - v1));
for (int32_t i = 0; i < slotIdx; ++i) {
s1 += pHisto->elems[i].num;
}
s1 = s1 + m1 / 2;
return (int64_t)s1;
#endif
}
double* tHistogramUniform(SHistogramInfo* pHisto, double* ratio, int32_t num) {
#if defined(USE_ARRAYLIST)
double* pVal = malloc(num * sizeof(double));
for (int32_t i = 0; i < num; ++i) {
double numOfElem = (ratio[i] / 100) * pHisto->numOfElems;
if (numOfElem == 0) {
pVal[i] = pHisto->min;
continue;
} else if (numOfElem <= pHisto->elems[0].num) {
pVal[i] = pHisto->elems[0].val;
continue;
} else if (numOfElem == pHisto->numOfElems) {
pVal[i] = pHisto->max;
continue;
}
int32_t j = 0;
int64_t total = 0;
while (j < pHisto->numOfEntries) {
total += pHisto->elems[j].num;
if (total <= numOfElem && total + pHisto->elems[j + 1].num > numOfElem) {
break;
}
j += 1;
}
assert(total <= numOfElem && total + pHisto->elems[j + 1].num > numOfElem);
double delta = numOfElem - total;
if (fabs(delta) < FLT_EPSILON) {
pVal[i] = pHisto->elems[j].val;
}
double start = (double)pHisto->elems[j].num;
double range = pHisto->elems[j + 1].num - start;
if (range == 0) {
pVal[i] = (pHisto->elems[j + 1].val - pHisto->elems[j].val) * delta / start + pHisto->elems[j].val;
} else {
double factor = (-2 * start + sqrt(4 * start * start - 4 * range * (-2 * delta))) / (2 * range);
pVal[i] = pHisto->elems[j].val + (pHisto->elems[j + 1].val - pHisto->elems[j].val) * factor;
}
}
#else
double* pVal = malloc(num * sizeof(double));
for (int32_t i = 0; i < num; ++i) {
double numOfElem = ratio[i] * pHisto->numOfElems;
tSkipListNode* pFirst = pHisto->pList->pHead.pForward[0];
SHistBin* pEntry = (SHistBin*)pFirst->pData;
if (numOfElem == 0) {
pVal[i] = pHisto->min;
printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]);
continue;
} else if (numOfElem <= pEntry->num) {
pVal[i] = pEntry->val;
printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]);
continue;
} else if (numOfElem == pHisto->numOfElems) {
pVal[i] = pHisto->max;
printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]);
continue;
}
int32_t j = 0;
int64_t total = 0;
SHistBin* pPrev = pEntry;
while (j < pHisto->numOfEntries) {
if (total <= numOfElem && total + pEntry->num > numOfElem) {
break;
}
total += pEntry->num;
pPrev = pEntry;
pFirst = pFirst->pForward[0];
pEntry = (SHistBin*)pFirst->pData;
j += 1;
}
assert(total <= numOfElem && total + pEntry->num > numOfElem);
double delta = numOfElem - total;
if (fabs(delta) < FLT_EPSILON) {
// printf("i/numofSlot: %f, v:%f, %f\n",
// (double)i/numOfSlots, numOfElem, pHisto->elems[j].val);
pVal[i] = pPrev->val;
}
double start = pPrev->num;
double range = pEntry->num - start;
if (range == 0) {
pVal[i] = (pEntry->val - pPrev->val) * delta / start + pPrev->val;
} else {
double factor = (-2 * start + sqrt(4 * start * start - 4 * range * (-2 * delta))) / (2 * range);
pVal[i] = pPrev->val + (pEntry->val - pPrev->val) * factor;
}
// printf("i/numofSlot: %f, v:%f, %f\n", (double)i/numOfSlots,
// numOfElem, val);
}
#endif
return pVal;
}
SHistogramInfo* tHistogramMerge(SHistogramInfo* pHisto1, SHistogramInfo* pHisto2, int32_t numOfEntries) {
SHistogramInfo* pResHistogram = tHistogramCreate(numOfEntries);
// error in histogram info
if (pHisto1->numOfEntries > MAX_HISTOGRAM_BIN || pHisto2->numOfEntries > MAX_HISTOGRAM_BIN) {
return pResHistogram;
}
SHistBin* pHistoBins = calloc(1, sizeof(SHistBin) * (pHisto1->numOfEntries + pHisto2->numOfEntries));
int32_t i = 0, j = 0, k = 0;
while (i < pHisto1->numOfEntries && j < pHisto2->numOfEntries) {
if (pHisto1->elems[i].val < pHisto2->elems[j].val) {
pHistoBins[k++] = pHisto1->elems[i++];
} else if (pHisto1->elems[i].val > pHisto2->elems[j].val) {
pHistoBins[k++] = pHisto2->elems[j++];
} else {
pHistoBins[k] = pHisto1->elems[i++];
pHistoBins[k++].num += pHisto2->elems[j++].num;
}
}
if (i < pHisto1->numOfEntries) {
int32_t remain = pHisto1->numOfEntries - i;
memcpy(&pHistoBins[k], &pHisto1->elems[i], sizeof(SHistBin) * remain);
k += remain;
}
if (j < pHisto2->numOfEntries) {
int32_t remain = pHisto2->numOfEntries - j;
memcpy(&pHistoBins[k], &pHisto2->elems[j], sizeof(SHistBin) * remain);
k += remain;
}
/* update other information */
pResHistogram->numOfElems = pHisto1->numOfElems + pHisto2->numOfElems;
pResHistogram->min = (pHisto1->min < pHisto2->min) ? pHisto1->min : pHisto2->min;
pResHistogram->max = (pHisto1->max > pHisto2->max) ? pHisto1->max : pHisto2->max;
while (k > numOfEntries) {
histogramMergeImpl(pHistoBins, &k);
}
pResHistogram->numOfEntries = k;
memcpy(pResHistogram->elems, pHistoBins, sizeof(SHistBin) * k);
free(pHistoBins);
return pResHistogram;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "tpercentile.h"
#include "tpagedfile.h"
#include "taosdef.h"
#include "tcompare.h"
#include "ttypes.h"
#define DEFAULT_NUM_OF_SLOT 1024
int32_t getGroupId(int32_t numOfSlots, int32_t slotIndex, int32_t times) {
return (times * numOfSlots) + slotIndex;
}
static SFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx) {
SFilePage *buffer = (SFilePage *)calloc(1, pMemBucket->bytes * pMemBucket->pSlots[slotIdx].info.size + sizeof(SFilePage));
int32_t groupId = getGroupId(pMemBucket->numOfSlots, slotIdx, pMemBucket->times);
SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId);
int32_t offset = 0;
for(int32_t i = 0; i < list->size; ++i) {
SPageInfo* pgInfo = *(SPageInfo**) taosArrayGet(list, i);
SFilePage* pg = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId);
memcpy(buffer->data + offset, pg->data, (size_t)(pg->num * pMemBucket->bytes));
offset += (int32_t)(pg->num * pMemBucket->bytes);
}
qsort(buffer->data, pMemBucket->pSlots[slotIdx].info.size, pMemBucket->bytes, pMemBucket->comparFn);
return buffer;
}
static void resetBoundingBox(MinMaxEntry* range, int32_t type) {
if (IS_SIGNED_NUMERIC_TYPE(type)) {
range->i64MaxVal = INT64_MIN;
range->i64MinVal = INT64_MAX;
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
range->u64MaxVal = 0;
range->u64MinVal = UINT64_MAX;
} else {
range->dMaxVal = -DBL_MAX;
range->dMinVal = DBL_MAX;
}
}
static int32_t setBoundingBox(MinMaxEntry* range, int16_t type, double minval, double maxval) {
if (minval > maxval) {
return -1;
}
if (IS_SIGNED_NUMERIC_TYPE(type)) {
range->i64MinVal = (int64_t) minval;
range->i64MaxVal = (int64_t) maxval;
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)){
range->u64MinVal = (uint64_t) minval;
range->u64MaxVal = (uint64_t) maxval;
} else {
range->dMinVal = minval;
range->dMaxVal = maxval;
}
return 0;
}
static void resetPosInfo(SSlotInfo* pInfo) {
pInfo->size = 0;
pInfo->pageId = -1;
pInfo->data = NULL;
}
double findOnlyResult(tMemBucket *pMemBucket) {
assert(pMemBucket->total == 1);
for (int32_t i = 0; i < pMemBucket->numOfSlots; ++i) {
tMemBucketSlot *pSlot = &pMemBucket->pSlots[i];
if (pSlot->info.size == 0) {
continue;
}
int32_t groupId = getGroupId(pMemBucket->numOfSlots, i, pMemBucket->times);
SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId);
assert(list->size == 1);
SPageInfo* pgInfo = (SPageInfo*) taosArrayGetP(list, 0);
SFilePage* pPage = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId);
assert(pPage->num == 1);
double v = 0;
GET_TYPED_DATA(v, double, pMemBucket->type, pPage->data);
return v;
}
return 0;
}
int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) {
int64_t v = 0;
GET_TYPED_DATA(v, int64_t, pBucket->type, value);
int32_t index = -1;
if (v > pBucket->range.i64MaxVal || v < pBucket->range.i64MinVal) {
return index;
}
// divide the value range into 1024 buckets
uint64_t span = pBucket->range.i64MaxVal - pBucket->range.i64MinVal;
if (span < pBucket->numOfSlots) {
int64_t delta = v - pBucket->range.i64MinVal;
index = (delta % pBucket->numOfSlots);
} else {
double slotSpan = (double)span / pBucket->numOfSlots;
index = (int32_t)((v - pBucket->range.i64MinVal) / slotSpan);
if (v == pBucket->range.i64MaxVal) {
index -= 1;
}
}
assert(index >= 0 && index < pBucket->numOfSlots);
return index;
}
int32_t tBucketUintHash(tMemBucket *pBucket, const void *value) {
int64_t v = 0;
GET_TYPED_DATA(v, uint64_t, pBucket->type, value);
int32_t index = -1;
if (v > pBucket->range.u64MaxVal || v < pBucket->range.u64MinVal) {
return index;
}
// divide the value range into 1024 buckets
uint64_t span = pBucket->range.u64MaxVal - pBucket->range.u64MinVal;
if (span < pBucket->numOfSlots) {
int64_t delta = v - pBucket->range.u64MinVal;
index = (int32_t) (delta % pBucket->numOfSlots);
} else {
double slotSpan = (double)span / pBucket->numOfSlots;
index = (int32_t)((v - pBucket->range.u64MinVal) / slotSpan);
if (v == pBucket->range.u64MaxVal) {
index -= 1;
}
}
assert(index >= 0 && index < pBucket->numOfSlots);
return index;
}
int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) {
double v = 0;
if (pBucket->type == TSDB_DATA_TYPE_FLOAT) {
v = GET_FLOAT_VAL(value);
} else {
v = GET_DOUBLE_VAL(value);
}
int32_t index = -1;
if (v > pBucket->range.dMaxVal || v < pBucket->range.dMinVal) {
return index;
}
// divide a range of [dMinVal, dMaxVal] into 1024 buckets
double span = pBucket->range.dMaxVal - pBucket->range.dMinVal;
if (span < pBucket->numOfSlots) {
int32_t delta = (int32_t)(v - pBucket->range.dMinVal);
index = (delta % pBucket->numOfSlots);
} else {
double slotSpan = span / pBucket->numOfSlots;
index = (int32_t)((v - pBucket->range.dMinVal) / slotSpan);
if (v == pBucket->range.dMaxVal) {
index -= 1;
}
}
assert(index >= 0 && index < pBucket->numOfSlots);
return index;
}
static __perc_hash_func_t getHashFunc(int32_t type) {
if (IS_SIGNED_NUMERIC_TYPE(type)) {
return tBucketIntHash;
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
return tBucketUintHash;
} else {
return tBucketDoubleHash;
}
}
static void resetSlotInfo(tMemBucket* pBucket) {
for (int32_t i = 0; i < pBucket->numOfSlots; ++i) {
tMemBucketSlot* pSlot = &pBucket->pSlots[i];
resetBoundingBox(&pSlot->range, pBucket->type);
resetPosInfo(&pSlot->info);
}
}
tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval) {
tMemBucket *pBucket = (tMemBucket *)calloc(1, sizeof(tMemBucket));
if (pBucket == NULL) {
return NULL;
}
pBucket->numOfSlots = DEFAULT_NUM_OF_SLOT;
pBucket->bufPageSize = DEFAULT_PAGE_SIZE * 4; // 4k per page
pBucket->type = dataType;
pBucket->bytes = nElemSize;
pBucket->total = 0;
pBucket->times = 1;
pBucket->maxCapacity = 200000;
if (setBoundingBox(&pBucket->range, pBucket->type, minval, maxval) != 0) {
// qError("MemBucket:%p, invalid value range: %f-%f", pBucket, minval, maxval);
free(pBucket);
return NULL;
}
pBucket->elemPerPage = (pBucket->bufPageSize - sizeof(SFilePage))/pBucket->bytes;
pBucket->comparFn = getKeyComparFunc(pBucket->type, TSDB_ORDER_ASC);
pBucket->hashFunc = getHashFunc(pBucket->type);
if (pBucket->hashFunc == NULL) {
// qError("MemBucket:%p, not support data type %d, failed", pBucket, pBucket->type);
free(pBucket);
return NULL;
}
pBucket->pSlots = (tMemBucketSlot *)calloc(pBucket->numOfSlots, sizeof(tMemBucketSlot));
if (pBucket->pSlots == NULL) {
free(pBucket);
return NULL;
}
resetSlotInfo(pBucket);
int32_t ret = createDiskbasedResultBuffer(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 512, 1);
if (ret != 0) {
tMemBucketDestroy(pBucket);
return NULL;
}
// qDebug("MemBucket:%p, elem size:%d", pBucket, pBucket->bytes);
return pBucket;
}
void tMemBucketDestroy(tMemBucket *pBucket) {
if (pBucket == NULL) {
return;
}
destroyResultBuf(pBucket->pBuffer);
tfree(pBucket->pSlots);
tfree(pBucket);
}
void tMemBucketUpdateBoundingBox(MinMaxEntry *r, const char *data, int32_t dataType) {
if (IS_SIGNED_NUMERIC_TYPE(dataType)) {
int64_t v = 0;
GET_TYPED_DATA(v, int64_t, dataType, data);
if (r->i64MinVal > v) {
r->i64MinVal = v;
}
if (r->i64MaxVal < v) {
r->i64MaxVal = v;
}
} else if (IS_UNSIGNED_NUMERIC_TYPE(dataType)) {
uint64_t v = 0;
GET_TYPED_DATA(v, uint64_t, dataType, data);
if (r->i64MinVal > v) {
r->i64MinVal = v;
}
if (r->i64MaxVal < v) {
r->i64MaxVal = v;
}
} else if (IS_FLOAT_TYPE(dataType)) {
double v = 0;
GET_TYPED_DATA(v, double, dataType, data);
if (r->dMinVal > v) {
r->dMinVal = v;
}
if (r->dMaxVal < v) {
r->dMaxVal = v;
}
} else {
assert(0);
}
}
/*
* in memory bucket, we only accept data array list
*/
int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) {
assert(pBucket != NULL && data != NULL && size > 0);
int32_t count = 0;
int32_t bytes = pBucket->bytes;
for (int32_t i = 0; i < size; ++i) {
char *d = (char *) data + i * bytes;
int32_t index = (pBucket->hashFunc)(pBucket, d);
if (index < 0) {
continue;
}
count += 1;
tMemBucketSlot *pSlot = &pBucket->pSlots[index];
tMemBucketUpdateBoundingBox(&pSlot->range, d, pBucket->type);
// ensure available memory pages to allocate
int32_t groupId = getGroupId(pBucket->numOfSlots, index, pBucket->times);
int32_t pageId = -1;
if (pSlot->info.data == NULL || pSlot->info.data->num >= pBucket->elemPerPage) {
if (pSlot->info.data != NULL) {
assert(pSlot->info.data->num >= pBucket->elemPerPage && pSlot->info.size > 0);
// keep the pointer in memory
releaseResBufPage(pBucket->pBuffer, pSlot->info.data);
pSlot->info.data = NULL;
}
pSlot->info.data = getNewDataBuf(pBucket->pBuffer, groupId, &pageId);
pSlot->info.pageId = pageId;
}
memcpy(pSlot->info.data->data + pSlot->info.data->num * pBucket->bytes, d, pBucket->bytes);
pSlot->info.data->num += 1;
pSlot->info.size += 1;
}
pBucket->total += count;
return 0;
}
////////////////////////////////////////////////////////////////////////////////////////////
/*
*
* now, we need to find the minimum value of the next slot for
* interpolating the percentile value
* j is the last slot of current segment, we need to get the first
* slot of the next segment.
*/
static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int32_t slotIdx) {
int32_t j = slotIdx + 1;
while (j < pMemBucket->numOfSlots && (pMemBucket->pSlots[j].info.size == 0)) {
++j;
}
assert(j < pMemBucket->numOfSlots);
return pMemBucket->pSlots[j].range;
}
static bool isIdenticalData(tMemBucket *pMemBucket, int32_t index);
static double getIdenticalDataVal(tMemBucket* pMemBucket, int32_t slotIndex) {
assert(isIdenticalData(pMemBucket, slotIndex));
tMemBucketSlot *pSlot = &pMemBucket->pSlots[slotIndex];
double finalResult = 0.0;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
finalResult = (double) pSlot->range.i64MinVal;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
finalResult = (double) pSlot->range.u64MinVal;
} else {
finalResult = (double) pSlot->range.dMinVal;
}
return finalResult;
}
double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) {
int32_t num = 0;
for (int32_t i = 0; i < pMemBucket->numOfSlots; ++i) {
tMemBucketSlot *pSlot = &pMemBucket->pSlots[i];
if (pSlot->info.size == 0) {
continue;
}
// required value in current slot
if (num < (count + 1) && num + pSlot->info.size >= (count + 1)) {
if (pSlot->info.size + num == (count + 1)) {
/*
* now, we need to find the minimum value of the next slot for interpolating the percentile value
* j is the last slot of current segment, we need to get the first slot of the next segment.
*/
MinMaxEntry next = getMinMaxEntryOfNextSlotWithData(pMemBucket, i);
double maxOfThisSlot = 0;
double minOfNextSlot = 0;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = (double) pSlot->range.i64MaxVal;
minOfNextSlot = (double) next.i64MinVal;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = (double) pSlot->range.u64MaxVal;
minOfNextSlot = (double) next.u64MinVal;
} else {
maxOfThisSlot = (double) pSlot->range.dMaxVal;
minOfNextSlot = (double) next.dMinVal;
}
assert(minOfNextSlot > maxOfThisSlot);
double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot;
return val;
}
if (pSlot->info.size <= pMemBucket->maxCapacity) {
// data in buffer and file are merged together to be processed.
SFilePage *buffer = loadDataFromFilePage(pMemBucket, i);
int32_t currentIdx = count - num;
char *thisVal = buffer->data + pMemBucket->bytes * currentIdx;
char *nextVal = thisVal + pMemBucket->bytes;
double td = 1.0, nd = 1.0;
GET_TYPED_DATA(td, double, pMemBucket->type, thisVal);
GET_TYPED_DATA(nd, double, pMemBucket->type, nextVal);
double val = (1 - fraction) * td + fraction * nd;
tfree(buffer);
return val;
} else { // incur a second round bucket split
if (isIdenticalData(pMemBucket, i)) {
return getIdenticalDataVal(pMemBucket, i);
}
// try next round
pMemBucket->times += 1;
// qDebug("MemBucket:%p, start next round data bucketing, time:%d", pMemBucket, pMemBucket->times);
pMemBucket->range = pSlot->range;
pMemBucket->total = 0;
resetSlotInfo(pMemBucket);
int32_t groupId = getGroupId(pMemBucket->numOfSlots, i, pMemBucket->times - 1);
SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId);
assert(list->size > 0);
for (int32_t f = 0; f < list->size; ++f) {
SPageInfo *pgInfo = *(SPageInfo **)taosArrayGet(list, f);
SFilePage *pg = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId);
tMemBucketPut(pMemBucket, pg->data, (int32_t)pg->num);
releaseResBufPageInfo(pMemBucket->pBuffer, pgInfo);
}
return getPercentileImpl(pMemBucket, count - num, fraction);
}
} else {
num += pSlot->info.size;
}
}
return 0;
}
double getPercentile(tMemBucket *pMemBucket, double percent) {
if (pMemBucket->total == 0) {
return 0.0;
}
// if only one elements exists, return it
if (pMemBucket->total == 1) {
return findOnlyResult(pMemBucket);
}
percent = fabs(percent);
// find the min/max value, no need to scan all data in bucket
if (fabs(percent - 100.0) < DBL_EPSILON || (percent < DBL_EPSILON)) {
MinMaxEntry* pRange = &pMemBucket->range;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->i64MaxVal : pRange->i64MinVal);
return v;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->u64MaxVal : pRange->u64MinVal);
return v;
} else {
return fabs(percent - 100) < DBL_EPSILON? pRange->dMaxVal:pRange->dMinVal;
}
}
double percentVal = (percent * (pMemBucket->total - 1)) / ((double)100.0);
// do put data by using buckets
int32_t orderIdx = (int32_t)percentVal;
return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx);
}
/*
* check if data in one slot are all identical only need to compare with the bounding box
*/
bool isIdenticalData(tMemBucket *pMemBucket, int32_t index) {
tMemBucketSlot *pSeg = &pMemBucket->pSlots[index];
if (IS_FLOAT_TYPE(pMemBucket->type)) {
return fabs(pSeg->range.dMaxVal - pSeg->range.dMinVal) < DBL_EPSILON;
} else {
return pSeg->range.i64MinVal == pSeg->range.i64MaxVal;
}
}
#include "tscalarfunction.h"
SScalarFunctionInfo scalarFunc[1] = {
{
},
};
此差异已折叠。
......@@ -8,7 +8,7 @@ target_include_directories(
target_link_libraries(
parser
PRIVATE os util common catalog executor transport
PRIVATE os util common catalog function transport
)
ADD_SUBDIRECTORY(test)
ADD_SUBDIRECTORY(test)
\ No newline at end of file
......@@ -246,6 +246,7 @@ typedef struct tSqlExpr {
// used in select clause. select <SArray> from xxx
typedef struct tSqlExprItem {
tSqlExpr *pNode; // The list of expressions
int32_t functionId;
char *aliasName; // alias name, null-terminated string
bool distinct;
} tSqlExprItem;
......
......@@ -26,6 +26,11 @@ extern "C" {
struct SSqlNode;
typedef struct SColumnIndex {
int16_t tableIndex;
int16_t columnIndex;
} SColumnIndex;
typedef struct SInsertStmtInfo {
SHashObj *pTableBlockHashList; // data block for each table
SArray *pDataBlocks; // SArray<STableDataBlocks*>. Merged submit block for each vgroup
......@@ -35,6 +40,51 @@ typedef struct SInsertStmtInfo {
char *sql; // current sql statement position
} SInsertStmtInfo;
// the structure for sql function in select clause
typedef struct SSqlExpr {
char aliasName[TSDB_COL_NAME_LEN]; // as aliasName
char token[TSDB_COL_NAME_LEN]; // original token
SColIndex colInfo;
uint64_t uid; // table uid, todo refactor use the pointer
int16_t functionId; // function id in aAgg array
int16_t resType; // return value type
int16_t resBytes; // length of return value
int32_t interBytes; // inter result buffer size
int16_t colType; // table column type
int16_t colBytes; // table column bytes
int16_t numOfParams; // argument value of each function
SVariant param[3]; // parameters are not more than 3
int32_t offset; // sub result column value of arithmetic expression.
int16_t resColId; // result column id
SColumnFilterList flist;
} SSqlExpr;
typedef struct SExprInfo {
SSqlExpr base;
struct tExprNode *pExpr;
} SExprInfo;
typedef struct SColumn {
uint64_t tableUid;
int32_t columnIndex;
SColumnInfo info;
} SColumn;
typedef struct SInternalField {
TAOS_FIELD field;
bool visible;
SExprInfo *pExpr;
} SInternalField;
void clearTableMetaInfo(STableMetaInfo* pTableMetaInfo);
void clearAllTableMetaInfo(SQueryStmtInfo* pQueryInfo, bool removeMeta, uint64_t id);
/**
* Validate the sql info, according to the corresponding metadata info from catalog.
* @param pCatalog
......
......@@ -23,10 +23,31 @@ extern "C" {
#include "os.h"
#include "ttoken.h"
#define UTIL_TABLE_IS_SUPER_TABLE(metaInfo) \
(((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_SUPER_TABLE))
#define UTIL_TABLE_IS_CHILD_TABLE(metaInfo) \
(((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_CHILD_TABLE))
#define UTIL_TABLE_IS_NORMAL_TABLE(metaInfo) \
(!(UTIL_TABLE_IS_SUPER_TABLE(metaInfo) || UTIL_TABLE_IS_CHILD_TABLE(metaInfo)))
#define UTIL_TABLE_IS_TMP_TABLE(metaInfo) \
(((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_TEMP_TABLE))
int32_t parserValidateIdToken(SToken* pToken);
int32_t parserSetInvalidOperatorMsg(char* dst, int32_t dstBufLen, const char* msg);
int32_t buildInvalidOperationMsg(char* dst, int32_t dstBufLen, const char* msg);
int32_t parserSetSyntaxErrMsg(char* dst, int32_t dstBufLen, const char* additionalInfo, const char* sourceStr);
void columnListCopy(SArray* dst, const SArray* src, uint64_t tableUid);
void columnListCopyAll(SArray* dst, const SArray* src);
void columnListDestroy(SArray* pColumnList);
void cleanupTagCond(STagCond* pTagCond);
void cleanupColumnCond(SArray** pCond);
#ifdef __cplusplus
}
#endif
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_QUERYINFOUTIL_H
#define TDENGINE_QUERYINFOUTIL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "parserInt.h"
SSchema* getTbnameColumnSchema();
int32_t getNumOfColumns(const STableMeta* pTableMeta);
int32_t getNumOfTags(const STableMeta* pTableMeta);
SSchema *getTableColumnSchema(const STableMeta *pTableMeta);
SSchema *getTableTagSchema(const STableMeta* pTableMeta);
SSchema *getOneColumnSchema(const STableMeta* pTableMeta, int32_t colIndex);
size_t getNumOfExprs(SQueryStmtInfo* pQueryInfo);
SExprInfo* createExprInfo(STableMetaInfo* pTableMetaInfo, int16_t functionId, SColumnIndex* pColIndex, int16_t type,
int16_t size, int16_t resColId, int16_t interSize, int32_t colType);
void addExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index, SExprInfo* pExprInfo);
void updateExprInfo(SExprInfo* pExprInfo, int16_t functionId, int32_t colId, int16_t srcColumnIndex, int16_t resType, int16_t resSize);
void assignExprInfo(SExprInfo* dst, const SExprInfo* src);
SExprInfo* getExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index);
int32_t copyAllExprInfo(SArray* dst, const SArray* src, bool deepcopy);
void cleanupFieldInfo(SFieldInfo* pFieldInfo);
STableComInfo getTableInfo(const STableMeta* pTableMeta);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_QUERYINFOUTIL_H
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册